diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..00a46aae37024855b8321160fc9b80a0f99611c6 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.2550404185056685, "train/grad": 0.2482831509411335, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.064332275390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.061553955078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.056885986328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.0524365234375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.04790771484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.041546630859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.03451171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.02649658203125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.016219482421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.005098876953125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.994290771484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.97808837890625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9623065185546875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.939578857421875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.9178802490234377, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.8969342041015627, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.87062255859375, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.839599304199219, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.804775390625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7721661376953124, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.731590118408203, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.6888365173339843, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.6404107666015624, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.587350120544434, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.531125297546387, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.462882385253906, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4009940242767334, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.344702091217041, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2688086259365083, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.180449430346489, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.1034329706430435, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0352000987529753, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.945673061311245, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8674681144952774, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7754309424757957, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6851028469204903, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5900913473218679, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5025329897552728, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4139187163114548, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.3129774188250303, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2406132189184427, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1814343256875872, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1090810491144658, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.053549313507974, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.9913963255286217, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.9366141479276121, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.8926277449727058, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.8389950316399336, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.8004619358479976, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04628070447593927, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.046197921372950077, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0460607136413455, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.045924625713378193, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04578803114593029, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.045597368702292446, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04538072437979281, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.045138027109205725, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.044817583933472634, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0444678283855319, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04412218887358904, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04359229282476008, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.043069623867049815, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04230080808512866, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04155719737522304, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04084417587146163, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03996423305012286, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.038980892188847066, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03797122669406235, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03712284442037344, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.036187760913744565, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03532555416226387, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.034466233626008036, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03363084276206792, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.032840643944218754, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.031979885511100294, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03127182348631322, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.030672689015045763, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02992605964653194, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.029133452139794826, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.028504092590883375, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.027994078737683593, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02740110254846513, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.026960501284338533, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.026540963784791528, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.026229764185845852, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.025855370289646087, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.025344502683728933, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.025069237351417542, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024729982083663345, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.024477307228371502, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.024238141211681066, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.023876179223880172, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02370861068367958, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02337492680642754, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.023873666888102888, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.024041516203433276, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02355442536063492, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.023501584311015903, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.026912212371826, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.018681526184082, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.005089282989502, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.991746425628662, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.978475570678711, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.9602420330047607, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.9397683143615723, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.9173386096954346, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.8879568576812744, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8574576377868652, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.827911376953125, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7843422889709473, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7431578636169434, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.685371160507202, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6321372985839844, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5827367305755615, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.521973133087158, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4523260593414307, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3759055137634277, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.304939031600952, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.216942310333252, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.1255221366882324, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.023524045944214, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.9145506620407104, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.8030637502670288, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.673952341079712, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.5633223056793213, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.4673019647598267, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.3443776369094849, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.208909273147583, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.0961449146270752, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.9987266063690186, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.8692936897277832, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.7502594590187073, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.6019185185432434, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.4789773225784302, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.3692744970321655, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3011743426322937, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.27492475509643555, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2591584324836731, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2526072561740875, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.24883709847927094, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.23331980407238007, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.23209786415100098, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.23033387959003448, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.2723067104816437, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.29533088207244873, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.28236135840415955, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.30072221159935, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06820436507936507, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07837301587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.10639880952380952, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.1515376984126984, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.19047619047619047, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.24156746031746032, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.2591765873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2584325396825397, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2442956349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.2371031746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.22916666666666666, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21899801587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21626984126984128, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21453373015873015, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.21924603174603174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.22842261904761904, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24107142857142858, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2750496031746032, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.3219246031746032, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.36681547619047616, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.4191468253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.4573412698412698, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.4878472222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.5223214285714286, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.5503472222222222, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.5739087301587301, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.5932539682539683, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.611359126984127, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.626984126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.6537698412698413, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.6770833333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.7016369047619048, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.7403273809523809, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.7785218253968254, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8157242063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8454861111111112, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8864087301587301, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9079861111111112, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9107142857142857, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9144345238095238, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9176587301587301, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9174107142857143, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9231150793650794, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9255952380952381, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9233630952380952, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9077380952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8995535714285714, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9097222222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9084821428571429, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.04296007072813332, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.04919911316806037, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.06027464390961506, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.07925816360854547, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.09090566156024588, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.1065165082574644, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.10415829337063338, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.09581778927290573, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.08198291869499072, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07723716751614089, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07271577888477142, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06494514773891903, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06256303592915773, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.06113753905953899, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06628668922562664, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.0752248787347861, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.08614395598668545, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.11598350967441676, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.14639953155612662, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.16929492234487092, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19817777281528942, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2240248441280833, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2504379527933109, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2848510518488996, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.31269031695225064, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.3418716838566796, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.3752531555216077, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.4053572388824128, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.43417335215766023, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.48696692036840056, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.5269740608928503, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.5697377036516154, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.6400712600252336, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.698373425780719, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.7614045288758887, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.808669335993131, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.859011456609555, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8871180489118079, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8922996249526033, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8961944586410079, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8947625297350086, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8910304188242668, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.8987536154434677, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9024426884900085, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9060500661046396, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.886814855968799, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8768509397685494, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8889566246671443, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8905215397935387, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 1.053549313507974, "validation/loss_best": 0.23209786415100098, "validation/acc_best": 0.9255952380952381, "validation/f1_best": 0.9024426884900085} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.3680834877490997, "train/grad": 0.18492609433829785, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.95788330078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.940504150390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.912496337890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8852178955078127, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.858885498046875, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.8235015869140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.7852032470703123, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.744262390136719, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.6931137084960937, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.6415786743164062, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.5932606506347655, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5245054626464842, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.4607645416259767, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3729749298095704, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.2926558685302734, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2186327743530274, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1279579544067384, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.025701198577881, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9164947700500488, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.8195017790794372, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.7057865571975708, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.5947723215818406, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.4779253780841828, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.3597813692688943, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.2418233931064606, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.1042169199883938, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.9845914918184281, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.8810015180706978, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.7527467261999845, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.6229457625001669, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.5285329285264015, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.45735177520662545, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.38018600184470414, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.32628642465919255, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.2795591476559639, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.24742655711248518, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.22163550961762668, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.2035642568208277, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.18828068112954496, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.17485288749448955, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.1678208160586655, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.16315264543518423, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.15940805490128696, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.15887245325371624, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.16653648274950683, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1660448679327965, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.18049090114422142, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.21928821856155992, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.3261133943591267, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04257356096059084, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04202848542481661, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04113018411211669, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04025077159516513, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03939058949239552, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03822206575423479, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.036957810800522566, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03564148893579841, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03408098228275776, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03264848696999252, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0314496613945812, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029982301257550716, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02885909954085946, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02761234186589718, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.026711916262283922, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.026037282729521393, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.025355942351743578, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.024708307376131416, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.024100435497239232, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02360550928860903, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023063086941838264, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022571770250797273, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022107792771421372, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021705799177289008, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02137833773624152, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021091913026757537, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.020869481805711985, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02054940684698522, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.020049405260942878, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01952708830591291, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01930694492533803, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.019127152087166907, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.018896894818171858, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.018587578390724956, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.018314529564231633, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.017924702004529537, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.017149999241810293, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.016717721584718674, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.016301833640318363, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.015883283556904645, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.015710998049471528, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.015727100200019776, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01583865110995248, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016340344788040966, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01737211235566065, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.017538986932486296, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01910867974278517, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.022744794376194477, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.030663616303354502, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.874826669692993, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.8471627235412598, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.80285906791687, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.7608835697174072, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.7211501598358154, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6689016819000244, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.6135830879211426, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.556079149246216, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.485870361328125, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.4159202575683594, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.351121187210083, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.2585442066192627, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.1732089519500732, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.0566043853759766, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.9516464471817017, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.856791615486145, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.744203805923462, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.6219679117202759, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.4966912269592285, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.3895782232284546, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.2672556638717651, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.150583028793335, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.0282195806503296, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.9000164270401001, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.7607404589653015, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.5880128145217896, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.45641469955444336, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.37110698223114014, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.3111032247543335, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.2756292223930359, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2548336386680603, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.23962169885635376, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.22245772182941437, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.20964841544628143, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.19544455409049988, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.18151676654815674, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.16888540983200073, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16307786107063293, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.16417428851127625, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1672917902469635, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1545238196849823, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.14559093117713928, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.16035738587379456, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.17634408175945282, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.19973130524158478, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.19167615473270416, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.2217443883419037, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3100205063819885, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.42933160066604614, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2507440476190476, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.24702380952380953, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.23487103174603174, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.23015873015873015, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.2286706349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.23015873015873015, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.23883928571428573, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2507440476190476, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2802579365079365, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.31547619047619047, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.35193452380952384, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.4089781746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.455109126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.4930555555555556, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5208333333333334, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.5379464285714286, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.560515873015873, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.5843253968253969, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6056547619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.6299603174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.660218253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.6850198412698413, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7113095238095238, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.746031746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.7916666666666666, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8315972222222222, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8759920634920635, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.894593253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9027777777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9097222222222222, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9144345238095238, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9208829365079365, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.925843253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9312996031746031, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9377480158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9424603174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9439484126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9444444444444444, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9451884920634921, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9422123015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9489087301587301, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9508928571428571, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9422123015873016, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9424603174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9437003968253969, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9382440476190477, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9347718253968254, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9278273809523809, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.08788952192106184, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.08437120537881386, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.07473684497643393, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.07105975995854281, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.07069189376074994, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.07177120050135302, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.07993901342506056, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.09100642687268812, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.11783318443995099, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.1458198692627572, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.17161745562656938, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.2077368845886702, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.2358206521927682, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.2608838277845356, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.2807566402540023, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.29547024791260246, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.32212391175722865, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.3620564890681424, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.40154302884685095, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.441920020429355, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.4942034437976738, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.5298558473890002, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.5727786727970097, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.6448827717799288, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.723017095695763, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7893021635398225, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8542352147458394, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8698016109326157, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8758594163537965, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8867968409720052, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8941289019096048, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9037481398783039, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.911342501075418, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9195935325962146, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.928186568557695, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9355601786532591, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9381866368536032, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9373237918796254, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9382732601450505, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9342902186586686, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9396347692012578, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9435717718664111, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9406314178618477, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9295945473549617, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9332681968817917, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9397571087315859, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9251302460900279, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9271828235237697, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9122497939439811, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.16315264543518423, "validation/loss_best": 0.14559093117713928, "validation/acc_best": 0.9541170634920635, "validation/f1_best": 0.9435717718664111} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 1.0624217510223388, "train/grad": 0.2224907886236906, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.7736614990234374, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.73573486328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.6767291259765624, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.6222714233398436, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.5714688110351562, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.505745544433594, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.4364747619628906, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.364426727294922, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.2760140991210935, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.1882372665405274, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.1072998046875, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.9942944717407227, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.8933520030975342, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.7607464218139648, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.646500177383423, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.5469830417633057, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.4320233261585236, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.3096441048383713, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.1843348574638366, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.0736183950304985, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.9394164274632931, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.8040399293601513, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6626529714465141, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5329885624349118, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.4269418607279658, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.33707777433097363, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.2831947761401534, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.25051847418770196, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.22170377857983112, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.19796516573056577, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.18184169283136725, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.16983098903670907, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.15679429451934992, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.14715698284097015, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.13776430691592395, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.1310186038352549, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.12388455426320434, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11676952589303255, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11220242752693593, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.11300381403416396, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.12082700707018375, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1288114405144006, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.14371078954078256, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.16526119897142053, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.23390964180231094, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.35839571644552054, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5936001890804619, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.399656978547573, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.768166355183348, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03723329151980579, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.035957667659968134, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03402314021252096, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03235994550399482, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.030971944322809575, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.029446658240631224, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02817743219435215, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02717052910476923, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026254411926493047, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025558836404234172, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025026641143485902, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024362568045035005, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02380653194151819, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023103888053447007, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02254238860681653, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022092684572562575, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021635362571105363, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021228779177181422, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02090210481546819, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020672725825570525, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02043639315292239, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020192089807242155, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.019853842300362884, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.019273145664483307, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.018611160498112442, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01793565063737333, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.017298098732717336, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.016625414094887672, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01589873485965654, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.015334062611218542, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.015001143210101873, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.014772178460843862, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.014607281915377825, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.014461141638457775, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.014343774486333132, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.014321281246375293, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.014269140968099236, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.014211277507711203, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014247010580729693, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01463814390823245, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.015423113006399945, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.01692135991062969, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.018148350649280474, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.021008705266285687, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02629114102339372, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03549802669789642, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.050090571087785064, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.08677603291347623, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10613596745301038, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.666802406311035, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.6196908950805664, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.547785997390747, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.482254981994629, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.4217638969421387, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.3430092334747314, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.2600347995758057, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.1736578941345215, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.0679726600646973, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.9643428325653076, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.8706214427947998, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.742989420890808, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.632127046585083, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.4904775619506836, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.3711328506469727, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.2682679891586304, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.1498982906341553, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.0228242874145508, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.8870844841003418, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.7568060159683228, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.5939938426017761, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.4555858373641968, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.34982284903526306, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.29697754979133606, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.2658011317253113, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.23964627087116241, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.22067658603191376, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.20667079091072083, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.19128689169883728, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.17707985639572144, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.16545049846172333, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1565805822610855, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.14736224710941315, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1412796527147293, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.13644638657569885, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13005314767360687, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12680958211421967, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13074630498886108, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14630749821662903, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1450573205947876, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.16534103453159332, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.17332158982753754, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3073618412017822, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.31406083703041077, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6619222164154053, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9005417823791504, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.889521598815918, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.9322617053985596, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.523763656616211, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.22495039682539683, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23139880952380953, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.24702380952380953, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.27380952380952384, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.30654761904761907, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3551587301587302, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.4027777777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4476686507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.48561507936507936, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5143849206349206, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5329861111111112, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5602678571428571, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5877976190476191, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6138392857142857, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6396329365079365, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.660218253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6877480158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.714781746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7492559523809523, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7854662698412699, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.828125, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8732638888888888, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8980654761904762, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9077380952380952, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9151785714285714, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9228670634920635, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9270833333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9308035714285714, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9342757936507936, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9412202380952381, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9466765873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9489087301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9506448412698413, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9563492063492064, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9499007936507936, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9501488095238095, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9285714285714286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.935515873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9097222222222222, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.904265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8799603174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8908730158730159, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8983134920634921, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.06805755165197813, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.07531888190402555, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.08950791545010303, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.11472558210925306, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.14237886632605315, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.17445348538403485, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.20268336891043592, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.22927975081172763, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.2567860260652461, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.27992818944731884, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.2962883953638904, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.32450796128575343, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.3637411555776164, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.4088073981465965, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.4495042420881697, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.4865072011971555, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.5283073386145274, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.5795647405703886, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.6442981315859704, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.709508775827596, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7808895844270409, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8484633235043106, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8759894194445452, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8911973003292863, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.900962169955958, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9087873194654025, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9142979944218252, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9191197614421176, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9239627475133722, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9326663233947619, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9394410515253607, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9409102416858927, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9422286000910974, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9447274019237852, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9483395374292984, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.949430986455684, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9515757103955722, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9504500871945991, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9420991342144363, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9458213884039631, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9405301284812262, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9460557118852906, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9166624358525634, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9157705501603182, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8759120964017265, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8907670633015973, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8832243939361428, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8745655539248238, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8485486202165966, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 0.12388455426320434, "validation/loss_best": 0.12680958211421967, "validation/acc_best": 0.9563492063492064, "validation/f1_best": 0.9515757103955722} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 1.0337527433037759, "train/grad": 0.40907607175409794, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.5631558227539064, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.50829833984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.42420654296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.3470379638671877, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.2751378631591797, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.182011833190918, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.084444465637207, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.984726333618164, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8660264682769776, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7531176567077638, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6536781978607178, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.52170152425766, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.409396577477455, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.267659965157509, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.1482409155368805, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.042925425171852, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.9147599567472935, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7679797135293484, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6149808625876904, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.4930391930043697, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.382016122341156, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.30917363561689853, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.2626630286499858, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.23340576454997064, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.21089451748877763, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.18971333688125014, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.17368813609704375, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.16057636482641102, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1451040029898286, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.12961288000456989, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.11740733951330184, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10813438968732952, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0983960332069546, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.09215213334187865, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0860640732664615, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.08047207445837558, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07635901573114097, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07328767476603389, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09047077101655304, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.10901469041593373, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.1260706337541342, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.17874278917908668, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.3389157474040985, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.48408036107197405, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.7577350555639714, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.4043078690208495, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.2882108605094253, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.6116055740788577, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.513084714375436, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.030149801345542074, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.028911542687565087, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.027462210869416595, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02652354649268091, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.025872209100052713, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.025210004346445204, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02462166789919138, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024058560375124216, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02340169916860759, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02278615090996027, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022262712931260465, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02161971040070057, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021132776560261846, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02062073533423245, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02027743974234909, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02004397617187351, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019828257518820464, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019560946179553867, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.018984503699466585, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018172552031464874, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.017191382548771797, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.016365008200518786, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01582378468476236, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01548393361736089, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015115335965529084, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.014736840724945069, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.014484498570673167, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014303834340535105, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014091360219754278, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01368952291784808, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.01314698891248554, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.012713173893280328, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012266128816409037, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.012094399258494377, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011869388211052864, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011683063490781933, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011601521614938975, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011689313784590922, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014130896429996938, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.016107983712572604, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.018891779310069978, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022868258685339243, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03571615264751017, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04516664657741785, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06355217573232949, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.09192994887009263, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.14088536450639366, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.18796491427347065, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.26296133298426866, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.4506468772888184, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.388089179992676, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.291956901550293, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.2038333415985107, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.1220638751983643, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.017287492752075, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.9093471765518188, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.801102638244629, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.6751195192337036, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.5575681924819946, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.4556087255477905, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.3216661214828491, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.2080485820770264, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.0640277862548828, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.9394768476486206, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.822223961353302, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.6686935424804688, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.5092239379882812, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3775581419467926, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3146604001522064, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.27565762400627136, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.24969784915447235, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.22845770418643951, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.21100930869579315, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.19757294654846191, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1849203258752823, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1741512268781662, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.16463664174079895, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.15661565959453583, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1475827991962433, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13754114508628845, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1329786330461502, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1285209357738495, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12375292181968689, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12233373522758484, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1261034458875656, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.15001408755779266, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.18727347254753113, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.287899374961853, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3914075195789337, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.34189218282699585, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5405299067497253, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6556849479675293, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.488541841506958, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.0202661752700806, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.335909366607666, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.925696849822998, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.474703788757324, "validation/loss_048_lr5.0e+01_wd1.0e+00": 12.937477111816406, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2993551587301587, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.33110119047619047, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3861607142857143, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.43154761904761907, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.46378968253968256, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4905753968253968, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5188492063492064, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5399305555555556, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5699404761904762, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.595734126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6165674603174603, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6458333333333334, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6716269841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7053571428571429, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7361111111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7743055555555556, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8162202380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8640873015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9012896825396826, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9087301587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9159226190476191, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.923859126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9320436507936508, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9357638888888888, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9399801587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9444444444444444, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9484126984126984, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9578373015873016, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9365079365079365, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9263392857142857, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9439484126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9161706349206349, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9270833333333334, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8864087301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9280753968253969, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9112103174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9201388888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8921130952380952, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8821924603174603, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.13413291963387322, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.15525465498096472, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1870300548871087, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.21466715752670867, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2349316192183625, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.2561467535599418, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.28070474209507634, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3003975922655549, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3419294748393054, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3825070226222689, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.41732849955635953, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4707348248465661, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5106948280254473, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5691948179841546, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6294095247956006, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6977260354653539, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.76921616173984, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8402540031999544, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8830710459442699, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8899248691190177, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8994113626246377, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9097742137063565, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.915361089623126, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9207724409619306, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9243662102433522, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9311548372437926, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9341753814040343, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9400370855834351, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9443051220408036, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9448205607455165, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9506121033527792, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9515605159229213, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9539494143115284, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9558277833001644, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9547644695791495, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9556297607606643, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9515260072963129, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9373910015820596, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9354302456429089, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9019499738608823, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9408065878080484, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8895805495364228, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9148588231892093, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8567509989122347, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.898985687932236, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8918599744215427, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9008360754976303, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8730150854859564, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8511756116533247, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.09215213334187865, "validation/loss_best": 0.12375292181968689, "validation/acc_best": 0.9605654761904762, "validation/f1_best": 0.9558277833001644} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.2672547963261604, "train/grad": 0.6927152276039124, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.3478208923339845, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.2771381378173827, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.1687252426147463, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.070305061340332, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.98037015914917, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.8673067378997803, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.753484296798706, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.6418978261947632, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.5143053483963014, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.3969778728485107, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.2958411341905594, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.1624408879876136, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.046683935523033, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.8891413933038712, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.7428593343496322, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.6130777990818024, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.47347442351281643, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.3614408379793167, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.28998560950160024, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.2543071761727333, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.22582052785903214, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.20400828648358582, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1844396273046732, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.16664945460855962, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1501110798679292, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.13270851923152804, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.11887359842658043, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.10744289093650877, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0938117221929133, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0812953202240169, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.07221423361450434, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.06512973526492714, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.05716487244702875, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.05371216940693557, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.049489421201869846, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.051698057530447844, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06527605439536273, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0895725572295487, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.12970153626054526, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.20004038318060338, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.3877893952280283, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.49007904531434177, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8204807697422802, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2322362517286092, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.4647910589445383, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.8838098320551218, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.377794829569757, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.941411343514919, "train/loss_048_lr5.0e+01_wd1.0e+00": 14.05034973680973, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.026485001919791102, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.025856024222448468, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.025116236070170998, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024551619067788123, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02406536675989628, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023470201762393117, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02288309680297971, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022334752045571803, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02176328211091459, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02130431362427771, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020966165428981186, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020596036622300744, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020332554108463227, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020014473190531134, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019665516382083296, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019113272298127412, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.017970490232110024, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016649054563604295, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.015714310738258064, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01529364313930273, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.014885169100016355, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014477618031669408, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01401511195115745, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013563882468733936, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013104702741838992, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012580988286063076, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012140276385471225, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011769036576151849, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.011298919485416264, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.010924780744826421, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.010799885526066645, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010673542198492213, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010181537192547694, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010098425533506088, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010066533173667267, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011111441879766062, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013059512051404454, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01606230979261454, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01985528830322437, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02608924835221842, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03971771945129149, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04476674002595246, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06573743472807109, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.08589660376310349, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0983427927363664, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.12106360169127584, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.24621784921735526, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.33416066840291025, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.4770348937064409, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.233508825302124, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.1546084880828857, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.034601926803589, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.9270684719085693, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.8304543495178223, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.710850715637207, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.5924948453903198, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.4780405759811401, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.3480088710784912, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.228775978088379, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.1257877349853516, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9881097674369812, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.8633185625076294, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6817463636398315, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5288301706314087, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.41405820846557617, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.3264869153499603, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.2808166742324829, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.2510882318019867, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.23188036680221558, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.21418149769306183, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1993510127067566, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.18518023192882538, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.17283032834529877, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.16229823231697083, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.15196165442466736, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1440599113702774, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.13698890805244446, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.13024435937404633, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12617214024066925, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12055672705173492, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11674562096595764, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.114497609436512, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11941169947385788, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1376485973596573, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.16693606972694397, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14769113063812256, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2866867184638977, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.22455845773220062, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.4814445376396179, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.7514168620109558, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.8930782079696655, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.6369295120239258, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7751882076263428, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.151832103729248, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.7512567043304443, "validation/loss_046_lr3.6e+01_wd1.0e+00": 7.982227325439453, "validation/loss_047_lr4.3e+01_wd1.0e+00": 17.984832763671875, "validation/loss_048_lr5.0e+01_wd1.0e+00": 16.501739501953125, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4181547619047619, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.45337301587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4915674603174603, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5203373015873016, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5399305555555556, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5649801587301587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5873015873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6121031746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6383928571428571, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6646825396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6902281746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7259424603174603, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7606646825396826, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8139880952380952, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8586309523809523, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8960813492063492, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.908234126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.917906746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9255952380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9295634920634921, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9342757936507936, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9379960317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9402281746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9469246031746031, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9518849206349206, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9588293650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9387400793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.953125, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9382440476190477, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9290674603174603, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9260912698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9298115079365079, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8988095238095238, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9255952380952381, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9129464285714286, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9255952380952381, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8772321428571429, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9040178571428571, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.20851165398800986, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2282913477433039, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2587710280668468, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.282366646300228, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2977841469959644, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3267704557343909, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3591410484596734, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.403310013268595, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4434993755001076, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.49327942011895587, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5381168434411014, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.6036783660670633, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.6639244997611013, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7597778198987105, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8294039161742754, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8751178028199886, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8900887502611008, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9029470621952488, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9130844057921776, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9184155864650289, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9234376808795456, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9273391547020002, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9300953784544038, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9317024857790166, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9377134919262137, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9452194375715535, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9465158922792563, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9483582616248379, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9515030733601257, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9533033441536406, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9570943961547022, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9568328857054929, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9581408221938674, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9550626402689478, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9476196274596942, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9474553884542152, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9575040048297305, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9357673410122926, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9484643200592034, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9228959300474029, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9110297911566522, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9033848956621545, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9126622011422953, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.8864663384190186, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9056812118181234, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9043842719077921, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.917860520977557, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8499453443782838, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8816071580383118, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.05716487244702875, "validation/loss_best": 0.114497609436512, "validation/acc_best": 0.9637896825396826, "validation/f1_best": 0.9581408221938674} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 1.2344399732351303, "train/grad": 0.7476377050578594, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.1410531616210937, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.055300483703613, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9266399574279784, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.8133849620819091, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.7133590459823609, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5915170121192932, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.4725287997722625, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.3584776920080186, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.2295373940467835, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.110546583235264, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.004889831840992, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.8532713913917541, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.7095695540308953, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5275018009543418, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.3999628409743309, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3244189584255219, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.2731976779550314, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2394746097549796, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.21355964455753565, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.1950583028420806, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.17620425306260587, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.1596773482300341, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1433589502237737, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.12760354784317315, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.11252943709492684, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.09632071779109537, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.08340239249169827, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.07265060938894749, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.06017596365883946, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0481842974293977, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.03986331482417881, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.034865591349080204, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03260180988349021, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.03115585678257048, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0472587024513632, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07253164827823638, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.09448883485049009, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11423049508593977, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.17469916091300547, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.3735809601470828, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.4929430916253477, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.7149634527042508, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0100912732165306, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.7231900456920266, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.9781415444705635, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9647300907503813, "train/loss_046_lr3.6e+01_wd1.0e+00": 7.9429945933911945, "train/loss_047_lr4.3e+01_wd1.0e+00": 13.538941096663475, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.872931374907494, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024813948944211007, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.024339220523834228, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023652007700875402, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023055067490786313, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022538119265809654, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021945304125547408, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021424036608077585, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020996481059119106, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020591174345463516, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02029463189188391, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02007998211774975, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019790876866318287, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019406539350748063, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.018396616675890983, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.016979156588204206, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01587658337317407, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.015196759011596441, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.014759784829802811, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.014388995165936648, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.014065102376043796, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.013695195806212724, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.013314577641431243, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.012847575871273876, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.012337088522035628, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01180159880197607, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.011201776253292337, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.010649491077056155, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.010075767361558973, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.009278475110186264, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.008376567786326632, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00772381026938092, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0073703060118714345, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.007711485497711692, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.007775727990083397, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0106983278458938, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.014232319594011642, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.016945226398529484, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.018819598105037586, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.024994317754462828, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04125109900254756, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.047813981282524766, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06091301445325371, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07969666853081435, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.11816091078333557, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.13240142621482845, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.17883904915302992, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.34981194261461496, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.45139424689114094, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.21120350569486618, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.0350399017333984, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9440175294876099, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.809280514717102, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.6926771402359009, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.5907237529754639, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.4674025774002075, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.3478435277938843, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.2329256534576416, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.1024916172027588, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9803378582000732, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8677831292152405, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6979421377182007, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5500218272209167, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.39170488715171814, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.3146607279777527, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.2798535227775574, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.25271090865135193, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.2300695776939392, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.2110341340303421, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.1970600187778473, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.18287603557109833, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1701316088438034, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.15790025889873505, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.14703908562660217, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.13898932933807373, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.13356587290763855, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1301504224538803, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12733443081378937, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12245722115039825, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11685283482074738, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11298678815364838, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11372335255146027, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13258768618106842, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.15321870148181915, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.15559856593608856, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.2014155238866806, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2691366672515869, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.23184794187545776, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3181708753108978, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.6107472777366638, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.8327485918998718, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.259531855583191, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.3098195791244507, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.666576623916626, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.160701274871826, "validation/loss_045_lr3.1e+01_wd1.0e+00": 5.485995769500732, "validation/loss_046_lr3.6e+01_wd1.0e+00": 8.77727222442627, "validation/loss_047_lr4.3e+01_wd1.0e+00": 12.020386695861816, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.4898313492063492, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.511656746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5416666666666666, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5632440476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5838293650793651, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6081349206349206, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6321924603174603, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6574900793650794, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6927083333333334, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7202380952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7549603174603174, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8080357142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8524305555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9000496031746031, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9109623015873016, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9169146825396826, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9236111111111112, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.933531746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9402281746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9432043650793651, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9466765873015873, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9523809523809523, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9578373015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9608134920634921, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9451884920634921, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9573412698412699, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9528769841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9427083333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9340277777777778, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9357638888888888, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9384920634920635, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9057539682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.925843253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9144345238095238, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2556205040681595, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.27213635566533767, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.3004521999004422, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3279088859830424, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3597813479156081, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.40268063541313837, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.4398873960681079, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.48782972095923494, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.5475848550491376, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5991334351969083, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6615072236759127, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7566755477056535, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8227587237870638, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8814192767109907, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8946201989316542, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9016470063529332, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9096927310098246, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9168178491509311, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9215366714575408, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9274631078236072, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9297312779591742, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9332172536514146, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9378222898759823, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9444868060722618, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9480788350544574, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9514905966961109, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9528941577878847, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9551434934234132, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9551236040201926, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9597794755889621, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9590666463539658, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9599566112300327, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9546025416800594, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9535563403015337, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9526558898880666, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9425467603350055, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9408579060010408, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9497664016055289, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9462588945322482, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9458035467285403, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9334539821039263, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9253503952875645, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9191291734891879, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9466005674100698, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9271434955145317, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.892884992133119, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9193900384118665, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8952615050196729, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.034865591349080204, "validation/loss_best": 0.11372335255146027, "validation/acc_best": 0.9650297619047619, "validation/f1_best": 0.9599566112300327} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.8624135664105416, "train/grad": 0.5177333766222, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9541805267333985, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.8590189743041992, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7205325412750243, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6023610901832581, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5003780126571655, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.3781579649448394, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2599309015274047, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.1464069029688835, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.0154346945881843, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8866613700985908, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7617754046618939, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.5877439846098423, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4526110599935055, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.3288717698305845, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.2744804388657212, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.2445686900243163, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.21796079248189926, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.19450236104428767, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.1736446834541857, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.1574315847083926, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.1401147199049592, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.12455189817585051, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.10878921865485608, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.09386831452138722, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0795796282030642, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.06477514619939029, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.05347360171377659, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04434947891160846, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03376973480917513, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.024451207462698223, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.017449129922315478, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.012525928169488908, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.010960438940674066, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.012483636140823364, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.031254000859335065, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.02635432843118906, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04902754995971918, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0767849270068109, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.139001080468297, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.23223528402857482, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.283115204796195, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.5032579981349409, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.4567567283473909, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.7228947790618986, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.9821931241825222, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.8306144515424965, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.343991057872772, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.99611432839185, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023861652864143254, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023346718735992908, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022605934953317045, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02200100552290678, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02152386066969484, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021028013797476888, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020630499133840203, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020322274160571398, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020041395584121345, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019794035186059773, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01948843733407557, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018724384978413583, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0175855854479596, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01595672780647874, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.015302821537479758, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.014945876789279283, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.014542246633209289, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.014104188517667354, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.013640609392896295, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.013270914857275783, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.012832983303815127, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.012439003938343376, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.011980468719266356, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.011488799096550793, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.010943064944585785, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.010325773615622893, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.009643910576123745, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008907136960187927, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00775851390673779, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006486608756240457, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0051400192538858394, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0040852999767230355, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004075113647995749, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004586450391507242, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009257051948516164, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008452867446903838, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011981148189588566, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0161120853239845, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.023720761493896136, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03219990033267095, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03656026389202452, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.053930382307735274, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.053190443967056356, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07400671918349644, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.09478545635998868, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.22970461098477243, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.27341270316392186, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.30751865066587925, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.8718427419662476, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.7745825052261353, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.6340703964233398, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.5149767398834229, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.4123286008834839, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.2891972064971924, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.1696219444274902, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.053829550743103, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.9178974032402039, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.7782743573188782, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.6428477764129639, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.4770544171333313, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.3641599714756012, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.29196566343307495, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.25972121953964233, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.23937423527240753, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2199590802192688, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.20227278769016266, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1862957626581192, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.17403876781463623, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.16083726286888123, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.14876455068588257, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.13737201690673828, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.12791909277439117, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.12005466967821121, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1140308603644371, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1106739491224289, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1090557649731636, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1077665463089943, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10713830590248108, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10855954140424728, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11386670172214508, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12354467064142227, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12610191106796265, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.18545344471931458, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1495022177696228, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.17978259921073914, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.33183401823043823, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.37800413370132446, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5883874297142029, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.42535513639450073, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.2779147624969482, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.01133394241333, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.3343392610549927, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.346124529838562, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.809389352798462, "validation/loss_046_lr3.6e+01_wd1.0e+00": 6.556675434112549, "validation/loss_047_lr4.3e+01_wd1.0e+00": 11.281064987182617, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5342261904761905, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5540674603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5813492063492064, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6051587301587301, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.626984126984127, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6564980158730159, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6879960317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7142857142857143, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7465277777777778, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.792906746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8291170634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8774801587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9055059523809523, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9176587301587301, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9223710317460317, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9270833333333334, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9310515873015873, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9370039682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9407242063492064, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9459325396825397, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.953125, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.957093253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9620535714285714, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.966765873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9481646825396826, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9548611111111112, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9513888888888888, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9652777777777778, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9446924603174603, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9543650793650794, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.955109126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9424603174603174, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9370039682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9228670634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2954544113937751, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3163686866896012, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.35354970632561206, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3957920832832355, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.4305804513799668, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.4823877963877519, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5375400789337198, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.5828527056896728, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.6424443299966344, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7300952320011912, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7911122583082268, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8552462319144002, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8877222911733728, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9024289359621879, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9083232710566388, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9141619052114629, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9194213174098849, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.92698559000227, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9297809067144691, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9320257439566411, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9376896813777782, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9465109263835455, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9496150602051022, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9506973553551946, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9543416719241246, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9564083453657941, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9584156173269557, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9603218432748444, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9625466508849685, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9605541974109196, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9620536827491167, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9636675954726186, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9604759100440473, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9589571939057777, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.945263060689801, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9613912744251487, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9580531592278521, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9435415904245726, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9548209899040353, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9416260068063151, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9642908075661767, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9394964061005249, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9466006431061926, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9492968311282963, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9478625762146267, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9378043064396648, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9240861416091215, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9155191768402456, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 0.03376973480917513, "validation/loss_best": 0.1077665463089943, "validation/acc_best": 0.9677579365079365, "validation/f1_best": 0.9625466508849685} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.6450717079639435, "train/grad": 0.40042422644793985, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.8024404335021973, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.7039295959472656, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.5633286333084107, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.445315318107605, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.3442075061798096, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.2230332785844802, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.104963355064392, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.9883852860331536, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.8447389641404152, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.694716265797615, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5623813796043396, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4082894794642925, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.31996301136910915, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.26235352989286187, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.23169021662324668, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.2103171221166849, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.1890724259801209, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.16859868580475448, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.14949091386049987, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.13402869037352502, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.1172650360967964, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.10170922676101327, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.08632326823659241, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.07161261294037104, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.05769552601501346, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.04366996333934367, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.03348359271883965, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.026125449128448962, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.019117990899831058, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.01337846864014864, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.007343725478276611, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0045707229804247615, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.007821268904954194, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.006391541482880712, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.020606899205595254, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.01843876587226987, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.049460257505998015, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07115649265237152, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.12884140509180725, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.19738640843890606, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.1683222015108913, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.3396700925100595, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.33812968833372, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.450224624928087, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.5068329405132681, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.31194644282572, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.291821277746931, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.721049377210438, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023286398546770216, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022759590027853847, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022050337912514806, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021517328042536973, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02112004576716572, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02071223401930183, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020392714263871313, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02014307798817754, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01982974627520889, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019354090741835535, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018665704471059145, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.017030787272378802, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.015613183830864727, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01484408997464925, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.014416464027017356, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0140279748942703, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.013607045819517225, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.013133890957105905, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01265445310389623, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.012232447061687707, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.011733490203041583, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.011200290424749255, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.010623044554376975, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.009932381856488063, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.009108527969801798, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.008059314148849807, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.007058831013855524, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006237143686739728, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005593996858224272, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004661274558166042, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.002702456731494749, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0017154870454396587, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0029186927475529957, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0028087796227191575, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006995396919446648, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007665935946643003, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01224926661223435, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.016090912786312403, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.022367998722183983, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0320914081976138, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.029402529455028345, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.047351405561094, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04667599648312268, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05632225738414436, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06404175748615397, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.1709981527541241, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.20816208200529218, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.23064098373055458, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.7388628721237183, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6389778852462769, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.496907114982605, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.3777320384979248, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.2754089832305908, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.152474045753479, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.0319017171859741, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.9111455678939819, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7587881088256836, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6036902666091919, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.47967880964279175, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.349521279335022, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.29386046528816223, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.25496962666511536, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.23245388269424438, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.21666935086250305, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.20083852112293243, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.18563814461231232, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1708938032388687, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.159053772687912, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.14634039998054504, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.13555091619491577, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.12595900893211365, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11930707097053528, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11505647748708725, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11215458065271378, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11073725670576096, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11137226223945618, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11834243685007095, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11399566382169724, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11121682822704315, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11113401502370834, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1233387440443039, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12787556648254395, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.16049811244010925, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.17625167965888977, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2077733725309372, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2608436346054077, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.33919134736061096, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.47289687395095825, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.7284716963768005, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.8394120335578918, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.9341526627540588, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.4301905632019043, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9453848004341125, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.3867175579071045, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.25883674621582, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.710312843322754, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5607638888888888, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5783730158730159, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6091269841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6579861111111112, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6897321428571429, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7142857142857143, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7447916666666666, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7963789682539683, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8407738095238095, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8757440476190477, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9057539682539683, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.917906746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.923859126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9300595238095238, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9350198412698413, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9377480158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9404761904761905, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9484126984126984, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9516369047619048, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9541170634920635, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9590773809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9615575396825397, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.955109126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9603174603174603, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9563492063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9580853174603174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9645337301587301, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9506448412698413, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.949156746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9427083333333334, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.32415858383340057, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.35010727282289383, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.40378460056365384, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.44234333194639364, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.4854649475277384, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5383739219116409, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5831501115924254, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6416008942233314, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7396961173700932, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8097727065609478, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8546749195535701, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8875180593487496, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9026126354560023, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9121884933696732, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9182191719569613, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9247216133920879, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9267827344219901, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9302896291100851, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.934129814286579, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9397213551721106, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9436786909018441, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.946881685219474, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9522942318365137, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9531921099864503, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.954313593234071, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9579668028030968, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9601428264402164, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9604486656713567, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9577622132505872, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9608918045628146, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9650294154628147, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.965309516184694, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9612957143853865, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9553508368812194, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9573624358815677, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9556950500832193, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9568447336106196, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9567458737174336, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9541171427960361, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9565946259537544, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9457639112322425, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9519337301192191, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9462815313500199, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9498537371958816, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9630824779403482, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9445722650988767, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9416182549005692, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9372172645749023, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.0045707229804247615, "validation/loss_best": 0.11113401502370834, "validation/acc_best": 0.9682539682539683, "validation/f1_best": 0.965309516184694} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.4575905567407608, "train/grad": 0.266677187345922, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.66831711769104, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.568159637451172, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4266370439529419, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.3088416945934296, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.2077810114622116, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.0858992207050324, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.964733572602272, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.8392587786912918, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6787223890423775, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5285887987911702, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.41182283259928226, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.3039844333380461, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.2577478055283427, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.22062561448663473, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.19676706694066526, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.1788481330499053, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.1601468606479466, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.1417399293743074, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.12401654080487788, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.1095082149002701, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.09352083557285368, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.07887715740129352, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.06413072579540312, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.050221011182293294, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03757777033373713, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.025481227356940507, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.017372503550723194, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.012118351012468338, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.007559940051287413, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.004633092302829028, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.002944341171532869, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002294164542108774, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0019596731662750244, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.002797467904165387, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.008420852031558752, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.011522887405008078, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.026176436971873045, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.03501206218264997, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.056630996670573946, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.089942163573578, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.14505782918073237, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1653530573565513, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2072476760391146, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2303854075446725, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.2463589778728783, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.9441810534894466, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.251395013956353, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.1737433580774814, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022274119323119523, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021763984672725202, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02111631170846522, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020657486864365637, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02032080485019833, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019973885854706167, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01967491427902132, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01936126747634262, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01882374005392194, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.017916233008727432, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.016627235380001368, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.015011957394890488, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014437425637152047, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013933796579949557, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013487255505751819, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.013088184634689241, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012612828745041042, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.012108676319476217, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01157593071460724, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01109764697495848, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01047769033582881, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.009798821239965037, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.008999567446298897, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008117372617707588, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007147698999615386, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005830141379265115, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0046013038468663585, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0035535831743618474, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0024574002856388687, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0016610527654120234, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.001050126763584558, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0008354520444117952, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0009079692930390593, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0011904156808668631, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00399996847841976, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005023623695606148, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008548485391402209, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011512223882818943, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.015335725056577303, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02058034176063728, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.025362039240197344, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.028042807413421675, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03274246776208768, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03953243735251508, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0408977952464367, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.10008167391020865, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1272326354367422, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.15296844563251719, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.6316739320755005, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5309818983078003, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3890844583511353, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2704272270202637, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.1684050559997559, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.0448100566864014, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.9206064343452454, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7897988557815552, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6231718063354492, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.47898510098457336, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.37292924523353577, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2962190508842468, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.26278576254844666, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.2338683158159256, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.21512921154499054, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.20111516118049622, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.18654228746891022, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.17216606438159943, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.15814600884914398, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.14736025035381317, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.13674896955490112, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1275654435157776, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11934515833854675, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11373808979988098, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11050330847501755, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10890597850084305, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10870569199323654, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10879877209663391, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10967963188886642, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11212263256311417, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11500746011734009, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1157255619764328, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11922027915716171, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12104551494121552, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1394839882850647, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14438077807426453, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2517815828323364, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2621481120586395, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.30375853180885315, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.445811003446579, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5450009703636169, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7565394043922424, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.65866619348526, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.1973085403442383, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9675544500350952, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6842803955078125, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.3719632625579834, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.015040874481201, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5798611111111112, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6019345238095238, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6312003968253969, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6557539682539683, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6830357142857143, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7098214285714286, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7452876984126984, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7839781746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.830109126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8727678571428571, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9032738095238095, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9151785714285714, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.921875, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9275793650793651, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.933531746031746, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9347718253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9451884920634921, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9484126984126984, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9508928571428571, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.955109126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9583333333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.966765873015873, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.966765873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.96875, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9650297619047619, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9546130952380952, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9645337301587301, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9518849206349206, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9563492063492064, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9469246031746031, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.351391715973531, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.391053379741881, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.43593020639039226, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4814223205712206, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5278751852442873, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5722862801808087, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6402851608610831, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7111425713399501, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7920713007030209, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8499419584549711, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8854594873398405, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.900207882231201, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.907653176594714, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9152539024930636, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9227260463248201, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9232744407378058, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9285008172269904, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9364675891437747, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9401165316723379, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9431507198499519, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.948555758781338, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9528295081287962, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9541100439049633, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9541640505484876, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.957802347844876, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9602342754839446, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9616001616497113, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9632430920045378, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9626890512445037, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9623889373083824, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9613160097944797, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9643666282211317, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9625532807558281, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9633326811909971, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9636153603529222, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9651970396044677, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9504582628788463, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9544953220278751, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9594761888063031, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9559815320599987, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9538155520618119, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9575866110431187, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9583650193915071, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9479290089268896, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9614636663757208, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9422604883701621, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.950408455348254, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9416685790220544, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.011522887405008078, "validation/loss_best": 0.14438077807426453, "validation/acc_best": 0.96875, "validation/f1_best": 0.9651970396044677} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.3694647002220154, "train/grad": 0.19593058802187444, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5863782024383546, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4864807558059692, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.3462405157089234, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.2293913465738298, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1286404377222061, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.005292474925518, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8778161609172821, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7397745129466057, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5715322782099247, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.42878162026405336, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.3314581334590912, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.2644500870257616, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.23135614588856698, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.2006279295682907, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.17939154837280513, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.1628235975652933, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.14505304967984556, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.12709526382386685, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.10957249094732106, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.09521158771589398, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.07975727431476116, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06572146158665419, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.0519948239531368, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03897766972891986, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.02733512757346034, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.01699277582578361, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.010614518513903022, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0071183848008513455, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.004501854823902249, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0030377598479390143, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0023802452813833953, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002017988637089729, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0017968266736716032, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0017208392079919577, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.003193466933444142, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.008929221788421274, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.01972863042727113, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.025917939273640514, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0397210210096091, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.048250556336715815, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05610018656589091, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07196791380643845, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1423568413965404, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.12192090915516018, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.14166707042604684, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.313850680841133, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.4745376921724528, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.0274178969394416, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02203722140751779, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021580263413488864, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021022797445766628, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020631913556717337, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020341154891066252, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020026759440079332, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019697130843997, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01924547486938536, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018374999002553523, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016934133968316018, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015446984539739788, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014568782853893935, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014150398978963495, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013636685768142342, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013182593476958573, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012761535404715687, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012271275573875756, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01173043759772554, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01115116192959249, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010602015189360827, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00993494264082983, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.009222928733797743, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.008376085377531126, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.007322571931872517, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00602398729941342, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004408315547625535, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.003034219281980768, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0021210804325528443, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0013333266942936461, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0008658928885415662, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0006338144875189755, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0005210556023666868, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0005164006492850604, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0005566948973137187, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0012329018648324563, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004382438828979502, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00656866611931946, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00893280054754655, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012260389937472098, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01385484071429822, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.016135481204238272, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.020760948046067876, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.028768150517323118, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.029045210836516427, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.027609203463104057, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05865298758269895, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07851163354891696, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.10922530604768238, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.545572280883789, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4449459314346313, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3035290241241455, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1854522228240967, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.083504557609558, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9579921960830688, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8274526000022888, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.685566246509552, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5222703218460083, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.39133837819099426, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.3172603249549866, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.26888611912727356, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.24312511086463928, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.217943474650383, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.2006315141916275, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1872536987066269, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1732180267572403, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.15910457074642181, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.14613467454910278, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.13658647239208221, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.12730853259563446, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11975880712270737, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11370571702718735, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10990072041749954, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1076313778758049, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10703473538160324, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10791836678981781, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1091037318110466, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11021807789802551, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11281529068946838, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11641798168420792, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1163124293088913, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11926081776618958, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12024365365505219, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12637373805046082, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.17478661239147186, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.17211636900901794, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2596541941165924, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3133004307746887, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.41160884499549866, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4807994067668915, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7769809365272522, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6844587922096252, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.1123661994934082, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.808434247970581, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.1064419746398926, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.4136288166046143, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.483919143676758, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6001984126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6200396825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6498015873015873, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6800595238095238, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.701140873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7326388888888888, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7725694444444444, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8127480158730159, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8611111111111112, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8990575396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9117063492063492, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9216269841269841, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9332837301587301, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9377480158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9454365079365079, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9546130952380952, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9585813492063492, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.970734126984127, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.966765873015873, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9650297619047619, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9637896825396826, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9657738095238095, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9558531746031746, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9580853174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3880920827004729, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4203142764381571, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.47337266894326274, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5253412183254962, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5598604787627133, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6180671520964177, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.692897453217295, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7645193023852764, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8368321999095839, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8807946734171591, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8954410273352638, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9065922152857737, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9175839691542764, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9225824372400534, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9277263520103392, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9328870058557429, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9374724937640434, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9398456427122404, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9434989741350345, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9466809350119415, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9497646547088252, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.952145707428632, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9581091239588518, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9596553183753619, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9614719888614951, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9617855464718963, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9619501255362983, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.964190571086542, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9624021014674805, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9626961316221642, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9633398090950391, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9642897787120285, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9628161466583618, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.96249369415246, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9670409932487422, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9587847266846488, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9627002207814392, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9609931619950279, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.958718810093112, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9615387564906103, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9577135033017179, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9586459881466718, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9619429414468683, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9569868166502377, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9605019644046167, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9476038356509081, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9517663505060524, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9501386970036915, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.003193466933444142, "validation/loss_best": 0.12637373805046082, "validation/acc_best": 0.970734126984127, "validation/f1_best": 0.9670409932487422} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.3231295782327652, "train/grad": 0.14853914927691222, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.508147385120392, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.409107962846756, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2703908163309097, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1543196833133698, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0533340647816658, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9268866941332817, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7909993553161621, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6443485936522484, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.4846848188340664, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.35897655345499513, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.29335332080721854, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.24654447861015796, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.21916980862617494, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.19089380014687776, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.17016883382573725, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.15353984763845802, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.13541283523663877, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.11713431976735592, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.09941201565787196, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.08512876290827989, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0695930131804198, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.05566487578675151, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.042148018460720776, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.02972368593327701, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.01935950090177357, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.011287477761507035, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.007147761387750506, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.004886290421709418, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0031751246470957993, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0021464113984256982, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0016674628108739853, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0013588264491409064, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0007433399744331836, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0005694890674203634, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.004601243333891034, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.004401385001838207, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.01040261280722916, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.013113326849415899, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.009276672294363379, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0210852690692991, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.046101280013099315, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0590372911002487, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08024950795806944, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0688504005316645, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0745402211137116, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.19941929649561643, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.2220387623924762, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.41193221469409763, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021620612824335694, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021210373849608003, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020731272343546152, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020410096924751996, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020177116538397967, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019918700992129744, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0195971228601411, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01906809407286346, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017960503329522908, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016170524097979068, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015178066724911333, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014619366540573537, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014194248882122338, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013635065932758152, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013101394057739526, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01260852245381102, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01200962959555909, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011337127080187201, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01062929374165833, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010008447784930468, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009227126478217542, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008362331129610538, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007298439937876537, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006032526671770028, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004601382571272552, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.003036280936212279, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00203857668893761, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0014441288066154813, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0009590533300070092, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0006596587551757693, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0005113930455263471, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0004296718679688638, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00027407886696892094, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0002461933463200694, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0020080339000378444, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.002232412447515344, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004528499951626372, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006608947217392825, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.005230930006956331, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009322928741642936, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01299917611364392, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.016734979153313373, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.017935596917482893, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.023733966003538595, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.020887424134491614, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03911404269012553, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04844766923088455, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06878321624086857, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4768962860107422, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3767483234405518, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2359501123428345, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1181186437606812, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.015548825263977, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8871190547943115, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7486114501953125, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6020748615264893, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.44868800044059753, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.33830171823501587, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.28850895166397095, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.25152549147605896, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2295137643814087, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.20680668950080872, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.19100700318813324, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.17859865725040436, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1652204394340515, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.15209493041038513, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.140028715133667, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.13152995705604553, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.12340251356363297, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11729919165372849, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11298058927059174, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11025481671094894, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1092831939458847, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10980454087257385, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1107928603887558, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11199448257684708, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11295150965452194, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11536382138729095, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11854081600904465, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1183711513876915, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11903959512710571, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11706876009702682, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.14259600639343262, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1620539426803589, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.18501105904579163, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.25747746229171753, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3032311797142029, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.372629314661026, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.43268269300460815, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7170229554176331, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5472782254219055, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.868537425994873, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6583852767944336, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.4828577041625977, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.0168752670288086, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.114288091659546, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6116071428571429, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6329365079365079, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6659226190476191, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6949404761904762, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.716765873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7529761904761905, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7978670634920635, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8365575396825397, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8829365079365079, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9079861111111112, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9161706349206349, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9250992063492064, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9342757936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9382440476190477, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9419642857142857, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9456845238095238, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9501488095238095, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9538690476190477, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9590773809523809, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9623015873015873, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.964781746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.966765873015873, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9595734126984127, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9677579365079365, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9580853174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9580853174603174, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.40761219508756996, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.44014428603009814, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5012371291418154, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5482591668770782, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5882505687396294, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6568192882390699, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.739179119985222, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7998121168598993, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.86286829644394, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8905022585038587, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9005504378120034, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9114848503120385, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9164796851557491, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9228663620254604, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9282009902454578, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9329355134543469, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9378206825088758, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9432328548661059, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9477244202056865, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9534702805515034, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.954563942773191, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9562540642454652, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9587464393403683, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9595235245976013, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9600815393521349, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9622140220800766, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9640038182962752, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9623632148045641, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9629583886166011, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9614164970475294, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9613175425620694, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9632019280057603, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9640579681771816, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9639029880016156, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9634982790496378, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9643534479589198, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.965109872138238, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9599156654601047, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9551475143920269, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9664027717357878, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9615461495586608, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9533445948464034, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9641610951754694, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9602122448630341, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9615160575817034, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9584580856289404, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.953686830781704, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9532532109569105, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 0.01040261280722916, "validation/loss_best": 0.18501105904579163, "validation/acc_best": 0.9692460317460317, "validation/f1_best": 0.965109872138238} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.2905242046713829, "train/grad": 0.11059887107461691, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4412141561508178, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3435415291786195, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2069309437274933, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.092335855960846, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9915197232365608, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.862666384279728, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.721187407374382, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.5754447616636753, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.4230353896319866, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.3169248350709677, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2684498282149434, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.22925686210393906, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.20431198265403508, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.17782681992277502, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.15788992376998068, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.14174170073121786, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.12392933287657798, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.10563787555322052, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.08793312503024936, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.07336767974309623, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.057613063771277666, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04344927147962153, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.030634801415726543, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.02016522018238902, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.012503019385039806, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0074008585885167125, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.004953372944146395, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.003608056120574474, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0024614581558853386, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0016995749436318874, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.001335633872076869, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.001094429073855281, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0006111550889909268, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00044882239773869515, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.000878955889493227, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.000661052642390132, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0007385214976966381, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.005501330904662609, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00519008157774806, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.009657037938013672, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.012411849396303297, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.010472689513117075, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.030455623464658858, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.050665554534643886, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.024512309469282626, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.06332060224376619, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.07512474031187594, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.16609608553349972, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021299274256452918, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020889560272917152, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02040330599527806, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020060554184019565, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01979311915114522, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019446367784403264, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018980240863747895, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018257358693517745, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016850022207945584, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015317238392308355, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01470730652101338, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01421340933535248, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013815441490150989, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01328690597321838, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01280968951061368, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012387272622436284, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011870475416071714, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011254910554271191, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.010549953577574343, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009855168922804296, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008949921638704837, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00791037926566787, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006654823909047991, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0052097352279815825, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0036442791507579385, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002281494429917075, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0015178622049279512, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0010887645730690565, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0007360152046021539, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0005069786639069207, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0003939724472729722, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0003240604964958038, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00019783541869401234, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00016291256544718636, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0004453086801277095, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00039534495495217924, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0006269218676197852, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0029242216593870296, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.002831881374433962, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004905580395354218, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0069428616699417954, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006941088737001232, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01173274517790139, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014571016564027928, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.011297815984208322, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.018775108568859764, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02291491676909531, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04157949264238078, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4229978322982788, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3232427835464478, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1830174922943115, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0655035972595215, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9617940187454224, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8294550776481628, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6846585273742676, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.540251612663269, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.39536550641059875, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3085888624191284, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.27095460891723633, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.23924048244953156, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.21942144632339478, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.19886012375354767, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.18362051248550415, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.17173604667186737, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.15873320400714874, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.14594526588916779, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.13427366316318512, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.1261967569589615, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11872286349534988, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11331251263618469, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11005839705467224, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10849346220493317, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.10823120921850204, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10963176935911179, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11144822090864182, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11291464418172836, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11412137001752853, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1170298159122467, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12007270753383636, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12011662870645523, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12046568840742111, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11677809804677963, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1277218759059906, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13971863687038422, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1666155308485031, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1894390732049942, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.24682965874671936, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3647479712963104, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.34875577688217163, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4472942650318146, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5894116759300232, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6111733913421631, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5708598494529724, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.1287658214569092, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.3604475259780884, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.230377197265625, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6207837301587301, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6455853174603174, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6775793650793651, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7028769841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7311507936507936, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7725694444444444, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8147321428571429, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8546626984126984, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8973214285714286, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9134424603174603, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9228670634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9293154761904762, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9325396825396826, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9375, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9409722222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9434523809523809, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9474206349206349, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9508928571428571, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9546130952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.957093253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9598214285714286, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9637896825396826, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.964781746031746, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.966765873015873, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.96875, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.96875, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9657738095238095, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.970734126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.964781746031746, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4203115920853196, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4630747627507818, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5208624706795199, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5608845987330221, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6162574420533833, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6903059177582858, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7674332139164234, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8265636765238339, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8780544245482912, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8966744115215745, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9091635341527959, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9182966772303283, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9212767510332288, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9271224895176496, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.932201073536175, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.935991212574106, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9407125825987792, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9434240916418294, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9472451828836683, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9502092275289349, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9543697316818095, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.95734364832658, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9604169474732555, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9614425936076123, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9617625578235656, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9623341928079897, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9621081725067802, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9622772579161305, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9603871631139159, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9617553972726066, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9598271481367954, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9628639528492471, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9629836954475612, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9634191138085688, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9652265558136586, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9659617090328119, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9649895551711447, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9661339929952333, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9675320925521504, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.963620512512197, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9670049626822693, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9657110661230426, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9605010162371506, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9667307672533867, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9675112541833161, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9608030879082369, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9653982391613132, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9570201211825233, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.050665554534643886, "validation/loss_best": 0.6111733913421631, "validation/acc_best": 0.9709821428571429, "validation/f1_best": 0.9667307672533867} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.27333796203136446, "train/grad": 0.08982638677582144, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4001581501960754, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3020873427391053, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1646218007802964, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.048847553730011, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9460411864519119, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8123653838038445, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.6653898669779301, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.5224233302474022, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3766100738942623, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2916259265691042, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2521633575484157, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.2171376855298877, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.19401031970977783, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1689579002931714, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.15007589496672152, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.13439038793556393, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11694172473624348, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09896465759724378, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0811159262433648, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06649149940349161, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05072591880336404, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03705759105272591, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.024897130699828268, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.015549201983958483, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009381765527650713, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005660836240276694, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0038860373478382827, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0029050337988883257, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.002045670133084059, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0014213291741907597, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.001075869258493185, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0008176847361028195, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005385313648730517, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0004069853387773037, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00029787883162498476, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00037594830617308615, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0002868191432207823, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0011203063186258078, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0011547252163290978, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.004469429980963469, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00823544081300497, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.004422553349286318, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.017343657668679954, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.012323842430487276, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.018765246588736774, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.02260807919315994, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.026681362250819803, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.061810508528724314, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021073065944947303, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02070638397242874, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020275373277254403, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019979963679797947, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019743588096462192, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01940695588476956, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018888365030288696, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01805062307510525, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016250548758544026, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014863727684132754, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014323301347903907, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013776417621411383, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013327140386682004, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012733584120869636, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012235675249248743, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011780644105747341, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01124027410056442, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010607098101172597, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009869910207344219, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009123771373415366, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008108251704834401, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006960487323813141, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005589020879124291, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004076466266997159, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0026922449772246184, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0016892072948394344, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0011595474513887893, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0008681243277533213, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0006129130559565965, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00042979443016520234, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00033153428630612326, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.000256166859617224, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00017063817653252046, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00014141564886813285, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0001586481863978406, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0001408484755393147, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0002492702323240792, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0003543065701057913, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00043108686154969434, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0028238295499756604, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0026443599504168943, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0023443280555389343, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.006984915022396162, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005379356212596432, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00747774241398861, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.011373497717919758, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.009816286825376042, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02275102444000587, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.381034255027771, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2816617488861084, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1419191360473633, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0239975452423096, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9193130731582642, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7831035852432251, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6347339153289795, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4949248433113098, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3597278296947479, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.29071304202079773, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.25894811749458313, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2308410257101059, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.21238070726394653, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.19290772080421448, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.17849580943584442, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.16693982481956482, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1546000838279724, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.14229321479797363, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1316484957933426, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.12417005002498627, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.1176588162779808, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11314796656370163, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.1104445531964302, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10934466868638992, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1094207838177681, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1116238459944725, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1134406253695488, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11474497616291046, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11602963507175446, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11830204725265503, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1210591048002243, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12067576497793198, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12066478282213211, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11688238382339478, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1252957284450531, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13490670919418335, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.15642008185386658, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1842576563358307, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.22208404541015625, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.30698224902153015, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3359583020210266, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.42524397373199463, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.546033501625061, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6674061417579651, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5305340886116028, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9884408712387085, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.283083438873291, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.2300329208374023, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6321924603174603, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6572420634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6877480158730159, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7152777777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7450396825396826, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7872023809523809, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8268849206349206, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8687996031746031, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9057539682539683, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9159226190476191, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9236111111111112, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9295634920634921, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9399801587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9432043650793651, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9456845238095238, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9471726190476191, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9513888888888888, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.955109126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9590773809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9637896825396826, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.966765873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.970734126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.972718253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9689980158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.43897304285172, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4843633829680796, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5369802442408873, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5845897030233441, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6429214524655467, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7211745644605918, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7879500937040237, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8457879512554001, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8890548152829031, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8999787004225843, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9093230117698736, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9178631040354448, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9218255915757533, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.930689286103741, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9352994516567692, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9371878117210892, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.938610922040831, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9429977681317447, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.94768359030961, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9502288639196552, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9527685374503821, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9563687959570826, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9595528180631904, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9605077865798075, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.960227533575015, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9621864148817278, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.962598348659079, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9620260451936099, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9614102906828198, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9620561045465634, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9615277505912412, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9631633441370641, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9630170922134058, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9627037564577741, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9649955008379678, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9665003676101476, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9671236725274871, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9644488726446632, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9695525945526, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9688428395995062, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9675664001158485, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9616074922170649, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9647165923604895, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9645091128443329, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9673165838819371, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9632745770344568, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.962826195966425, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9557090011823852, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.0011547252163290978, "validation/loss_best": 0.22208404541015625, "validation/acc_best": 0.972718253968254, "validation/f1_best": 0.9695525945526} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.2611759620159864, "train/grad": 0.0790774311311543, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.367292971611023, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.268898052573204, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1304700592160224, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0124960964918137, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9065954080224037, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7668350979685783, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.6155241726338864, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4758786579966545, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3400003420561552, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2717264687642455, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2383431350439787, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.20675496481359004, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.18502352751791476, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1608762539923191, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.14242669753730297, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12716303303837775, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11016336409375072, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09273609989322722, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.07554805425927043, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06145916614681482, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04647265915758908, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03358024151995778, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.022226305631920695, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.013590265437960625, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.008156075878068804, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004831860791891813, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003258966589346528, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0024064082466065885, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001687673470005393, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.001154265059158206, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0008769112639129162, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007080486603081226, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0004949173983186483, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003810354135930538, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00020149369724094867, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00010109041817486286, "train/loss_036_lr7.1e+00_wd1.0e+00": 5.5984649807214735e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 8.14951490610838e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 5.067485384643078e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0002506245765835047, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0016697188653051853, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.001483206981793046, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0036623495258390902, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.004914952432736755, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.006062635350972414, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0077027520723640915, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0043075996357947585, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.02416560109704733, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020825493359006943, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020472663342952727, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020055434796959162, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019764288039878012, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01952554882969707, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019172415845096113, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018596909581683575, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017663906500674783, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015784353488124906, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014731601197272539, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014297903983388096, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013757374673150479, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013284745211713016, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012643193830735982, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012088804335799069, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0115839857282117, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010976627084892243, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010271019312785939, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009453872727463022, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008650990028399974, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00761040624405723, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006439976459369064, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0050589781673625114, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003579945559904445, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002331262082152534, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0014562126382952555, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0010158358894113916, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0007655859190708725, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005467938066431088, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0003782999491522787, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002815834915600135, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00022040399137040367, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00015744290631118928, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00012793597039490123, "train/grad_034_lr5.1e+00_wd1.0e+00": 9.212956861574639e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 7.880945104716375e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.891689248869625e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0001253562484908599, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00016142051061238227, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00040111919560391763, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00124410533074705, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0011397286731309336, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0022765278983655534, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002382249265096165, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.003977414850125028, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.007765152734260856, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0035827626987281445, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.010758004742302443, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3494786024093628, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2504260540008545, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.110962152481079, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9929099678993225, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.887061357498169, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7479469776153564, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5984321236610413, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4627172350883484, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3375311493873596, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.27994194626808167, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2514485716819763, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.22532303631305695, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2074914425611496, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1885148584842682, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.17428821325302124, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.16302521526813507, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.15053343772888184, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.13863824307918549, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12840405106544495, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.12150879204273224, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11559468507766724, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11177684366703033, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10963183641433716, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.1091146469116211, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1099342331290245, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11236511170864105, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1141839548945427, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11563993990421295, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11670263856649399, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11930717527866364, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12169049680233002, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12129370868206024, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12113616615533829, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11692260950803757, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12358084321022034, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13349565863609314, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1515643149614334, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17432360351085663, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2130136936903, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2776302397251129, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3422795534133911, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.36612892150878906, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.45399680733680725, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5713164210319519, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4093342423439026, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8740025162696838, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.134393572807312, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.682159423828125, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6401289682539683, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6612103174603174, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6944444444444444, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.720734126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7524801587301587, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7966269841269841, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8360615079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8782242063492064, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.908234126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9184027777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9255952380952381, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9308035714285714, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9347718253968254, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.939484126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9432043650793651, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.949156746031746, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.953125, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9580853174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.964781746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9655257936507936, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.964781746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.970734126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.96875, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.45120760371829954, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.49214451552013605, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.547714248981557, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5955008189360026, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6543202334304551, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7371635113204682, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8004240264401119, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8580186064841238, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8911582699643474, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.902502251100724, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9129785972878349, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.918444593194303, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9239841709489621, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9302463100325941, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9350339667356903, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.938649020718082, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9412730374751321, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9461339610436749, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9505834409231739, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9522420453213598, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9549113948759602, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9593909567093981, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9620991981687536, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9620043398715546, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9617746035308604, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9612509343075812, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9619213555221245, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9613445346028852, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9610286368253549, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.961968834474647, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9591884640612344, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9635718033543257, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9628024848069022, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9636272665155707, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9649792493204361, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9657578913508273, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9669593053753112, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9667960768367998, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9691200708807508, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9716416437382668, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9666892460889441, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9693037260577083, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9677798652969665, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9687440259897486, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9704342525818385, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9637286347918904, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9638023946015843, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9596771736385176, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.0002506245765835047, "validation/loss_best": 0.2776302397251129, "validation/acc_best": 0.9737103174603174, "validation/f1_best": 0.9716416437382668} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.2546610203385353, "train/grad": 0.07307530885562301, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3397535800933837, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.241448547244072, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1032956078648568, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9859438332915306, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8799415880441666, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7393826657533645, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5899028220772743, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4539817176759243, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3262191092967987, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2662484800070524, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.23526488814502955, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.20491895873099567, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1836184811219573, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.15955096267163754, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.14062898486852646, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12487871259450913, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.10722635819576681, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.08906993978656828, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.07131060880608857, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05685850073583424, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04186196940019727, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.029332603393122554, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.018852761602029203, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011357481116428971, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.006890350310131907, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004141914332285524, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0028572727553546427, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0021279523428529503, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0014990559965372087, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.001047743521630764, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0008004394080489874, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0006563999224454165, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00047042494639754296, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.000359948156401515, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00019004824571311474, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00010204971767961979, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.0936370864510536e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 6.040383130311966e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.211049199104309e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.919605001807213e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.073206938803196e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00023196679539978505, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00017019410617649555, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0016478417068719864, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0006371938902884722, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0018793121166527271, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0004537794552743435, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.004308902751654386, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021164603061042727, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020817843093536795, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020389083474874496, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02006301979534328, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019774185442365706, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01930113635957241, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018529022000730037, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017274988451972603, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015180386663414537, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014338295026682317, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013954682236071676, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013473250675015152, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01303067392203957, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012424219860695302, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011878619026392699, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011380394608713686, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010748898298479616, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010017992390785366, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009178389655426144, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008340760100400075, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0072547239193227146, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006032116677961313, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0045420031360117715, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0030443772629951127, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001965687673946377, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0012421832371910567, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.000876704186812276, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006622346983931493, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004738491291936953, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0003270154697383987, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00024558371002058265, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00020260518154827879, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014774181519896956, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00012203409933135845, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.546162233869836e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 6.042011358204036e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.127717877371652e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00010102301870375641, "train/grad_038_lr9.8e+00_wd1.0e+00": 9.490648195632723e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00012977339105090912, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00016629184069758593, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0003592830035020057, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0003283074613485558, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.000607298317196863, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.000709243125032741, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0018452231864285375, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0007469967807690338, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0037388272644381, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3265305757522583, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2278013229370117, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0886234045028687, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9703047275543213, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8635764718055725, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7221148014068604, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5723456144332886, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4397740364074707, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3236563503742218, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2731249928474426, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.24707388877868652, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.22194251418113708, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20495063066482544, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18667587637901306, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.17263492941856384, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.16151896119117737, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1489766240119934, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.13730672001838684, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1274498999118805, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.12102069705724716, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11556798219680786, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11204580217599869, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11030376702547073, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11027425527572632, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11117207258939743, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11381945759057999, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11583029478788376, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11732445657253265, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11857321113348007, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12097867578268051, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12353472411632538, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12298508733510971, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12177970260381699, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11749820411205292, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12264230102300644, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13182032108306885, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1484275758266449, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17164772748947144, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20833474397659302, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.26367321610450745, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.31338071823120117, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3538680970668793, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.40802958607673645, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5159278512001038, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.41486886143684387, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8275780081748962, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0918923616409302, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.5053082704544067, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6455853174603174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6656746031746031, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7003968253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7284226190476191, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7628968253968254, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8018353174603174, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8432539682539683, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8864087301587301, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9104662698412699, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9184027777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9250992063492064, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9303075396825397, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9367559523809523, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9399801587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9444444444444444, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9526289682539683, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9548611111111112, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9575892857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9662698412698413, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.46211707617141734, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5019416265789033, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5576958553024722, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6098846764252737, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6758241308185037, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7456556452489581, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8123154958163509, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8671755337403614, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8946608412920855, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9037525111383309, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9125445953129252, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9188702126486507, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9268467014599802, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9305154603955459, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9363100584576449, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9407177542530972, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9453703732105587, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9481887036776913, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9517107085326515, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.955496027438726, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9566145589060663, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9585449861042596, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9621417362087864, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9624780720844532, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9622911271940033, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9624535540428019, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9635814673901903, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9625132284510405, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9607701359650322, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9608924907437674, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9606059219249717, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9621828716341531, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9628825361373357, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.962796502058053, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9657228715606272, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9659870615349342, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9670657002216462, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9672537243841776, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9693095788733641, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9719129459612907, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9684566762112777, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9688553330502885, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9671907433208533, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9683680348186847, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.972377879624043, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9635230651523643, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9638236749035675, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9590126601490605, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 2.919605001807213e-05, "validation/loss_best": 0.26367321610450745, "validation/acc_best": 0.9742063492063492, "validation/f1_best": 0.9719129459612907} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.24676613003015518, "train/grad": 0.0709693175368011, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3161242043972015, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2179451715946197, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.079274456202984, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.960312232375145, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8519195580482483, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7074540400505066, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5568746489286422, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4228943547606468, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3034391090273857, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.24955321408808231, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.22044426057487726, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.19138456363230943, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.17072070272639395, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1473642653413117, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.12925208777189254, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11429924691095948, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09769035269506275, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.08077361122705043, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06447869435884059, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.051437615472823384, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03769176819361746, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.026102068331092597, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01675170457921922, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.01025059367530048, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.00625523773021996, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0037838503252714873, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0026406443677842615, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001986923785880208, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0014031071309000254, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000976821007207036, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007619562931358814, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0006257888209074736, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00045833013020455836, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00036086585372686387, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00019255590625107287, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00010106571950018406, "train/loss_036_lr7.1e+00_wd1.0e+00": 5.4268790408968924e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.035920836031437e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.678031101822853e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.4811977744102478e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.621922641992569e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.083484247326851e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1876281350851058e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 8.231708779931069e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00028383995406329633, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.6211543008685112e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 8.719462901353836e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1958740651607513e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0207765572424978, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020473652496002616, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02012235437054187, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019862930495291947, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019612565976567565, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019168349192477764, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018417891534045338, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017191118486225607, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015280647305771708, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01450543464627117, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014086685648653657, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01354291104245931, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013069735059980303, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012440596562810243, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011884211476426572, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01137645857874304, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010742759285494684, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009987332578748464, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009112602322129532, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008239961137296631, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007068079117452726, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005775275217602029, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004231269455631264, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002793083690048661, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001822923475119751, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0011619467634591274, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008239714641240425, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006261835710756713, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00044347148468659725, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00030523025583534036, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00023704497409198665, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00019303979053802322, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001408923951566976, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00011722942004780634, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.107568070954585e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.544219921830518e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.5760670824392944e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 7.043317173639707e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 7.720529157886346e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.4231230192722775e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00011869180505782157, "train/grad_041_lr1.6e+01_wd1.0e+00": 5.15482008192715e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 2.6443251070056205e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00017427143408331905, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.000275298065775755, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00041271204287695874, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00022785469063192482, "train/grad_047_lr4.3e+01_wd1.0e+00": 4.040442963572204e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3110392093658447, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2124786376953125, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0734939575195312, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9548062086105347, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8475407958030701, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7046526074409485, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5555245876312256, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4244590699672699, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3146950304508209, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.26734015345573425, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.24250775575637817, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.21818852424621582, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2013002187013626, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18309608101844788, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16941797733306885, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15808069705963135, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1462326943874359, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.13474638760089874, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12504839897155762, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11859776079654694, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.1138593927025795, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11084433645009995, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10946111381053925, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10979738086462021, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11106237769126892, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1135781854391098, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11573862284421921, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11727757751941681, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1184622049331665, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12117382138967514, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12362147122621536, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12293284386396408, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12155210226774216, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1172688901424408, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12140372395515442, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13073064386844635, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14688564836978912, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16633030772209167, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20356234908103943, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.25711724162101746, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3010954260826111, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3353766202926636, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3885652422904968, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4888709485530853, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3576367199420929, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.77060866355896, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0085786581039429, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.4052447080612183, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6498015873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6716269841269841, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7026289682539683, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.730406746031746, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7663690476190477, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8072916666666666, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8501984126984127, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8888888888888888, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9129464285714286, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9233630952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9278273809523809, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9370039682539683, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9399801587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9446924603174603, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9513888888888888, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9541170634920635, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9595734126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9625496031746031, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9675099206349206, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.972718253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9689980158730159, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.47171145998226566, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5116309625297054, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5619348510843855, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6141684445412788, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6816257529565976, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7561005511767421, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8217886233357843, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.86958854054212, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.896334972247846, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9099390258307671, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9171172073745849, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9222138322974702, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.927420449999183, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9310516269766438, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9376165058882268, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9414482448687098, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9438099167389932, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9472618421560751, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9506845437589546, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9532461547140658, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9569683305668486, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.960415503892829, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9641051416042183, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9622128217041831, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9619954626876057, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9625202952445462, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9629499405607242, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9622711034916985, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9611311706551403, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.961396429261948, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9609978365086023, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.962732005910914, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9634861464653847, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9638135924063427, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9661380999699917, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9667459487221709, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9674679905963155, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9670836152745957, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9695141420776556, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9716054562971848, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9683138833369036, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9713659207997123, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9679156218339207, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9684306551242718, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9735510374906704, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9636969348467471, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9637276816881695, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9589213628047397, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.00028383995406329633, "validation/loss_best": 0.3576367199420929, "validation/acc_best": 0.9764384920634921, "validation/f1_best": 0.9735510374906704} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.24564227439463138, "train/grad": 0.07057232892140747, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3045945346355439, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2076578643918037, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0704290869832038, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9526064628362656, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8446669846773147, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6997445560991764, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5501323083043098, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.41803304098546507, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.30360825560987, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2524262035265565, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.22401573833078145, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.195157037563622, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1744647915288806, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.15063119545578957, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13186962911859154, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11632320160977543, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.098849703585729, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.08088261465542018, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06348083312623203, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04960440221242607, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03534338527359068, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.0237787626311183, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.014711623545736074, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00885282514616847, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005427735513076186, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.003367997892200947, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002373098889365792, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001804774049669504, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0012966633588075637, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009167894069105387, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007065964676439763, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005821212101727724, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00043362414464354516, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003399049397557974, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00017992804758250712, "train/loss_035_lr6.0e+00_wd1.0e+00": 9.864210151135921e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.1023160815238955e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.0556294843554495e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0011048763990403e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5690168365836142e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.64805818349123e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 7.832329720258712e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 9.981319308280945e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1863727122545242e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.3004941865801812e-05, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.2745953649282457e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.4798791855573654e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.574913039803505e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02106576696038246, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020753328530117868, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020382727663964033, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020092209517024458, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01979518949519843, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019244404532946645, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018399051832966508, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017082084296271206, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015139347170479595, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014376071621663869, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013937529576942324, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01335321472492069, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012844485552050174, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012173983585089445, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011608611769042909, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011097419755533338, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0104632777441293, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009679595241323113, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008747445561457425, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007812150707468391, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006610966713633388, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005289169360767118, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003785499506047927, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00249112420133315, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0016234218457248063, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0010410687899275216, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007370562678261194, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.000559868788914173, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00040061585277726406, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002825699980530771, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002179514330055099, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00017908133231685496, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00013444789919958566, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00010764717464553541, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.626899947354105e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.360921953979414e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.631373243768167e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.141006698375804e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 2.7082364427739592e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.6318513707224712e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.6165330846246162e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.7873321900736918e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.9933660765549065e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.7212088605480825e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.1225808771755054e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.1009151433498507e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.4057832361432295e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 5.765232122239755e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3013731241226196, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2029740810394287, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0641028881072998, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9452716708183289, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8374605774879456, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6938381791114807, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5450146198272705, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.41511979699134827, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3094486594200134, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2645007073879242, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2401782125234604, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.21641629934310913, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.19984905421733856, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18179011344909668, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16798394918441772, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15695953369140625, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1449291706085205, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1337933987379074, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12441692501306534, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.1186068207025528, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11393656581640244, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11117637902498245, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11007288098335266, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11080796271562576, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1120191141963005, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11471311748027802, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11677228659391403, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11820293962955475, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1192556694149971, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12154451757669449, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12428862601518631, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12337058782577515, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12201262265443802, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11720460653305054, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1209779605269432, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13005408644676208, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14533278346061707, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16471807658672333, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20076003670692444, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.25237134099006653, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2957647144794464, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.32896652817726135, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.37750372290611267, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4763985872268677, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.34729915857315063, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7455622553825378, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9713018536567688, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3487859964370728, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6525297619047619, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6733630952380952, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7026289682539683, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.734375, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7705853174603174, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8112599206349206, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8521825396825397, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8913690476190477, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9126984126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9233630952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9375, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9439484126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9479166666666666, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9508928571428571, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9543650793650794, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9583333333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9588293650793651, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.966765873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.972718253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.972718253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.47610761610764646, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5155848924851508, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5622250257060484, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.622400306372726, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6900284118537053, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7634173421301488, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8239805831452914, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8725309302299228, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8955199027252906, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9099202633095546, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9181914586222947, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9225510097586229, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9283665642017377, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9332232831460303, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9361887845301611, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9406518838831135, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9430751958494339, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9474498097221474, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9520383957602642, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9521110563081101, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9545867610693606, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9584830738745944, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9606460006128049, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9607623327371941, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9608137949194544, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9626566521223914, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9618767847494318, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9614325559667029, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9605198241518542, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9611484514921921, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9601173791395923, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.962732005910914, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9631156623056617, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9633038060396532, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9661380999699917, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9659615291188527, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9681185901730641, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9670836152745957, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9693408762466573, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9717863175873915, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9686817724393454, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.97116917927172, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9678733349418684, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9680097671250343, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.972985299107734, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9644748260862922, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9641212396405072, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9589213628047397, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 1.3004941865801812e-05, "validation/loss_best": 0.34729915857315063, "validation/acc_best": 0.9761904761904762, "validation/f1_best": 0.972985299107734} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.243759985268116, "train/grad": 0.06867594581097364, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3019576984643937, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2049867355823516, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.067738184928894, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9493655133247375, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8406368052959442, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6948433992266655, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5445881141722202, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.41072349674999714, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.2974960321933031, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.24689203299582005, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.21854212414473295, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1898353908583522, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.16927254667505623, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.14575215773656963, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.1272428359091282, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11196244145743549, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09493272350169718, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07749906591139734, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06073180289939046, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.047293506804853676, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03350732016377151, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.022350246841087936, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.013831452503800391, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.008355969954282045, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0051509496197104454, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0032450857385993, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002295391159132123, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0017396240122616292, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0012470862828195095, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008874694630503654, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006899981945753098, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005708852503448725, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.000420497814193368, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003266798704862595, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00017994973808526993, "train/loss_035_lr6.0e+00_wd1.0e+00": 9.460819885134697e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.123007275164127e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.7676552310585975e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.117147669196129e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0143071413040162e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.1221710592508315e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 5.6884624063968655e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 7.415143772959709e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 7.553482428193092e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 7.227268069982529e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.2480526715517046e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.759078681468964e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.422155812382698e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020443589398637413, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020119252265430987, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019735212810337542, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019457814823836087, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019199224072508514, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018739395150914787, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01792851258534938, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01658698275219649, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014776922268792987, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014088929041754454, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01366968612652272, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013114465670660137, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012613458796404303, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011940681880805642, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011365045018028468, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010848983940668404, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010206356151029468, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009428683533333242, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008510602044407279, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007590513182803988, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006384050878696143, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005059901085915044, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0035760464530903845, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0023540498563670554, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0015378387970849871, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0009932734329777303, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007111616655311082, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005435908120853128, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003881716595060425, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00027524438533873765, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00021261079571559095, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00017555540933244628, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00013089177504298276, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00010571758799414966, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.601435958349611e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.106747127001654e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.347862587309237e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.140117582522862e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 2.9526768563528095e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.2733814366404024e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.240789862446052e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.3668240856858655e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.5330304939896732e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.011946664024101e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.5398380952763133e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.2057172156155112e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.425977888861309e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.9388110130339823e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2963707447052002, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1980533599853516, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0592776536941528, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9403637051582336, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8324485421180725, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6881846785545349, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5395970940589905, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4102550446987152, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.30689552426338196, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.26330357789993286, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.23918460309505463, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.21564032137393951, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.19919924437999725, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18126384913921356, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16738395392894745, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1560797095298767, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14429675042629242, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1330406814813614, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12376783788204193, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11779586970806122, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11315415054559708, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11043305695056915, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10959749668836594, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11027619242668152, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11163590848445892, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1145332083106041, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11670494079589844, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11807508766651154, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11952567845582962, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12173399329185486, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12435394525527954, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12358690798282623, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12188725918531418, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11724705249071121, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12104952335357666, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12953150272369385, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14472462236881256, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16414609551429749, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.19955341517925262, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2508786916732788, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.29308488965034485, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3241341710090637, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3729022741317749, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.47078046202659607, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3423435389995575, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7340127825737, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9505531787872314, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3213527202606201, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6537698412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6750992063492064, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7056051587301587, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7353670634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7718253968253969, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8134920634920635, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8541666666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8918650793650794, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9126984126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.923859126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9327876984126984, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9375, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9446924603174603, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9489087301587301, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9521329365079365, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.955109126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9585813492063492, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9598214285714286, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.964781746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.970734126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.96875, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.47821938636625805, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.517212466490143, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5672894406413803, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6260444452968239, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6938104538237148, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7666085342916666, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8266214069711378, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8729897512853215, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.89544925038134, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9106110797119056, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9178454509479181, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.921988283514493, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9274356903853731, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9333751604755927, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9366313364256992, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9417157836107984, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9447366558173975, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9488232345982642, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9521949358563387, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9534470516066239, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.955619093560934, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9604727558212437, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9626933180164121, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9608493466910943, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9621369305967177, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9620671848937787, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9623793299940899, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9622567380287262, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9604853366696352, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9599058769024335, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9600905298209361, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.962732005910914, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9634608584353882, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9632998951030656, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9661380999699917, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9657482834192402, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9675627977824428, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9669037674102005, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9695717666992394, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9717821301065697, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9684609405694138, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.97116917927172, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.967680882120236, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9675447279882091, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.972985299107734, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9635561589794444, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9637739797988997, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9588687079700885, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 7.227268069982529e-06, "validation/loss_best": 0.3423435389995575, "validation/acc_best": 0.9761904761904762, "validation/f1_best": 0.972985299107734} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.2400410721451044, "train/grad": 0.06906976053491234, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2848692047595978, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1886250215768814, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.052720557153225, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9354834809899331, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8276582685112953, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6828179654479026, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5339988829195499, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.40119843229651453, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.2900000421702862, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.24037058994174004, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.21231463257223368, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.18373049393296242, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.16303261103108524, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.13962860507890582, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.12146672810427844, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.106346095809713, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.08966805765405297, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07284162568859756, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05670046526938677, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04385848603211343, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.030916280802339314, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.020601634103804826, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.012762388521805406, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.007700335448607803, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004747313000261783, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0029913130402565, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0021219763439148664, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0016201788652688265, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0011723239906132222, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008340711891651154, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006524145975708961, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005374056100845337, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00039181255735456946, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00030730408616364, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00016757713630795478, "train/loss_035_lr6.0e+00_wd1.0e+00": 9.273478761315346e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.063987500965595e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.896224312484264e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9467268139123916e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0438524186611176e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.537720672786236e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 5.343174561858177e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 9.51029360294342e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 8.578700944781303e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 9.286636486649514e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.8604869991540905e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.594111815094948e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.38543376326561e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02072180971968919, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020404603546485305, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020017396276816726, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01972110795788467, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019431016338057817, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018910329919308425, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018076024167239666, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01670841969549656, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014762750677764416, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01407135674264282, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01366847023833543, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01312429562676698, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01262704461114481, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0119625187641941, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011367912236601115, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010827746260911226, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010143357589840888, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00933207567082718, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008384853282477707, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0074500118521973495, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0062174044107086955, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004844990933197551, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0033786125888582317, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002198220936697908, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0014348362805321813, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0009287889319239184, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006654664869711269, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005100595354451798, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003658282522519585, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002581860400459846, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00020201562168949749, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001670202094101114, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00012464345827538635, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00010354753971114405, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.251563089084811e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 5.036352425122459e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.349185095350094e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.778573396381546e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 2.644490315038439e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.0360817734580704e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.1603580497689556e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.4437361862665787e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.9145302871322667e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.3037587949102314e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.3966483648204003e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.5088081532172095e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.1536682965517472e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.5627092757624042e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2945408821105957, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1962416172027588, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0572705268859863, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9385697245597839, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8304328918457031, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6860216856002808, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5378085374832153, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.40857788920402527, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.30590149760246277, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2626144587993622, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.23866872489452362, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.21528573334217072, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.19882692396640778, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18089702725410461, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16708359122276306, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15588900446891785, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14399689435958862, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.13273516297340393, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12341559678316116, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11767986416816711, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.1130974143743515, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11051754653453827, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10960348695516586, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11058148741722107, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11201859265565872, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11465127766132355, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11684361100196838, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11826521903276443, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11938431113958359, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12201593816280365, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12443982064723969, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12374359369277954, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12205110490322113, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11724868416786194, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12105388939380646, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12940821051597595, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14445370435714722, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16359807550907135, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.19943082332611084, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.24990421533584595, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.29217198491096497, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.32332396507263184, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3711426258087158, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.46837398409843445, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3398195207118988, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7297552227973938, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9445183277130127, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.30968177318573, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.654265873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6743551587301587, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7065972222222222, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7361111111111112, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7738095238095238, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8142361111111112, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8546626984126984, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8921130952380952, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9126984126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.923859126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9327876984126984, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9372519841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9451884920634921, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9558531746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9575892857142857, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9595734126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9662698412698413, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9655257936507936, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.96875, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.966765873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4802733643207621, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5173673754193078, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5697696547488862, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6277662472688542, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6959610658788806, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7676301651921219, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8278440011835742, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8731535691333292, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8956014495547814, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9107850339996835, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.917526885881423, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.921995696263073, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9270563893604621, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9331807855447533, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9375668869191732, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9417994171014742, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9443504449680914, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.949470899187628, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9514559635730622, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9531706520058744, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9550375448058928, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9587247386119004, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9632256021239384, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9621707982021309, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9627284430900878, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9618733113210212, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9623809253973118, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9620257477049551, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9605841174360028, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.961167114190818, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9596302542541512, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.962732005910914, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9634608584353882, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9632864075734489, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.965957751307022, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9651380953514558, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9674253897590503, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9670836152745957, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9694048769547607, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9715149970106981, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9684609405694138, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9713489042619712, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.967680882120236, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9675447279882091, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9733203015592021, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9635561589794444, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9633991681023565, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9589314890950783, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 9.286636486649514e-06, "validation/loss_best": 0.3398195207118988, "validation/acc_best": 0.9764384920634921, "validation/f1_best": 0.9733203015592021} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.2412570422887802, "train/grad": 0.06823582328855991, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2859815156459808, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1887953448295594, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0516402161121368, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9339764165878296, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8263311868906021, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6824178546667099, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5340516267716885, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.40238502480089666, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.29320254258811473, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2447794470563531, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.21753401290625335, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.18965290192514658, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.16933842511847616, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1460719546303153, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.12755865255370735, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11210567759349942, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09477031981572509, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07698035995475948, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05988571404479444, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04627639278769493, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.032378085730597375, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.021252321656793357, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01296124517917633, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.007868314608931542, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004947294499725103, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0031476662401109935, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0022326500806957483, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0016987168695777656, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001218251371756196, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008574557583779096, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006762638967484235, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005546127259731293, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00041007185354828833, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003211352322250605, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0001824568212032318, "train/loss_035_lr6.0e+00_wd1.0e+00": 9.593300521373749e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 5.784685723483562e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.873113542795181e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.1831467747688294e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1026700958609581e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 9.44887287914753e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 5.528451874852181e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 5.33335842192173e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0307347401976586e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 9.681787341833114e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.170116201043129e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.773616626858711e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.5655684769153595e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.046875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020311001511290668, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01999650872312486, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019614703599363566, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019324626671150327, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019032219862565397, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018525236109271644, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017738191462121904, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01643221703823656, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01467750218231231, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014054948897100985, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013657826124690473, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013107371756341309, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012630839897319675, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011984295474831015, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011413691714406014, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010894438694231212, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010227044490166008, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009434921705396846, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00851305300486274, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007574993859743699, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006357014423119835, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0049869505257811395, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0034643660235451536, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0022558638534974306, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0014612310912343674, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0009411611630639527, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006713966754614375, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005120480986806797, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003680735982925398, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002608513280938496, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002052151757743559, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00016862046926689799, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001263888952053094, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00010258295542371343, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.337373288464733e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.973358201254996e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.30364976148212e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.0114926797656384e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 2.9808131414612583e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.0338874017671315e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.0351097200302927e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.4321585651891145e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.6972463296062985e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.8273824752946285e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.413013393298892e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 2.4282600382945998e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.5953770906211234e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.4722158632650412e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2943108081817627, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1959519386291504, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0569859743118286, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9382562637329102, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.830168604850769, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6858078241348267, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5375133156776428, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.408383309841156, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3057456910610199, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.26255205273628235, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.23862865567207336, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2152843475341797, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.19888444244861603, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18089570105075836, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16696906089782715, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15597650408744812, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14391186833381653, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1327504813671112, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1234109029173851, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11777739971876144, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11323683708906174, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1106129065155983, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10962734371423721, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11046767979860306, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11194747686386108, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11467870324850082, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11683139950037003, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11825116723775864, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11944346874952316, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12209739536046982, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12447850406169891, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12390751391649246, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12198902666568756, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11730454117059708, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12095552682876587, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12930896878242493, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.144463911652565, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1634577512741089, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1993938386440277, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.24990615248680115, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2920214831829071, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3228728473186493, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3715030252933502, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.46938973665237427, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3395904004573822, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.727676272392273, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9423936009407043, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3075612783432007, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0445220470428467, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6535218253968254, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6746031746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7073412698412699, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7366071428571429, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7738095238095238, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.814484126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8541666666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8921130952380952, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9129464285714286, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9231150793650794, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9285714285714286, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9327876984126984, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9372519841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.941468253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9451884920634921, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9605654761904762, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9662698412698413, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.96875, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.966765873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.04365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.47943107459470463, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5168690750434753, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5708245081936362, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6281232613784384, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6964017795874387, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7680562452891525, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8273329473110467, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8733964202475503, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8959928980902907, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9097499039511832, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.917526885881423, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.921995696263073, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9270563893604621, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9328412657109888, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9375668869191732, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9408924584681773, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9443310220802514, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9487467193007367, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9518401175850313, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.952991463643331, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9546682354648224, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9587247386119004, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9632256021239384, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9624025542130965, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.962848892323616, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9618733113210212, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9623809253973118, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.962555532597786, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9603635777201305, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.961167114190818, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9596302542541512, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.962732005910914, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9634608584353882, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9632826503456745, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.965957751307022, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9651380953514558, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9677847018323242, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9670836152745957, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9694048769547607, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9715149970106981, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9684609405694138, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9713489042619712, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.967680882120236, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9675447279882091, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9742031792027654, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9635561589794444, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9633991681023565, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9588687079700885, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.003983342386384211, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 9.681787341833114e-06, "validation/loss_best": 0.3395904004573822, "validation/acc_best": 0.9769345238095238, "validation/f1_best": 0.9742031792027654} diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..5f4df8e204d7ee2f8efaa7a03464a1287ef54923 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 16, "eval/last/lr_best": 8.1e-05, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.171297788619995, "eval/last/train/acc": 0.35084052982574754, "eval/last/train/acc_std": 0.0022872729527584545, "eval/last/train/f1": 0.28647551045480546, "eval/last/train/f1_std": 0.0023214594409422125, "eval/last/validation/loss": 2.4222354888916016, "eval/last/validation/acc": 0.27150239940937615, "eval/last/validation/acc_std": 0.00519698230893839, "eval/last/validation/f1": 0.20057228105758518, "eval/last/validation/f1_std": 0.004624768163389933, "eval/last/test/loss": 2.397343635559082, "eval/last/test/acc": 0.2792207792207792, "eval/last/test/acc_std": 0.00515904547244289, "eval/last/test/f1": 0.20079560877860803, "eval/last/test/f1_std": 0.0048771565982414795, "eval/last/testid/loss": 2.3290913105010986, "eval/last/testid/acc": 0.29535376903797955, "eval/last/testid/acc_std": 0.0056612386805859494, "eval/last/testid/f1": 0.22700868756848122, "eval/last/testid/f1_std": 0.005186164866650084} diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..f4e8c06618bb21eb457f2619fdf1cd7f44c0aa25 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",train,2.252119541168213,0.3276683364577891,0.002220140903461018,0.2622455758398266,0.0022267072748040464 +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",validation,2.415285348892212,0.2737172388335179,0.005144654041092983,0.20614164787225583,0.00463394310568278 +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",test,2.3897087574005127,0.2795918367346939,0.00530092004652869,0.20199755080867374,0.004819614958486068 +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",testid,2.375211715698242,0.2791594370541739,0.0055198147521058685,0.21536086551405484,0.004982723637039659 diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..b709c63a3a06d0a04accfb32766a98303263163e --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,8.1e-05,0.05,16,"[0.27, 1.0]",train,2.171297788619995,0.35084052982574754,0.0022872729527584545,0.28647551045480546,0.0023214594409422125 +flat_mae,patch,attn,nsd_cococlip,last,19,8.1e-05,0.05,16,"[0.27, 1.0]",validation,2.4222354888916016,0.27150239940937615,0.00519698230893839,0.20057228105758518,0.004624768163389933 +flat_mae,patch,attn,nsd_cococlip,last,19,8.1e-05,0.05,16,"[0.27, 1.0]",test,2.397343635559082,0.2792207792207792,0.00515904547244289,0.20079560877860803,0.0048771565982414795 +flat_mae,patch,attn,nsd_cococlip,last,19,8.1e-05,0.05,16,"[0.27, 1.0]",testid,2.3290913105010986,0.29535376903797955,0.0056612386805859494,0.22700868756848122,0.005186164866650084 diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..94781e1b427b8172d6ee60e74508f5e390782689 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,962 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:16:39 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:24:24 lr: nan time: 3.6625 data: 3.1096 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:53 lr: 0.000003 loss: 3.1849 (3.1796) grad: 0.1680 (0.1715) time: 0.4609 data: 0.0029 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:12 lr: 0.000006 loss: 3.1686 (3.1709) grad: 0.1680 (0.1697) time: 0.4541 data: 0.0049 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:52 lr: 0.000009 loss: 3.1621 (3.1690) grad: 0.1653 (0.1687) time: 0.4520 data: 0.0049 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:37 lr: 0.000012 loss: 3.1665 (3.1672) grad: 0.1639 (0.1671) time: 0.4397 data: 0.0049 max mem: 22448 +train: [0] [100/400] eta: 0:02:25 lr: 0.000015 loss: 3.1567 (3.1656) grad: 0.1564 (0.1655) time: 0.4584 data: 0.0053 max mem: 22448 +train: [0] [120/400] eta: 0:02:14 lr: 0.000018 loss: 3.1508 (3.1632) grad: 0.1498 (0.1631) time: 0.4545 data: 0.0052 max mem: 22448 +train: [0] [140/400] eta: 0:02:03 lr: 0.000021 loss: 3.1542 (3.1621) grad: 0.1522 (0.1629) time: 0.4537 data: 0.0049 max mem: 22448 +train: [0] [160/400] eta: 0:01:54 lr: 0.000024 loss: 3.1515 (3.1587) grad: 0.1684 (0.1639) time: 0.4674 data: 0.0051 max mem: 22448 +train: [0] [180/400] eta: 0:01:43 lr: 0.000027 loss: 3.1258 (3.1560) grad: 0.1655 (0.1635) time: 0.4487 data: 0.0049 max mem: 22448 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 3.1364 (3.1549) grad: 0.1492 (0.1621) time: 0.4591 data: 0.0049 max mem: 22448 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 3.1409 (3.1537) grad: 0.1540 (0.1616) time: 0.4377 data: 0.0047 max mem: 22448 +train: [0] [240/400] eta: 0:01:14 lr: 0.000036 loss: 3.1343 (3.1521) grad: 0.1564 (0.1613) time: 0.4421 data: 0.0048 max mem: 22448 +train: [0] [260/400] eta: 0:01:04 lr: 0.000039 loss: 3.1244 (3.1500) grad: 0.1529 (0.1608) time: 0.4431 data: 0.0048 max mem: 22448 +train: [0] [280/400] eta: 0:00:55 lr: 0.000042 loss: 3.1098 (3.1469) grad: 0.1528 (0.1605) time: 0.4401 data: 0.0048 max mem: 22448 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 3.0939 (3.1424) grad: 0.1534 (0.1606) time: 0.4399 data: 0.0048 max mem: 22448 +train: [0] [320/400] eta: 0:00:36 lr: 0.000048 loss: 3.0794 (3.1391) grad: 0.1653 (0.1613) time: 0.4412 data: 0.0048 max mem: 22448 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 3.0884 (3.1363) grad: 0.1655 (0.1615) time: 0.4365 data: 0.0049 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.0753 (3.1326) grad: 0.1691 (0.1624) time: 0.4376 data: 0.0045 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0690 (3.1290) grad: 0.1794 (0.1632) time: 0.4454 data: 0.0049 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0676 (3.1266) grad: 0.1825 (0.1640) time: 0.4384 data: 0.0047 max mem: 22448 +train: [0] Total time: 0:03:02 (0.4562 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0676 (3.1266) grad: 0.1825 (0.1640) +eval (validation): [0] [ 0/85] eta: 0:04:12 time: 2.9750 data: 2.7438 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:30 time: 0.3380 data: 0.0040 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:17 time: 0.3274 data: 0.0040 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3242 data: 0.0042 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3205 data: 0.0040 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3099 data: 0.0039 max mem: 22448 +eval (validation): [0] Total time: 0:00:30 (0.3609 s / it) +cv: [0] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 2.607 acc: 0.218 f1: 0.153 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:21:01 lr: nan time: 3.1546 data: 2.8181 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:43 lr: 0.000063 loss: 3.0186 (3.0243) grad: 0.1771 (0.1753) time: 0.4606 data: 0.0035 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:06 lr: 0.000066 loss: 3.0209 (3.0263) grad: 0.1674 (0.1713) time: 0.4441 data: 0.0050 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:49 lr: 0.000069 loss: 3.0071 (3.0125) grad: 0.1673 (0.1727) time: 0.4538 data: 0.0051 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:33 lr: 0.000072 loss: 2.9928 (3.0120) grad: 0.1753 (0.1751) time: 0.4278 data: 0.0047 max mem: 22448 +train: [1] [100/400] eta: 0:02:21 lr: 0.000075 loss: 3.0088 (3.0071) grad: 0.1762 (0.1756) time: 0.4310 data: 0.0047 max mem: 22448 +train: [1] [120/400] eta: 0:02:10 lr: 0.000078 loss: 2.9930 (3.0035) grad: 0.1793 (0.1765) time: 0.4467 data: 0.0050 max mem: 22448 +train: [1] [140/400] eta: 0:02:00 lr: 0.000081 loss: 2.9868 (3.0009) grad: 0.1814 (0.1779) time: 0.4377 data: 0.0048 max mem: 22448 +train: [1] [160/400] eta: 0:01:50 lr: 0.000084 loss: 2.9930 (3.0015) grad: 0.1832 (0.1785) time: 0.4380 data: 0.0048 max mem: 22448 +train: [1] [180/400] eta: 0:01:40 lr: 0.000087 loss: 2.9971 (3.0008) grad: 0.1826 (0.1792) time: 0.4488 data: 0.0048 max mem: 22448 +train: [1] [200/400] eta: 0:01:31 lr: 0.000090 loss: 2.9666 (2.9989) grad: 0.1799 (0.1798) time: 0.4372 data: 0.0050 max mem: 22448 +train: [1] [220/400] eta: 0:01:21 lr: 0.000093 loss: 2.9374 (2.9913) grad: 0.1944 (0.1819) time: 0.4330 data: 0.0049 max mem: 22448 +train: [1] [240/400] eta: 0:01:12 lr: 0.000096 loss: 2.9201 (2.9877) grad: 0.1944 (0.1821) time: 0.4394 data: 0.0049 max mem: 22448 +train: [1] [260/400] eta: 0:01:03 lr: 0.000099 loss: 2.9355 (2.9864) grad: 0.1827 (0.1826) time: 0.4422 data: 0.0049 max mem: 22448 +train: [1] [280/400] eta: 0:00:54 lr: 0.000102 loss: 2.9345 (2.9820) grad: 0.1832 (0.1827) time: 0.4350 data: 0.0048 max mem: 22448 +train: [1] [300/400] eta: 0:00:44 lr: 0.000105 loss: 2.9301 (2.9802) grad: 0.1865 (0.1833) time: 0.4325 data: 0.0048 max mem: 22448 +train: [1] [320/400] eta: 0:00:35 lr: 0.000108 loss: 2.9353 (2.9762) grad: 0.1920 (0.1842) time: 0.4404 data: 0.0050 max mem: 22448 +train: [1] [340/400] eta: 0:00:26 lr: 0.000111 loss: 2.8974 (2.9721) grad: 0.1943 (0.1850) time: 0.4509 data: 0.0049 max mem: 22448 +train: [1] [360/400] eta: 0:00:17 lr: 0.000114 loss: 2.9191 (2.9702) grad: 0.1934 (0.1855) time: 0.4364 data: 0.0048 max mem: 22448 +train: [1] [380/400] eta: 0:00:08 lr: 0.000117 loss: 2.9191 (2.9663) grad: 0.1941 (0.1864) time: 0.4307 data: 0.0051 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.9132 (2.9648) grad: 0.2020 (0.1875) time: 0.4409 data: 0.0050 max mem: 22448 +train: [1] Total time: 0:02:59 (0.4477 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.9132 (2.9648) grad: 0.2020 (0.1875) +eval (validation): [1] [ 0/85] eta: 0:04:37 time: 3.2594 data: 2.9670 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:30 time: 0.3366 data: 0.0038 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:18 time: 0.3353 data: 0.0038 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3143 data: 0.0042 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3323 data: 0.0044 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3309 data: 0.0044 max mem: 22448 +eval (validation): [1] Total time: 0:00:31 (0.3670 s / it) +cv: [1] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 2.516 acc: 0.243 f1: 0.166 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:26:25 lr: nan time: 3.9629 data: 3.5690 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:52 lr: 0.000123 loss: 2.9222 (2.8929) grad: 0.2230 (0.2215) time: 0.4449 data: 0.0025 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:11 lr: 0.000126 loss: 2.9127 (2.8953) grad: 0.2169 (0.2176) time: 0.4479 data: 0.0049 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:50 lr: 0.000129 loss: 2.9118 (2.8954) grad: 0.2075 (0.2136) time: 0.4408 data: 0.0048 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:36 lr: 0.000132 loss: 2.8790 (2.8911) grad: 0.2049 (0.2132) time: 0.4432 data: 0.0050 max mem: 22448 +train: [2] [100/400] eta: 0:02:24 lr: 0.000135 loss: 2.8554 (2.8837) grad: 0.2121 (0.2135) time: 0.4512 data: 0.0048 max mem: 22448 +train: [2] [120/400] eta: 0:02:12 lr: 0.000138 loss: 2.8554 (2.8774) grad: 0.2181 (0.2156) time: 0.4315 data: 0.0046 max mem: 22448 +train: [2] [140/400] eta: 0:02:01 lr: 0.000141 loss: 2.8687 (2.8807) grad: 0.2269 (0.2183) time: 0.4422 data: 0.0047 max mem: 22448 +train: [2] [160/400] eta: 0:01:51 lr: 0.000144 loss: 2.8665 (2.8769) grad: 0.2431 (0.2224) time: 0.4503 data: 0.0049 max mem: 22448 +train: [2] [180/400] eta: 0:01:41 lr: 0.000147 loss: 2.8530 (2.8728) grad: 0.2437 (0.2246) time: 0.4383 data: 0.0047 max mem: 22448 +train: [2] [200/400] eta: 0:01:32 lr: 0.000150 loss: 2.8682 (2.8714) grad: 0.2474 (0.2311) time: 0.4397 data: 0.0047 max mem: 22448 +train: [2] [220/400] eta: 0:01:22 lr: 0.000153 loss: 2.9370 (2.8890) grad: 0.3308 (0.2646) time: 0.4286 data: 0.0049 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=66.15 > 63.56) at step 520. Freezing. +train: [2] [240/400] eta: 0:01:12 lr: 0.000156 loss: 3.1591 (2.9481) grad: 0.7622 (0.3475) time: 0.4342 data: 0.0051 max mem: 22448 +train: [2] [260/400] eta: 0:01:03 lr: 0.000159 loss: 2.9171 (2.9392) grad: 0.2338 (0.3372) time: 0.4540 data: 0.0052 max mem: 22448 +train: [2] [280/400] eta: 0:00:54 lr: 0.000162 loss: 2.8333 (2.9320) grad: 0.2300 (0.3306) time: 0.4470 data: 0.0050 max mem: 22448 +train: [2] [300/400] eta: 0:00:45 lr: 0.000165 loss: 2.8333 (2.9247) grad: 0.2352 (0.3241) time: 0.4375 data: 0.0049 max mem: 22448 +train: [2] [320/400] eta: 0:00:36 lr: 0.000168 loss: 2.8334 (2.9184) grad: 0.2275 (0.3178) time: 0.4582 data: 0.0051 max mem: 22448 +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 2.8334 (2.9147) grad: 0.2332 (0.3132) time: 0.4412 data: 0.0049 max mem: 22448 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 2.9109 (2.9175) grad: 0.2451 (0.3204) time: 0.4304 data: 0.0048 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 3.1165 (2.9485) grad: 0.6340 (0.3645) time: 0.4426 data: 0.0048 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=68.26 > 63.56) at step 591. Freezing. +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 3.1165 (2.9489) grad: 0.5983 (0.3654) time: 0.4340 data: 0.0048 max mem: 22448 +train: [2] Total time: 0:03:00 (0.4513 s / it) +train: [2] Summary: lr: 0.000180 loss: 3.1165 (2.9489) grad: 0.5983 (0.3654) +eval (validation): [2] [ 0/85] eta: 0:04:32 time: 3.2011 data: 2.9676 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:32 time: 0.3598 data: 0.0040 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:18 time: 0.3120 data: 0.0039 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3436 data: 0.0045 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3201 data: 0.0039 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3108 data: 0.0039 max mem: 22448 +eval (validation): [2] Total time: 0:00:31 (0.3695 s / it) +cv: [2] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.454 acc: 0.261 f1: 0.189 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:09 lr: nan time: 3.3226 data: 2.9903 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:33 lr: 0.000183 loss: 2.7712 (2.7956) grad: 0.2212 (0.2170) time: 0.4249 data: 0.0039 max mem: 22448 +train: [3] [ 40/400] eta: 0:02:59 lr: 0.000186 loss: 2.8045 (2.8164) grad: 0.2222 (0.2229) time: 0.4298 data: 0.0036 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:41 lr: 0.000189 loss: 2.7729 (2.8028) grad: 0.2232 (0.2227) time: 0.4281 data: 0.0049 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:30 lr: 0.000192 loss: 2.7623 (2.8020) grad: 0.2189 (0.2227) time: 0.4510 data: 0.0049 max mem: 22448 +train: [3] [100/400] eta: 0:02:19 lr: 0.000195 loss: 2.7823 (2.7979) grad: 0.2203 (0.2236) time: 0.4405 data: 0.0046 max mem: 22448 +train: [3] [120/400] eta: 0:02:08 lr: 0.000198 loss: 2.7787 (2.7951) grad: 0.2265 (0.2249) time: 0.4322 data: 0.0049 max mem: 22448 +train: [3] [140/400] eta: 0:01:58 lr: 0.000201 loss: 2.7756 (2.7950) grad: 0.2432 (0.2282) time: 0.4515 data: 0.0049 max mem: 22448 +train: [3] [160/400] eta: 0:01:49 lr: 0.000204 loss: 2.7880 (2.7961) grad: 0.2444 (0.2295) time: 0.4354 data: 0.0049 max mem: 22448 +train: [3] [180/400] eta: 0:01:39 lr: 0.000207 loss: 2.7748 (2.7917) grad: 0.2345 (0.2304) time: 0.4315 data: 0.0050 max mem: 22448 +train: [3] [200/400] eta: 0:01:29 lr: 0.000210 loss: 2.7748 (2.7947) grad: 0.2371 (0.2322) time: 0.4295 data: 0.0049 max mem: 22448 +train: [3] [220/400] eta: 0:01:20 lr: 0.000213 loss: 2.7794 (2.7952) grad: 0.2441 (0.2332) time: 0.4336 data: 0.0049 max mem: 22448 +train: [3] [240/400] eta: 0:01:11 lr: 0.000216 loss: 2.7741 (2.7943) grad: 0.2441 (0.2347) time: 0.4429 data: 0.0052 max mem: 22448 +train: [3] [260/400] eta: 0:01:02 lr: 0.000219 loss: 2.7850 (2.7929) grad: 0.2421 (0.2359) time: 0.4566 data: 0.0050 max mem: 22448 +train: [3] [280/400] eta: 0:00:53 lr: 0.000222 loss: 2.7787 (2.7907) grad: 0.2434 (0.2373) time: 0.4283 data: 0.0046 max mem: 22448 +train: [3] [300/400] eta: 0:00:44 lr: 0.000225 loss: 2.8008 (2.7943) grad: 0.2628 (0.2431) time: 0.4371 data: 0.0047 max mem: 22448 +train: [3] [320/400] eta: 0:00:35 lr: 0.000228 loss: 2.9331 (2.8212) grad: 0.4418 (0.2884) time: 0.4439 data: 0.0047 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=80.56 > 63.56) at step 763. Freezing. +train: [3] [340/400] eta: 0:00:26 lr: 0.000231 loss: 2.9592 (2.8431) grad: 0.6130 (0.3189) time: 0.4349 data: 0.0048 max mem: 22448 +train: [3] [360/400] eta: 0:00:17 lr: 0.000234 loss: 2.8824 (2.8458) grad: 0.2953 (0.3205) time: 0.4251 data: 0.0050 max mem: 22448 +train: [3] [380/400] eta: 0:00:08 lr: 0.000237 loss: 2.9278 (2.8612) grad: 0.4424 (0.3481) time: 0.4279 data: 0.0048 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=73.52 > 63.56) at step 797. Freezing. +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 3.2573 (2.8950) grad: 0.9422 (0.3878) time: 0.4258 data: 0.0049 max mem: 22448 +train: [3] Total time: 0:02:57 (0.4433 s / it) +train: [3] Summary: lr: 0.000240 loss: 3.2573 (2.8950) grad: 0.9422 (0.3878) +eval (validation): [3] [ 0/85] eta: 0:04:28 time: 3.1614 data: 2.9331 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:29 time: 0.3220 data: 0.0036 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3617 data: 0.0041 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3455 data: 0.0043 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3174 data: 0.0041 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3113 data: 0.0038 max mem: 22448 +eval (validation): [3] Total time: 0:00:31 (0.3724 s / it) +cv: [3] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.480 acc: 0.263 f1: 0.186 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:21:48 lr: nan time: 3.2708 data: 2.8880 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:36 lr: 0.000243 loss: 2.6738 (2.7161) grad: 0.2289 (0.2290) time: 0.4357 data: 0.0038 max mem: 22448 +train: [4] [ 40/400] eta: 0:03:00 lr: 0.000246 loss: 2.6966 (2.7220) grad: 0.2299 (0.2317) time: 0.4288 data: 0.0050 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:45 lr: 0.000249 loss: 2.7327 (2.7258) grad: 0.2299 (0.2326) time: 0.4531 data: 0.0051 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:32 lr: 0.000252 loss: 2.7226 (2.7277) grad: 0.2279 (0.2312) time: 0.4503 data: 0.0051 max mem: 22448 +train: [4] [100/400] eta: 0:02:20 lr: 0.000255 loss: 2.7565 (2.7389) grad: 0.2281 (0.2324) time: 0.4290 data: 0.0046 max mem: 22448 +train: [4] [120/400] eta: 0:02:10 lr: 0.000258 loss: 2.7519 (2.7342) grad: 0.2352 (0.2338) time: 0.4564 data: 0.0049 max mem: 22448 +train: [4] [140/400] eta: 0:01:59 lr: 0.000261 loss: 2.6905 (2.7338) grad: 0.2375 (0.2355) time: 0.4327 data: 0.0049 max mem: 22448 +train: [4] [160/400] eta: 0:01:49 lr: 0.000264 loss: 2.7460 (2.7349) grad: 0.2439 (0.2377) time: 0.4295 data: 0.0051 max mem: 22448 +train: [4] [180/400] eta: 0:01:40 lr: 0.000267 loss: 2.7399 (2.7393) grad: 0.2487 (0.2395) time: 0.4354 data: 0.0049 max mem: 22448 +train: [4] [200/400] eta: 0:01:30 lr: 0.000270 loss: 2.7265 (2.7363) grad: 0.2487 (0.2397) time: 0.4301 data: 0.0051 max mem: 22448 +train: [4] [220/400] eta: 0:01:21 lr: 0.000273 loss: 2.7417 (2.7399) grad: 0.2491 (0.2405) time: 0.4306 data: 0.0048 max mem: 22448 +train: [4] [240/400] eta: 0:01:12 lr: 0.000276 loss: 2.7470 (2.7389) grad: 0.2415 (0.2412) time: 0.4511 data: 0.0052 max mem: 22448 +train: [4] [260/400] eta: 0:01:02 lr: 0.000279 loss: 2.7411 (2.7409) grad: 0.2508 (0.2426) time: 0.4452 data: 0.0051 max mem: 22448 +train: [4] [280/400] eta: 0:00:53 lr: 0.000282 loss: 2.7455 (2.7415) grad: 0.2522 (0.2442) time: 0.4323 data: 0.0050 max mem: 22448 +train: [4] [300/400] eta: 0:00:44 lr: 0.000285 loss: 2.7471 (2.7428) grad: 0.2522 (0.2446) time: 0.4386 data: 0.0050 max mem: 22448 +train: [4] [320/400] eta: 0:00:35 lr: 0.000288 loss: 2.7464 (2.7427) grad: 0.2370 (0.2436) time: 0.4419 data: 0.0052 max mem: 22448 +train: [4] [340/400] eta: 0:00:26 lr: 0.000291 loss: 2.7306 (2.7421) grad: 0.2317 (0.2431) time: 0.4466 data: 0.0049 max mem: 22448 +train: [4] [360/400] eta: 0:00:17 lr: 0.000294 loss: 2.7106 (2.7424) grad: 0.2348 (0.2429) time: 0.4276 data: 0.0049 max mem: 22448 +train: [4] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.7051 (2.7412) grad: 0.2434 (0.2430) time: 0.4361 data: 0.0050 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.6937 (2.7385) grad: 0.2438 (0.2429) time: 0.4303 data: 0.0050 max mem: 22448 +train: [4] Total time: 0:02:58 (0.4457 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.6937 (2.7385) grad: 0.2438 (0.2429) +eval (validation): [4] [ 0/85] eta: 0:04:21 time: 3.0709 data: 2.8388 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:30 time: 0.3448 data: 0.0049 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:18 time: 0.3370 data: 0.0034 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3193 data: 0.0042 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3129 data: 0.0039 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3052 data: 0.0037 max mem: 22448 +eval (validation): [4] Total time: 0:00:30 (0.3625 s / it) +cv: [4] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.446 acc: 0.262 f1: 0.193 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:21:59 lr: nan time: 3.2998 data: 2.9227 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:41 lr: 0.000300 loss: 2.6254 (2.6291) grad: 0.2490 (0.2505) time: 0.4467 data: 0.0056 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:02 lr: 0.000300 loss: 2.6700 (2.6723) grad: 0.2579 (0.2566) time: 0.4294 data: 0.0049 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:45 lr: 0.000300 loss: 2.6964 (2.6869) grad: 0.2651 (0.2602) time: 0.4465 data: 0.0052 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:31 lr: 0.000300 loss: 2.6964 (2.6835) grad: 0.2651 (0.2617) time: 0.4360 data: 0.0051 max mem: 22448 +train: [5] [100/400] eta: 0:02:18 lr: 0.000300 loss: 2.6933 (2.6912) grad: 0.2749 (0.2695) time: 0.4152 data: 0.0046 max mem: 22448 +train: [5] [120/400] eta: 0:02:09 lr: 0.000300 loss: 2.6933 (2.6927) grad: 0.3091 (0.2831) time: 0.4617 data: 0.0052 max mem: 22448 +train: [5] [140/400] eta: 0:01:59 lr: 0.000300 loss: 2.7748 (2.7373) grad: 0.4383 (0.3551) time: 0.4322 data: 0.0052 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=69.24 > 63.56) at step 1074. Freezing. +train: [5] [160/400] eta: 0:01:49 lr: 0.000299 loss: 2.8681 (2.7832) grad: 0.7122 (0.4052) time: 0.4362 data: 0.0050 max mem: 22448 +train: [5] [180/400] eta: 0:01:39 lr: 0.000299 loss: 2.6900 (2.7759) grad: 0.2406 (0.3870) time: 0.4329 data: 0.0049 max mem: 22448 +train: [5] [200/400] eta: 0:01:30 lr: 0.000299 loss: 2.6900 (2.7640) grad: 0.2406 (0.3730) time: 0.4249 data: 0.0050 max mem: 22448 +train: [5] [220/400] eta: 0:01:20 lr: 0.000299 loss: 2.6513 (2.7555) grad: 0.2440 (0.3606) time: 0.4340 data: 0.0051 max mem: 22448 +train: [5] [240/400] eta: 0:01:11 lr: 0.000299 loss: 2.6464 (2.7486) grad: 0.2392 (0.3511) time: 0.4429 data: 0.0049 max mem: 22448 +train: [5] [260/400] eta: 0:01:02 lr: 0.000299 loss: 2.6464 (2.7392) grad: 0.2392 (0.3424) time: 0.4382 data: 0.0050 max mem: 22448 +train: [5] [280/400] eta: 0:00:53 lr: 0.000298 loss: 2.6534 (2.7359) grad: 0.2445 (0.3362) time: 0.4293 data: 0.0048 max mem: 22448 +train: [5] [300/400] eta: 0:00:44 lr: 0.000298 loss: 2.6683 (2.7285) grad: 0.2466 (0.3303) time: 0.4487 data: 0.0051 max mem: 22448 +train: [5] [320/400] eta: 0:00:35 lr: 0.000298 loss: 2.6619 (2.7262) grad: 0.2499 (0.3258) time: 0.4403 data: 0.0050 max mem: 22448 +train: [5] [340/400] eta: 0:00:26 lr: 0.000298 loss: 2.6766 (2.7218) grad: 0.2561 (0.3213) time: 0.4328 data: 0.0050 max mem: 22448 +train: [5] [360/400] eta: 0:00:17 lr: 0.000297 loss: 2.6603 (2.7181) grad: 0.2520 (0.3177) time: 0.4349 data: 0.0050 max mem: 22448 +train: [5] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.6341 (2.7142) grad: 0.2516 (0.3143) time: 0.4354 data: 0.0049 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.5976 (2.7085) grad: 0.2454 (0.3105) time: 0.4308 data: 0.0049 max mem: 22448 +train: [5] Total time: 0:02:57 (0.4442 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.5976 (2.7085) grad: 0.2454 (0.3105) +eval (validation): [5] [ 0/85] eta: 0:04:38 time: 3.2775 data: 2.9780 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:33 time: 0.3835 data: 0.0041 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:19 time: 0.3417 data: 0.0038 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:10 time: 0.3333 data: 0.0039 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3246 data: 0.0042 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3217 data: 0.0041 max mem: 22448 +eval (validation): [5] Total time: 0:00:32 (0.3824 s / it) +cv: [5] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.431 acc: 0.271 f1: 0.196 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:21:29 lr: nan time: 3.2241 data: 2.8484 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:38 lr: 0.000296 loss: 2.5889 (2.5836) grad: 0.2379 (0.2379) time: 0.4413 data: 0.0039 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:03 lr: 0.000296 loss: 2.5889 (2.6038) grad: 0.2432 (0.2455) time: 0.4448 data: 0.0051 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:45 lr: 0.000296 loss: 2.5638 (2.5962) grad: 0.2454 (0.2459) time: 0.4365 data: 0.0048 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:30 lr: 0.000295 loss: 2.5636 (2.5886) grad: 0.2449 (0.2468) time: 0.4215 data: 0.0048 max mem: 22448 +train: [6] [100/400] eta: 0:02:19 lr: 0.000295 loss: 2.5873 (2.5885) grad: 0.2498 (0.2481) time: 0.4452 data: 0.0050 max mem: 22448 +train: [6] [120/400] eta: 0:02:08 lr: 0.000295 loss: 2.5873 (2.5889) grad: 0.2498 (0.2485) time: 0.4287 data: 0.0049 max mem: 22448 +train: [6] [140/400] eta: 0:01:58 lr: 0.000294 loss: 2.6127 (2.5953) grad: 0.2479 (0.2489) time: 0.4230 data: 0.0049 max mem: 22448 +train: [6] [160/400] eta: 0:01:48 lr: 0.000294 loss: 2.6388 (2.6013) grad: 0.2541 (0.2499) time: 0.4266 data: 0.0048 max mem: 22448 +train: [6] [180/400] eta: 0:01:38 lr: 0.000293 loss: 2.6351 (2.6009) grad: 0.2566 (0.2513) time: 0.4243 data: 0.0051 max mem: 22448 +train: [6] [200/400] eta: 0:01:29 lr: 0.000293 loss: 2.6138 (2.6039) grad: 0.2566 (0.2517) time: 0.4313 data: 0.0051 max mem: 22448 +train: [6] [220/400] eta: 0:01:20 lr: 0.000292 loss: 2.6376 (2.6016) grad: 0.2559 (0.2523) time: 0.4497 data: 0.0050 max mem: 22448 +train: [6] [240/400] eta: 0:01:11 lr: 0.000292 loss: 2.6242 (2.6041) grad: 0.2551 (0.2524) time: 0.4428 data: 0.0050 max mem: 22448 +train: [6] [260/400] eta: 0:01:02 lr: 0.000291 loss: 2.6185 (2.6013) grad: 0.2496 (0.2519) time: 0.4148 data: 0.0048 max mem: 22448 +train: [6] [280/400] eta: 0:00:53 lr: 0.000291 loss: 2.6112 (2.6021) grad: 0.2443 (0.2519) time: 0.4265 data: 0.0047 max mem: 22448 +train: [6] [300/400] eta: 0:00:44 lr: 0.000290 loss: 2.6218 (2.6038) grad: 0.2457 (0.2519) time: 0.4522 data: 0.0052 max mem: 22448 +train: [6] [320/400] eta: 0:00:35 lr: 0.000290 loss: 2.6178 (2.6037) grad: 0.2506 (0.2524) time: 0.4319 data: 0.0049 max mem: 22448 +train: [6] [340/400] eta: 0:00:26 lr: 0.000289 loss: 2.6077 (2.6051) grad: 0.2539 (0.2525) time: 0.4362 data: 0.0050 max mem: 22448 +train: [6] [360/400] eta: 0:00:17 lr: 0.000288 loss: 2.5886 (2.6032) grad: 0.2478 (0.2521) time: 0.4329 data: 0.0051 max mem: 22448 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 2.5741 (2.6058) grad: 0.2492 (0.2523) time: 0.4321 data: 0.0049 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.6337 (2.6059) grad: 0.2453 (0.2516) time: 0.4447 data: 0.0048 max mem: 22448 +train: [6] Total time: 0:02:56 (0.4420 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.6337 (2.6059) grad: 0.2453 (0.2516) +eval (validation): [6] [ 0/85] eta: 0:04:33 time: 3.2212 data: 2.9349 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:32 time: 0.3661 data: 0.0045 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:18 time: 0.3320 data: 0.0043 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3236 data: 0.0040 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3222 data: 0.0040 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3198 data: 0.0041 max mem: 22448 +eval (validation): [6] Total time: 0:00:31 (0.3730 s / it) +cv: [6] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.415 acc: 0.274 f1: 0.206 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:20:50 lr: nan time: 3.1265 data: 2.8069 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:30 lr: 0.000286 loss: 2.5197 (2.5044) grad: 0.2363 (0.2454) time: 0.4264 data: 0.0034 max mem: 22448 +train: [7] [ 40/400] eta: 0:02:59 lr: 0.000286 loss: 2.5197 (2.5193) grad: 0.2449 (0.2519) time: 0.4403 data: 0.0042 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:42 lr: 0.000285 loss: 2.5156 (2.5064) grad: 0.2606 (0.2558) time: 0.4328 data: 0.0048 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:28 lr: 0.000284 loss: 2.5169 (2.5235) grad: 0.2582 (0.2539) time: 0.4275 data: 0.0047 max mem: 22448 +train: [7] [100/400] eta: 0:02:19 lr: 0.000284 loss: 2.4995 (2.5151) grad: 0.2484 (0.2538) time: 0.4569 data: 0.0050 max mem: 22448 +train: [7] [120/400] eta: 0:02:08 lr: 0.000283 loss: 2.4809 (2.5163) grad: 0.2552 (0.2549) time: 0.4304 data: 0.0049 max mem: 22448 +train: [7] [140/400] eta: 0:01:58 lr: 0.000282 loss: 2.5095 (2.5206) grad: 0.2566 (0.2553) time: 0.4298 data: 0.0051 max mem: 22448 +train: [7] [160/400] eta: 0:01:48 lr: 0.000282 loss: 2.5274 (2.5220) grad: 0.2562 (0.2554) time: 0.4231 data: 0.0050 max mem: 22448 +train: [7] [180/400] eta: 0:01:38 lr: 0.000281 loss: 2.5705 (2.5281) grad: 0.2617 (0.2568) time: 0.4256 data: 0.0048 max mem: 22448 +train: [7] [200/400] eta: 0:01:29 lr: 0.000280 loss: 2.5705 (2.5279) grad: 0.2630 (0.2568) time: 0.4329 data: 0.0053 max mem: 22448 +train: [7] [220/400] eta: 0:01:20 lr: 0.000279 loss: 2.4959 (2.5236) grad: 0.2531 (0.2567) time: 0.4389 data: 0.0053 max mem: 22448 +train: [7] [240/400] eta: 0:01:11 lr: 0.000278 loss: 2.5249 (2.5280) grad: 0.2568 (0.2573) time: 0.4448 data: 0.0049 max mem: 22448 +train: [7] [260/400] eta: 0:01:02 lr: 0.000278 loss: 2.5424 (2.5272) grad: 0.2561 (0.2567) time: 0.4299 data: 0.0048 max mem: 22448 +train: [7] [280/400] eta: 0:00:53 lr: 0.000277 loss: 2.4930 (2.5234) grad: 0.2510 (0.2566) time: 0.4317 data: 0.0048 max mem: 22448 +train: [7] [300/400] eta: 0:00:44 lr: 0.000276 loss: 2.4584 (2.5222) grad: 0.2513 (0.2572) time: 0.4514 data: 0.0047 max mem: 22448 +train: [7] [320/400] eta: 0:00:35 lr: 0.000275 loss: 2.5011 (2.5230) grad: 0.2623 (0.2569) time: 0.4340 data: 0.0050 max mem: 22448 +train: [7] [340/400] eta: 0:00:26 lr: 0.000274 loss: 2.4969 (2.5203) grad: 0.2459 (0.2565) time: 0.4326 data: 0.0050 max mem: 22448 +train: [7] [360/400] eta: 0:00:17 lr: 0.000273 loss: 2.5058 (2.5226) grad: 0.2508 (0.2568) time: 0.4294 data: 0.0050 max mem: 22448 +train: [7] [380/400] eta: 0:00:08 lr: 0.000272 loss: 2.5317 (2.5223) grad: 0.2581 (0.2571) time: 0.4251 data: 0.0050 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5383 (2.5239) grad: 0.2611 (0.2577) time: 0.4550 data: 0.0052 max mem: 22448 +train: [7] Total time: 0:02:56 (0.4423 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5383 (2.5239) grad: 0.2611 (0.2577) +eval (validation): [7] [ 0/85] eta: 0:04:33 time: 3.2165 data: 2.9795 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:30 time: 0.3390 data: 0.0069 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:18 time: 0.3306 data: 0.0036 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:09 time: 0.3222 data: 0.0035 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3303 data: 0.0039 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3288 data: 0.0039 max mem: 22448 +eval (validation): [7] Total time: 0:00:31 (0.3672 s / it) +cv: [7] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.425 acc: 0.266 f1: 0.202 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:21:19 lr: nan time: 3.1997 data: 2.8214 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:42 lr: 0.000270 loss: 2.3457 (2.3958) grad: 0.2386 (0.2449) time: 0.4540 data: 0.0048 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:06 lr: 0.000270 loss: 2.4368 (2.4309) grad: 0.2502 (0.2522) time: 0.4495 data: 0.0053 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:46 lr: 0.000269 loss: 2.4619 (2.4417) grad: 0.2577 (0.2541) time: 0.4325 data: 0.0048 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:31 lr: 0.000268 loss: 2.4893 (2.4547) grad: 0.2577 (0.2575) time: 0.4270 data: 0.0048 max mem: 22448 +train: [8] [100/400] eta: 0:02:20 lr: 0.000267 loss: 2.4588 (2.4521) grad: 0.2689 (0.2608) time: 0.4422 data: 0.0050 max mem: 22448 +train: [8] [120/400] eta: 0:02:09 lr: 0.000266 loss: 2.4518 (2.4537) grad: 0.2731 (0.2639) time: 0.4361 data: 0.0049 max mem: 22448 +train: [8] [140/400] eta: 0:01:58 lr: 0.000265 loss: 2.4720 (2.4607) grad: 0.2703 (0.2651) time: 0.4252 data: 0.0048 max mem: 22448 +train: [8] [160/400] eta: 0:01:48 lr: 0.000264 loss: 2.4617 (2.4626) grad: 0.2727 (0.2677) time: 0.4251 data: 0.0048 max mem: 22448 +train: [8] [180/400] eta: 0:01:39 lr: 0.000263 loss: 2.4479 (2.4583) grad: 0.2688 (0.2672) time: 0.4313 data: 0.0049 max mem: 22448 +train: [8] [200/400] eta: 0:01:29 lr: 0.000262 loss: 2.4536 (2.4613) grad: 0.2653 (0.2676) time: 0.4310 data: 0.0049 max mem: 22448 +train: [8] [220/400] eta: 0:01:20 lr: 0.000260 loss: 2.4703 (2.4622) grad: 0.2653 (0.2672) time: 0.4325 data: 0.0047 max mem: 22448 +train: [8] [240/400] eta: 0:01:11 lr: 0.000259 loss: 2.4563 (2.4620) grad: 0.2603 (0.2670) time: 0.4401 data: 0.0049 max mem: 22448 +train: [8] [260/400] eta: 0:01:02 lr: 0.000258 loss: 2.4668 (2.4644) grad: 0.2620 (0.2673) time: 0.4357 data: 0.0051 max mem: 22448 +train: [8] [280/400] eta: 0:00:53 lr: 0.000257 loss: 2.4660 (2.4621) grad: 0.2620 (0.2673) time: 0.4238 data: 0.0051 max mem: 22448 +train: [8] [300/400] eta: 0:00:44 lr: 0.000256 loss: 2.4596 (2.4627) grad: 0.2637 (0.2672) time: 0.4455 data: 0.0053 max mem: 22448 +train: [8] [320/400] eta: 0:00:35 lr: 0.000255 loss: 2.4768 (2.4634) grad: 0.2616 (0.2666) time: 0.4300 data: 0.0052 max mem: 22448 +train: [8] [340/400] eta: 0:00:26 lr: 0.000254 loss: 2.4768 (2.4625) grad: 0.2614 (0.2667) time: 0.4352 data: 0.0051 max mem: 22448 +train: [8] [360/400] eta: 0:00:17 lr: 0.000253 loss: 2.4256 (2.4628) grad: 0.2614 (0.2662) time: 0.4345 data: 0.0052 max mem: 22448 +train: [8] [380/400] eta: 0:00:08 lr: 0.000252 loss: 2.4360 (2.4635) grad: 0.2597 (0.2661) time: 0.4217 data: 0.0052 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.4665 (2.4648) grad: 0.2608 (0.2665) time: 0.4492 data: 0.0051 max mem: 22448 +train: [8] Total time: 0:02:57 (0.4427 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.4665 (2.4648) grad: 0.2608 (0.2665) +eval (validation): [8] [ 0/85] eta: 0:04:32 time: 3.2112 data: 2.9785 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:30 time: 0.3332 data: 0.0053 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:18 time: 0.3343 data: 0.0036 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:09 time: 0.3380 data: 0.0042 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3116 data: 0.0041 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3108 data: 0.0041 max mem: 22448 +eval (validation): [8] Total time: 0:00:31 (0.3655 s / it) +cv: [8] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.433 acc: 0.267 f1: 0.202 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:21:42 lr: nan time: 3.2555 data: 2.9169 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:35 lr: 0.000249 loss: 2.4060 (2.4104) grad: 0.2582 (0.2693) time: 0.4338 data: 0.0045 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:04 lr: 0.000248 loss: 2.4508 (2.4264) grad: 0.2591 (0.2639) time: 0.4554 data: 0.0046 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:46 lr: 0.000247 loss: 2.4160 (2.4128) grad: 0.2557 (0.2603) time: 0.4380 data: 0.0050 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:31 lr: 0.000246 loss: 2.4002 (2.4212) grad: 0.2557 (0.2617) time: 0.4321 data: 0.0047 max mem: 22448 +train: [9] [100/400] eta: 0:02:19 lr: 0.000244 loss: 2.4036 (2.4231) grad: 0.2664 (0.2629) time: 0.4335 data: 0.0046 max mem: 22448 +train: [9] [120/400] eta: 0:02:10 lr: 0.000243 loss: 2.3919 (2.4176) grad: 0.2602 (0.2624) time: 0.4562 data: 0.0050 max mem: 22448 +train: [9] [140/400] eta: 0:02:00 lr: 0.000242 loss: 2.3919 (2.4185) grad: 0.2607 (0.2633) time: 0.4469 data: 0.0052 max mem: 22448 +train: [9] [160/400] eta: 0:01:50 lr: 0.000241 loss: 2.3866 (2.4126) grad: 0.2647 (0.2639) time: 0.4361 data: 0.0049 max mem: 22448 +train: [9] [180/400] eta: 0:01:40 lr: 0.000240 loss: 2.3952 (2.4160) grad: 0.2618 (0.2650) time: 0.4354 data: 0.0049 max mem: 22448 +train: [9] [200/400] eta: 0:01:31 lr: 0.000238 loss: 2.4008 (2.4152) grad: 0.2703 (0.2660) time: 0.4426 data: 0.0052 max mem: 22448 +train: [9] [220/400] eta: 0:01:21 lr: 0.000237 loss: 2.4079 (2.4120) grad: 0.2703 (0.2666) time: 0.4361 data: 0.0051 max mem: 22448 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 2.4377 (2.4169) grad: 0.2701 (0.2664) time: 0.4545 data: 0.0050 max mem: 22448 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 2.4444 (2.4167) grad: 0.2646 (0.2661) time: 0.4455 data: 0.0051 max mem: 22448 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 2.4205 (2.4174) grad: 0.2670 (0.2665) time: 0.4302 data: 0.0047 max mem: 22448 +train: [9] [300/400] eta: 0:00:45 lr: 0.000232 loss: 2.4112 (2.4184) grad: 0.2727 (0.2672) time: 0.4458 data: 0.0050 max mem: 22448 +train: [9] [320/400] eta: 0:00:35 lr: 0.000230 loss: 2.4112 (2.4199) grad: 0.2715 (0.2671) time: 0.4269 data: 0.0047 max mem: 22448 +train: [9] [340/400] eta: 0:00:26 lr: 0.000229 loss: 2.3910 (2.4181) grad: 0.2671 (0.2677) time: 0.4474 data: 0.0050 max mem: 22448 +train: [9] [360/400] eta: 0:00:17 lr: 0.000228 loss: 2.4466 (2.4202) grad: 0.2760 (0.2684) time: 0.4438 data: 0.0050 max mem: 22448 +train: [9] [380/400] eta: 0:00:08 lr: 0.000226 loss: 2.4080 (2.4191) grad: 0.2679 (0.2685) time: 0.4236 data: 0.0048 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.4172 (2.4198) grad: 0.2691 (0.2690) time: 0.4431 data: 0.0051 max mem: 22448 +train: [9] Total time: 0:02:59 (0.4480 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.4172 (2.4198) grad: 0.2691 (0.2690) +eval (validation): [9] [ 0/85] eta: 0:04:49 time: 3.4016 data: 3.0947 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:33 time: 0.3692 data: 0.0052 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:19 time: 0.3429 data: 0.0044 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3329 data: 0.0041 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3258 data: 0.0042 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3257 data: 0.0040 max mem: 22448 +eval (validation): [9] Total time: 0:00:32 (0.3818 s / it) +cv: [9] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.418 acc: 0.267 f1: 0.193 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:21:53 lr: nan time: 3.2827 data: 2.8879 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:46 lr: 0.000224 loss: 2.3366 (2.3420) grad: 0.2728 (0.2732) time: 0.4630 data: 0.0065 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:07 lr: 0.000222 loss: 2.3284 (2.3344) grad: 0.2681 (0.2691) time: 0.4386 data: 0.0047 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:47 lr: 0.000221 loss: 2.3284 (2.3566) grad: 0.2588 (0.2669) time: 0.4390 data: 0.0051 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:33 lr: 0.000220 loss: 2.3518 (2.3490) grad: 0.2587 (0.2656) time: 0.4364 data: 0.0051 max mem: 22448 +train: [10] [100/400] eta: 0:02:20 lr: 0.000218 loss: 2.3510 (2.3492) grad: 0.2685 (0.2672) time: 0.4260 data: 0.0050 max mem: 22448 +train: [10] [120/400] eta: 0:02:09 lr: 0.000217 loss: 2.3696 (2.3509) grad: 0.2711 (0.2676) time: 0.4414 data: 0.0048 max mem: 22448 +train: [10] [140/400] eta: 0:01:59 lr: 0.000215 loss: 2.3597 (2.3555) grad: 0.2673 (0.2675) time: 0.4374 data: 0.0051 max mem: 22448 +train: [10] [160/400] eta: 0:01:49 lr: 0.000214 loss: 2.3597 (2.3561) grad: 0.2613 (0.2674) time: 0.4356 data: 0.0049 max mem: 22448 +train: [10] [180/400] eta: 0:01:40 lr: 0.000213 loss: 2.3417 (2.3597) grad: 0.2630 (0.2675) time: 0.4431 data: 0.0051 max mem: 22448 +train: [10] [200/400] eta: 0:01:30 lr: 0.000211 loss: 2.3908 (2.3600) grad: 0.2682 (0.2676) time: 0.4283 data: 0.0048 max mem: 22448 +train: [10] [220/400] eta: 0:01:21 lr: 0.000210 loss: 2.3760 (2.3596) grad: 0.2608 (0.2674) time: 0.4451 data: 0.0048 max mem: 22448 +train: [10] [240/400] eta: 0:01:12 lr: 0.000208 loss: 2.3594 (2.3609) grad: 0.2639 (0.2674) time: 0.4571 data: 0.0051 max mem: 22448 +train: [10] [260/400] eta: 0:01:03 lr: 0.000207 loss: 2.4081 (2.3626) grad: 0.2639 (0.2671) time: 0.4379 data: 0.0049 max mem: 22448 +train: [10] [280/400] eta: 0:00:54 lr: 0.000205 loss: 2.4081 (2.3628) grad: 0.2646 (0.2672) time: 0.4313 data: 0.0049 max mem: 22448 +train: [10] [300/400] eta: 0:00:45 lr: 0.000204 loss: 2.3193 (2.3596) grad: 0.2594 (0.2665) time: 0.4533 data: 0.0049 max mem: 22448 +train: [10] [320/400] eta: 0:00:35 lr: 0.000202 loss: 2.3182 (2.3572) grad: 0.2602 (0.2667) time: 0.4352 data: 0.0051 max mem: 22448 +train: [10] [340/400] eta: 0:00:26 lr: 0.000201 loss: 2.3419 (2.3574) grad: 0.2724 (0.2669) time: 0.4173 data: 0.0044 max mem: 22448 +train: [10] [360/400] eta: 0:00:17 lr: 0.000199 loss: 2.3205 (2.3553) grad: 0.2626 (0.2665) time: 0.4329 data: 0.0048 max mem: 22448 +train: [10] [380/400] eta: 0:00:08 lr: 0.000198 loss: 2.3032 (2.3544) grad: 0.2618 (0.2665) time: 0.4276 data: 0.0047 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.3335 (2.3557) grad: 0.2691 (0.2669) time: 0.4262 data: 0.0048 max mem: 22448 +train: [10] Total time: 0:02:58 (0.4453 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.3335 (2.3557) grad: 0.2691 (0.2669) +eval (validation): [10] [ 0/85] eta: 0:04:19 time: 3.0491 data: 2.8103 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:30 time: 0.3477 data: 0.0041 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:18 time: 0.3255 data: 0.0037 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:09 time: 0.3302 data: 0.0038 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3143 data: 0.0038 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3091 data: 0.0038 max mem: 22448 +eval (validation): [10] Total time: 0:00:30 (0.3638 s / it) +cv: [10] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.421 acc: 0.267 f1: 0.195 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:21:55 lr: nan time: 3.2900 data: 2.9735 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:33 lr: 0.000195 loss: 2.2532 (2.3028) grad: 0.2523 (0.2616) time: 0.4249 data: 0.0033 max mem: 22448 +train: [11] [ 40/400] eta: 0:02:57 lr: 0.000193 loss: 2.3034 (2.3110) grad: 0.2639 (0.2649) time: 0.4231 data: 0.0048 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:41 lr: 0.000192 loss: 2.2905 (2.2917) grad: 0.2681 (0.2669) time: 0.4322 data: 0.0048 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:28 lr: 0.000190 loss: 2.3033 (2.3042) grad: 0.2710 (0.2694) time: 0.4287 data: 0.0047 max mem: 22448 +train: [11] [100/400] eta: 0:02:16 lr: 0.000189 loss: 2.3186 (2.2980) grad: 0.2710 (0.2685) time: 0.4218 data: 0.0049 max mem: 22448 +train: [11] [120/400] eta: 0:02:07 lr: 0.000187 loss: 2.2011 (2.2846) grad: 0.2660 (0.2686) time: 0.4520 data: 0.0050 max mem: 22448 +train: [11] [140/400] eta: 0:01:57 lr: 0.000186 loss: 2.2591 (2.2898) grad: 0.2660 (0.2693) time: 0.4410 data: 0.0051 max mem: 22448 +train: [11] [160/400] eta: 0:01:48 lr: 0.000184 loss: 2.3172 (2.2957) grad: 0.2744 (0.2698) time: 0.4387 data: 0.0050 max mem: 22448 +train: [11] [180/400] eta: 0:01:38 lr: 0.000183 loss: 2.2912 (2.2955) grad: 0.2821 (0.2719) time: 0.4348 data: 0.0052 max mem: 22448 +train: [11] [200/400] eta: 0:01:29 lr: 0.000181 loss: 2.2962 (2.2995) grad: 0.2856 (0.2726) time: 0.4376 data: 0.0046 max mem: 22448 +train: [11] [220/400] eta: 0:01:20 lr: 0.000180 loss: 2.3475 (2.3054) grad: 0.2765 (0.2723) time: 0.4291 data: 0.0053 max mem: 22448 +train: [11] [240/400] eta: 0:01:11 lr: 0.000178 loss: 2.3508 (2.3078) grad: 0.2614 (0.2721) time: 0.4466 data: 0.0051 max mem: 22448 +train: [11] [260/400] eta: 0:01:02 lr: 0.000177 loss: 2.3220 (2.3087) grad: 0.2658 (0.2727) time: 0.4479 data: 0.0051 max mem: 22448 +train: [11] [280/400] eta: 0:00:53 lr: 0.000175 loss: 2.3135 (2.3102) grad: 0.2709 (0.2729) time: 0.4295 data: 0.0049 max mem: 22448 +train: [11] [300/400] eta: 0:00:44 lr: 0.000174 loss: 2.3461 (2.3137) grad: 0.2730 (0.2731) time: 0.4285 data: 0.0048 max mem: 22448 +train: [11] [320/400] eta: 0:00:35 lr: 0.000172 loss: 2.3461 (2.3148) grad: 0.2753 (0.2738) time: 0.4400 data: 0.0047 max mem: 22448 +train: [11] [340/400] eta: 0:00:26 lr: 0.000170 loss: 2.2901 (2.3154) grad: 0.2753 (0.2744) time: 0.4400 data: 0.0049 max mem: 22448 +train: [11] [360/400] eta: 0:00:17 lr: 0.000169 loss: 2.2953 (2.3142) grad: 0.2684 (0.2748) time: 0.4384 data: 0.0051 max mem: 22448 +train: [11] [380/400] eta: 0:00:08 lr: 0.000167 loss: 2.2778 (2.3110) grad: 0.2684 (0.2745) time: 0.4408 data: 0.0050 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.2979 (2.3136) grad: 0.2628 (0.2739) time: 0.4320 data: 0.0051 max mem: 22448 +train: [11] Total time: 0:02:57 (0.4431 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.2979 (2.3136) grad: 0.2628 (0.2739) +eval (validation): [11] [ 0/85] eta: 0:04:40 time: 3.2953 data: 3.0454 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:32 time: 0.3594 data: 0.0062 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:19 time: 0.3502 data: 0.0037 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:09 time: 0.3166 data: 0.0039 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3145 data: 0.0039 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3140 data: 0.0039 max mem: 22448 +eval (validation): [11] Total time: 0:00:31 (0.3722 s / it) +cv: [11] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.428 acc: 0.265 f1: 0.193 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:23:06 lr: nan time: 3.4671 data: 3.0893 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:44 lr: 0.000164 loss: 2.2093 (2.2211) grad: 0.2607 (0.2612) time: 0.4458 data: 0.0041 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:05 lr: 0.000163 loss: 2.2416 (2.2402) grad: 0.2625 (0.2662) time: 0.4389 data: 0.0051 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:47 lr: 0.000161 loss: 2.2452 (2.2466) grad: 0.2652 (0.2651) time: 0.4415 data: 0.0051 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:32 lr: 0.000160 loss: 2.2551 (2.2514) grad: 0.2619 (0.2641) time: 0.4328 data: 0.0050 max mem: 22448 +train: [12] [100/400] eta: 0:02:21 lr: 0.000158 loss: 2.2579 (2.2509) grad: 0.2659 (0.2648) time: 0.4438 data: 0.0050 max mem: 22448 +train: [12] [120/400] eta: 0:02:09 lr: 0.000156 loss: 2.2561 (2.2548) grad: 0.2651 (0.2643) time: 0.4262 data: 0.0050 max mem: 22448 +train: [12] [140/400] eta: 0:02:00 lr: 0.000155 loss: 2.2513 (2.2519) grad: 0.2692 (0.2665) time: 0.4526 data: 0.0053 max mem: 22448 +train: [12] [160/400] eta: 0:01:50 lr: 0.000153 loss: 2.2253 (2.2512) grad: 0.2774 (0.2680) time: 0.4407 data: 0.0052 max mem: 22448 +train: [12] [180/400] eta: 0:01:40 lr: 0.000152 loss: 2.2468 (2.2521) grad: 0.2774 (0.2688) time: 0.4360 data: 0.0051 max mem: 22448 +train: [12] [200/400] eta: 0:01:30 lr: 0.000150 loss: 2.2939 (2.2586) grad: 0.2712 (0.2693) time: 0.4401 data: 0.0051 max mem: 22448 +train: [12] [220/400] eta: 0:01:21 lr: 0.000149 loss: 2.3136 (2.2639) grad: 0.2667 (0.2689) time: 0.4484 data: 0.0053 max mem: 22448 +train: [12] [240/400] eta: 0:01:12 lr: 0.000147 loss: 2.2598 (2.2630) grad: 0.2740 (0.2702) time: 0.4418 data: 0.0051 max mem: 22448 +train: [12] [260/400] eta: 0:01:03 lr: 0.000145 loss: 2.2438 (2.2621) grad: 0.2753 (0.2703) time: 0.4550 data: 0.0051 max mem: 22448 +train: [12] [280/400] eta: 0:00:54 lr: 0.000144 loss: 2.2416 (2.2574) grad: 0.2654 (0.2702) time: 0.4404 data: 0.0051 max mem: 22448 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 2.2533 (2.2610) grad: 0.2693 (0.2711) time: 0.4399 data: 0.0050 max mem: 22448 +train: [12] [320/400] eta: 0:00:36 lr: 0.000141 loss: 2.2617 (2.2620) grad: 0.2694 (0.2709) time: 0.4506 data: 0.0053 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.2617 (2.2617) grad: 0.2673 (0.2707) time: 0.4476 data: 0.0050 max mem: 22448 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 2.2277 (2.2602) grad: 0.2635 (0.2704) time: 0.4448 data: 0.0051 max mem: 22448 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 2.2327 (2.2601) grad: 0.2639 (0.2702) time: 0.4412 data: 0.0050 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.2335 (2.2589) grad: 0.2691 (0.2706) time: 0.4359 data: 0.0052 max mem: 22448 +train: [12] Total time: 0:03:00 (0.4504 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.2335 (2.2589) grad: 0.2691 (0.2706) +eval (validation): [12] [ 0/85] eta: 0:04:39 time: 3.2833 data: 3.0432 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:32 time: 0.3592 data: 0.0039 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:19 time: 0.3439 data: 0.0041 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3498 data: 0.0046 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3285 data: 0.0038 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3252 data: 0.0036 max mem: 22448 +eval (validation): [12] Total time: 0:00:32 (0.3816 s / it) +cv: [12] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.443 acc: 0.264 f1: 0.195 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:23 lr: nan time: 3.3597 data: 2.9843 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:39 lr: 0.000133 loss: 2.2115 (2.2137) grad: 0.2693 (0.2699) time: 0.4381 data: 0.0045 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:03 lr: 0.000131 loss: 2.2199 (2.2106) grad: 0.2693 (0.2707) time: 0.4416 data: 0.0049 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:44 lr: 0.000130 loss: 2.2094 (2.2071) grad: 0.2710 (0.2711) time: 0.4265 data: 0.0048 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:32 lr: 0.000128 loss: 2.1794 (2.2013) grad: 0.2706 (0.2713) time: 0.4500 data: 0.0051 max mem: 22448 +train: [13] [100/400] eta: 0:02:20 lr: 0.000127 loss: 2.1653 (2.1991) grad: 0.2594 (0.2699) time: 0.4361 data: 0.0052 max mem: 22448 +train: [13] [120/400] eta: 0:02:09 lr: 0.000125 loss: 2.1899 (2.1953) grad: 0.2594 (0.2701) time: 0.4383 data: 0.0050 max mem: 22448 +train: [13] [140/400] eta: 0:01:59 lr: 0.000124 loss: 2.1701 (2.2048) grad: 0.2773 (0.2720) time: 0.4456 data: 0.0050 max mem: 22448 +train: [13] [160/400] eta: 0:01:50 lr: 0.000122 loss: 2.1956 (2.2042) grad: 0.2837 (0.2736) time: 0.4463 data: 0.0053 max mem: 22448 +train: [13] [180/400] eta: 0:01:40 lr: 0.000120 loss: 2.2223 (2.2130) grad: 0.2837 (0.2743) time: 0.4456 data: 0.0052 max mem: 22448 +train: [13] [200/400] eta: 0:01:31 lr: 0.000119 loss: 2.2049 (2.2084) grad: 0.2764 (0.2743) time: 0.4434 data: 0.0050 max mem: 22448 +train: [13] [220/400] eta: 0:01:22 lr: 0.000117 loss: 2.2077 (2.2100) grad: 0.2741 (0.2750) time: 0.4589 data: 0.0050 max mem: 22448 +train: [13] [240/400] eta: 0:01:13 lr: 0.000116 loss: 2.2248 (2.2092) grad: 0.2741 (0.2750) time: 0.4604 data: 0.0052 max mem: 22448 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 2.1933 (2.2108) grad: 0.2699 (0.2744) time: 0.4590 data: 0.0053 max mem: 22448 +train: [13] [280/400] eta: 0:00:54 lr: 0.000113 loss: 2.1670 (2.2074) grad: 0.2645 (0.2733) time: 0.4401 data: 0.0050 max mem: 22448 +train: [13] [300/400] eta: 0:00:45 lr: 0.000111 loss: 2.2017 (2.2076) grad: 0.2610 (0.2721) time: 0.4468 data: 0.0051 max mem: 22448 +train: [13] [320/400] eta: 0:00:36 lr: 0.000110 loss: 2.2212 (2.2092) grad: 0.2636 (0.2723) time: 0.4503 data: 0.0053 max mem: 22448 +train: [13] [340/400] eta: 0:00:27 lr: 0.000108 loss: 2.2123 (2.2102) grad: 0.2696 (0.2720) time: 0.4409 data: 0.0052 max mem: 22448 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 2.2060 (2.2105) grad: 0.2734 (0.2724) time: 0.4363 data: 0.0052 max mem: 22448 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 2.2226 (2.2107) grad: 0.2764 (0.2727) time: 0.4467 data: 0.0053 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.2250 (2.2121) grad: 0.2728 (0.2728) time: 0.4472 data: 0.0051 max mem: 22448 +train: [13] Total time: 0:03:01 (0.4528 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.2250 (2.2121) grad: 0.2728 (0.2728) +eval (validation): [13] [ 0/85] eta: 0:04:27 time: 3.1466 data: 2.8638 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:34 time: 0.4065 data: 0.0340 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:20 time: 0.3511 data: 0.0042 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:10 time: 0.3506 data: 0.0046 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3233 data: 0.0042 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3167 data: 0.0041 max mem: 22448 +eval (validation): [13] Total time: 0:00:33 (0.3926 s / it) +cv: [13] best hparam: (0.27, 1.0) (016) ('016_lr2.7e-01_wd1.0e+00') loss: 2.429 acc: 0.270 f1: 0.198 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:12 lr: nan time: 3.3307 data: 2.9967 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:43 lr: 0.000102 loss: 2.1245 (2.1219) grad: 0.2563 (0.2580) time: 0.4511 data: 0.0042 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:08 lr: 0.000101 loss: 2.1318 (2.1277) grad: 0.2558 (0.2589) time: 0.4549 data: 0.0046 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:47 lr: 0.000099 loss: 2.1673 (2.1404) grad: 0.2664 (0.2622) time: 0.4285 data: 0.0050 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:33 lr: 0.000098 loss: 2.1652 (2.1504) grad: 0.2619 (0.2614) time: 0.4459 data: 0.0051 max mem: 22448 +train: [14] [100/400] eta: 0:02:21 lr: 0.000096 loss: 2.1565 (2.1572) grad: 0.2572 (0.2625) time: 0.4274 data: 0.0049 max mem: 22448 +train: [14] [120/400] eta: 0:02:10 lr: 0.000095 loss: 2.1632 (2.1533) grad: 0.2680 (0.2640) time: 0.4466 data: 0.0049 max mem: 22448 +train: [14] [140/400] eta: 0:02:00 lr: 0.000093 loss: 2.1588 (2.1541) grad: 0.2702 (0.2654) time: 0.4393 data: 0.0051 max mem: 22448 +train: [14] [160/400] eta: 0:01:50 lr: 0.000092 loss: 2.1128 (2.1490) grad: 0.2722 (0.2659) time: 0.4485 data: 0.0050 max mem: 22448 +train: [14] [180/400] eta: 0:01:40 lr: 0.000090 loss: 2.1100 (2.1453) grad: 0.2685 (0.2663) time: 0.4427 data: 0.0051 max mem: 22448 +train: [14] [200/400] eta: 0:01:31 lr: 0.000089 loss: 2.1343 (2.1479) grad: 0.2734 (0.2675) time: 0.4431 data: 0.0053 max mem: 22448 +train: [14] [220/400] eta: 0:01:22 lr: 0.000088 loss: 2.1699 (2.1504) grad: 0.2822 (0.2681) time: 0.4416 data: 0.0053 max mem: 22448 +train: [14] [240/400] eta: 0:01:12 lr: 0.000086 loss: 2.1746 (2.1549) grad: 0.2707 (0.2682) time: 0.4505 data: 0.0051 max mem: 22448 +train: [14] [260/400] eta: 0:01:03 lr: 0.000085 loss: 2.2083 (2.1575) grad: 0.2711 (0.2686) time: 0.4528 data: 0.0052 max mem: 22448 +train: [14] [280/400] eta: 0:00:54 lr: 0.000083 loss: 2.2083 (2.1596) grad: 0.2705 (0.2682) time: 0.4330 data: 0.0050 max mem: 22448 +train: [14] [300/400] eta: 0:00:45 lr: 0.000082 loss: 2.2189 (2.1648) grad: 0.2651 (0.2682) time: 0.4677 data: 0.0053 max mem: 22448 +train: [14] [320/400] eta: 0:00:36 lr: 0.000081 loss: 2.2080 (2.1659) grad: 0.2711 (0.2687) time: 0.4409 data: 0.0052 max mem: 22448 +train: [14] [340/400] eta: 0:00:27 lr: 0.000079 loss: 2.1886 (2.1662) grad: 0.2738 (0.2690) time: 0.4360 data: 0.0052 max mem: 22448 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 2.1486 (2.1661) grad: 0.2666 (0.2689) time: 0.4332 data: 0.0051 max mem: 22448 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 2.1375 (2.1639) grad: 0.2672 (0.2689) time: 0.4427 data: 0.0049 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.1298 (2.1644) grad: 0.2761 (0.2695) time: 0.4366 data: 0.0051 max mem: 22448 +train: [14] Total time: 0:03:00 (0.4510 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.1298 (2.1644) grad: 0.2761 (0.2695) +eval (validation): [14] [ 0/85] eta: 0:04:27 time: 3.1498 data: 2.9163 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:31 time: 0.3448 data: 0.0041 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3526 data: 0.0042 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3539 data: 0.0043 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3191 data: 0.0041 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3138 data: 0.0042 max mem: 22448 +eval (validation): [14] Total time: 0:00:32 (0.3772 s / it) +cv: [14] best hparam: (0.27, 1.0) (016) ('016_lr2.7e-01_wd1.0e+00') loss: 2.424 acc: 0.272 f1: 0.201 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:22:30 lr: nan time: 3.3752 data: 2.9929 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:41 lr: 0.000074 loss: 2.1213 (2.1263) grad: 0.2582 (0.2638) time: 0.4445 data: 0.0047 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:04 lr: 0.000072 loss: 2.1213 (2.1260) grad: 0.2582 (0.2620) time: 0.4384 data: 0.0048 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:46 lr: 0.000071 loss: 2.0979 (2.1146) grad: 0.2632 (0.2634) time: 0.4409 data: 0.0049 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:32 lr: 0.000070 loss: 2.0834 (2.1039) grad: 0.2632 (0.2627) time: 0.4416 data: 0.0052 max mem: 22448 +train: [15] [100/400] eta: 0:02:20 lr: 0.000068 loss: 2.0599 (2.0983) grad: 0.2599 (0.2635) time: 0.4292 data: 0.0050 max mem: 22448 +train: [15] [120/400] eta: 0:02:10 lr: 0.000067 loss: 2.1045 (2.1064) grad: 0.2736 (0.2654) time: 0.4588 data: 0.0051 max mem: 22448 +train: [15] [140/400] eta: 0:02:00 lr: 0.000066 loss: 2.1342 (2.1142) grad: 0.2744 (0.2679) time: 0.4361 data: 0.0049 max mem: 22448 +train: [15] [160/400] eta: 0:01:50 lr: 0.000064 loss: 2.1161 (2.1140) grad: 0.2721 (0.2675) time: 0.4387 data: 0.0043 max mem: 22448 +train: [15] [180/400] eta: 0:01:40 lr: 0.000063 loss: 2.1684 (2.1252) grad: 0.2727 (0.2687) time: 0.4481 data: 0.0053 max mem: 22448 +train: [15] [200/400] eta: 0:01:31 lr: 0.000062 loss: 2.1684 (2.1259) grad: 0.2658 (0.2682) time: 0.4487 data: 0.0051 max mem: 22448 +train: [15] [220/400] eta: 0:01:21 lr: 0.000061 loss: 2.1272 (2.1242) grad: 0.2625 (0.2683) time: 0.4333 data: 0.0051 max mem: 22448 +train: [15] [240/400] eta: 0:01:12 lr: 0.000059 loss: 2.1272 (2.1259) grad: 0.2741 (0.2687) time: 0.4557 data: 0.0054 max mem: 22448 +train: [15] [260/400] eta: 0:01:03 lr: 0.000058 loss: 2.1445 (2.1301) grad: 0.2701 (0.2687) time: 0.4627 data: 0.0052 max mem: 22448 +train: [15] [280/400] eta: 0:00:54 lr: 0.000057 loss: 2.1445 (2.1280) grad: 0.2583 (0.2679) time: 0.4279 data: 0.0047 max mem: 22448 +train: [15] [300/400] eta: 0:00:45 lr: 0.000056 loss: 2.1152 (2.1280) grad: 0.2582 (0.2678) time: 0.4589 data: 0.0052 max mem: 22448 +train: [15] [320/400] eta: 0:00:36 lr: 0.000054 loss: 2.1242 (2.1284) grad: 0.2696 (0.2677) time: 0.4535 data: 0.0051 max mem: 22448 +train: [15] [340/400] eta: 0:00:27 lr: 0.000053 loss: 2.1466 (2.1297) grad: 0.2660 (0.2676) time: 0.4468 data: 0.0051 max mem: 22448 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 2.1243 (2.1293) grad: 0.2660 (0.2678) time: 0.4372 data: 0.0052 max mem: 22448 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 2.0983 (2.1275) grad: 0.2589 (0.2673) time: 0.4481 data: 0.0051 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.1103 (2.1271) grad: 0.2530 (0.2669) time: 0.4419 data: 0.0051 max mem: 22448 +train: [15] Total time: 0:03:01 (0.4526 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.1103 (2.1271) grad: 0.2530 (0.2669) +eval (validation): [15] [ 0/85] eta: 0:04:46 time: 3.3666 data: 3.0691 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:31 time: 0.3453 data: 0.0045 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:18 time: 0.3488 data: 0.0040 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:09 time: 0.3427 data: 0.0043 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3283 data: 0.0042 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3226 data: 0.0042 max mem: 22448 +eval (validation): [15] Total time: 0:00:32 (0.3793 s / it) +cv: [15] best hparam: (0.27, 1.0) (016) ('016_lr2.7e-01_wd1.0e+00') loss: 2.426 acc: 0.269 f1: 0.198 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:45 lr: nan time: 3.4149 data: 3.0298 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:36 lr: 0.000048 loss: 2.0669 (2.0892) grad: 0.2458 (0.2493) time: 0.4275 data: 0.0045 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:04 lr: 0.000047 loss: 2.0669 (2.0831) grad: 0.2515 (0.2518) time: 0.4511 data: 0.0047 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:46 lr: 0.000046 loss: 2.0665 (2.0802) grad: 0.2564 (0.2555) time: 0.4451 data: 0.0054 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:33 lr: 0.000045 loss: 2.1132 (2.0917) grad: 0.2658 (0.2596) time: 0.4533 data: 0.0051 max mem: 22448 +train: [16] [100/400] eta: 0:02:20 lr: 0.000044 loss: 2.1047 (2.0883) grad: 0.2694 (0.2607) time: 0.4255 data: 0.0049 max mem: 22448 +train: [16] [120/400] eta: 0:02:10 lr: 0.000043 loss: 2.0633 (2.0883) grad: 0.2642 (0.2609) time: 0.4529 data: 0.0051 max mem: 22448 +train: [16] [140/400] eta: 0:02:00 lr: 0.000042 loss: 2.0917 (2.0890) grad: 0.2602 (0.2608) time: 0.4431 data: 0.0050 max mem: 22448 +train: [16] [160/400] eta: 0:01:50 lr: 0.000041 loss: 2.0949 (2.0937) grad: 0.2651 (0.2619) time: 0.4231 data: 0.0048 max mem: 22448 +train: [16] [180/400] eta: 0:01:40 lr: 0.000040 loss: 2.1094 (2.0956) grad: 0.2651 (0.2616) time: 0.4531 data: 0.0053 max mem: 22448 +train: [16] [200/400] eta: 0:01:31 lr: 0.000039 loss: 2.0844 (2.0913) grad: 0.2562 (0.2608) time: 0.4501 data: 0.0049 max mem: 22448 +train: [16] [220/400] eta: 0:01:21 lr: 0.000038 loss: 2.0830 (2.0906) grad: 0.2534 (0.2606) time: 0.4361 data: 0.0049 max mem: 22448 +train: [16] [240/400] eta: 0:01:12 lr: 0.000036 loss: 2.0764 (2.0896) grad: 0.2576 (0.2613) time: 0.4602 data: 0.0052 max mem: 22448 +train: [16] [260/400] eta: 0:01:03 lr: 0.000035 loss: 2.0764 (2.0922) grad: 0.2715 (0.2621) time: 0.4538 data: 0.0053 max mem: 22448 +train: [16] [280/400] eta: 0:00:54 lr: 0.000034 loss: 2.1062 (2.0931) grad: 0.2650 (0.2621) time: 0.4334 data: 0.0050 max mem: 22448 +train: [16] [300/400] eta: 0:00:45 lr: 0.000033 loss: 2.1086 (2.0945) grad: 0.2619 (0.2625) time: 0.4517 data: 0.0052 max mem: 22448 +train: [16] [320/400] eta: 0:00:36 lr: 0.000032 loss: 2.1190 (2.0976) grad: 0.2691 (0.2629) time: 0.4468 data: 0.0051 max mem: 22448 +train: [16] [340/400] eta: 0:00:27 lr: 0.000031 loss: 2.0696 (2.0963) grad: 0.2680 (0.2631) time: 0.4538 data: 0.0050 max mem: 22448 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 2.0627 (2.0966) grad: 0.2621 (0.2633) time: 0.4522 data: 0.0049 max mem: 22448 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 2.0819 (2.0963) grad: 0.2630 (0.2638) time: 0.4277 data: 0.0050 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.0938 (2.0975) grad: 0.2698 (0.2642) time: 0.4515 data: 0.0049 max mem: 22448 +train: [16] Total time: 0:03:01 (0.4525 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.0938 (2.0975) grad: 0.2698 (0.2642) +eval (validation): [16] [ 0/85] eta: 0:04:42 time: 3.3287 data: 3.0722 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:33 time: 0.3679 data: 0.0049 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:19 time: 0.3500 data: 0.0037 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:10 time: 0.3438 data: 0.0043 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3226 data: 0.0039 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3182 data: 0.0039 max mem: 22448 +eval (validation): [16] Total time: 0:00:32 (0.3829 s / it) +cv: [16] best hparam: (0.27, 1.0) (016) ('016_lr2.7e-01_wd1.0e+00') loss: 2.424 acc: 0.272 f1: 0.200 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:11 lr: nan time: 3.3279 data: 2.9813 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:43 lr: 0.000028 loss: 2.0036 (2.0288) grad: 0.2365 (0.2524) time: 0.4506 data: 0.0248 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:07 lr: 0.000027 loss: 2.0672 (2.0622) grad: 0.2541 (0.2556) time: 0.4481 data: 0.0044 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:48 lr: 0.000026 loss: 2.0902 (2.0794) grad: 0.2515 (0.2546) time: 0.4445 data: 0.0050 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:33 lr: 0.000025 loss: 2.0462 (2.0664) grad: 0.2509 (0.2544) time: 0.4366 data: 0.0052 max mem: 22448 +train: [17] [100/400] eta: 0:02:21 lr: 0.000024 loss: 2.0343 (2.0702) grad: 0.2473 (0.2532) time: 0.4342 data: 0.0049 max mem: 22448 +train: [17] [120/400] eta: 0:02:11 lr: 0.000023 loss: 2.0787 (2.0693) grad: 0.2476 (0.2538) time: 0.4557 data: 0.0053 max mem: 22448 +train: [17] [140/400] eta: 0:02:00 lr: 0.000023 loss: 2.0787 (2.0693) grad: 0.2538 (0.2539) time: 0.4399 data: 0.0052 max mem: 22448 +train: [17] [160/400] eta: 0:01:51 lr: 0.000022 loss: 2.0837 (2.0701) grad: 0.2562 (0.2550) time: 0.4494 data: 0.0051 max mem: 22448 +train: [17] [180/400] eta: 0:01:41 lr: 0.000021 loss: 2.0666 (2.0686) grad: 0.2529 (0.2545) time: 0.4469 data: 0.0051 max mem: 22448 +train: [17] [200/400] eta: 0:01:31 lr: 0.000020 loss: 2.0608 (2.0678) grad: 0.2528 (0.2544) time: 0.4459 data: 0.0050 max mem: 22448 +train: [17] [220/400] eta: 0:01:22 lr: 0.000019 loss: 2.0644 (2.0683) grad: 0.2568 (0.2552) time: 0.4469 data: 0.0050 max mem: 22448 +train: [17] [240/400] eta: 0:01:13 lr: 0.000019 loss: 2.0644 (2.0659) grad: 0.2535 (0.2555) time: 0.4544 data: 0.0051 max mem: 22448 +train: [17] [260/400] eta: 0:01:04 lr: 0.000018 loss: 2.0179 (2.0648) grad: 0.2540 (0.2559) time: 0.4527 data: 0.0050 max mem: 22448 +train: [17] [280/400] eta: 0:00:54 lr: 0.000017 loss: 2.0501 (2.0655) grad: 0.2600 (0.2561) time: 0.4382 data: 0.0050 max mem: 22448 +train: [17] [300/400] eta: 0:00:45 lr: 0.000016 loss: 2.0746 (2.0674) grad: 0.2593 (0.2562) time: 0.4528 data: 0.0052 max mem: 22448 +train: [17] [320/400] eta: 0:00:36 lr: 0.000016 loss: 2.0664 (2.0670) grad: 0.2530 (0.2555) time: 0.4470 data: 0.0049 max mem: 22448 +train: [17] [340/400] eta: 0:00:27 lr: 0.000015 loss: 2.0664 (2.0673) grad: 0.2508 (0.2559) time: 0.4456 data: 0.0050 max mem: 22448 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 2.0937 (2.0681) grad: 0.2533 (0.2560) time: 0.4524 data: 0.0052 max mem: 22448 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 2.0494 (2.0673) grad: 0.2589 (0.2567) time: 0.4356 data: 0.0049 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.0188 (2.0663) grad: 0.2597 (0.2565) time: 0.4462 data: 0.0049 max mem: 22448 +train: [17] Total time: 0:03:01 (0.4541 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.0188 (2.0663) grad: 0.2597 (0.2565) +eval (validation): [17] [ 0/85] eta: 0:04:47 time: 3.3877 data: 3.1394 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:32 time: 0.3495 data: 0.0049 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:19 time: 0.3523 data: 0.0035 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:10 time: 0.3573 data: 0.0044 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3234 data: 0.0041 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3108 data: 0.0039 max mem: 22448 +eval (validation): [17] Total time: 0:00:32 (0.3825 s / it) +cv: [17] best hparam: (0.27, 1.0) (016) ('016_lr2.7e-01_wd1.0e+00') loss: 2.420 acc: 0.272 f1: 0.201 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:57 lr: nan time: 3.4442 data: 3.0506 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:48 lr: 0.000012 loss: 2.1094 (2.1033) grad: 0.2486 (0.2546) time: 0.4590 data: 0.0049 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:07 lr: 0.000012 loss: 2.0405 (2.0512) grad: 0.2510 (0.2527) time: 0.4356 data: 0.0048 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:47 lr: 0.000011 loss: 1.9924 (2.0426) grad: 0.2571 (0.2570) time: 0.4322 data: 0.0050 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:33 lr: 0.000011 loss: 2.0018 (2.0411) grad: 0.2567 (0.2549) time: 0.4389 data: 0.0052 max mem: 22448 +train: [18] [100/400] eta: 0:02:21 lr: 0.000010 loss: 2.0655 (2.0411) grad: 0.2504 (0.2543) time: 0.4371 data: 0.0048 max mem: 22448 +train: [18] [120/400] eta: 0:02:10 lr: 0.000009 loss: 2.0057 (2.0343) grad: 0.2487 (0.2536) time: 0.4485 data: 0.0053 max mem: 22448 +train: [18] [140/400] eta: 0:02:00 lr: 0.000009 loss: 2.0278 (2.0387) grad: 0.2529 (0.2542) time: 0.4537 data: 0.0051 max mem: 22448 +train: [18] [160/400] eta: 0:01:50 lr: 0.000008 loss: 2.0371 (2.0404) grad: 0.2529 (0.2534) time: 0.4433 data: 0.0047 max mem: 22448 +train: [18] [180/400] eta: 0:01:41 lr: 0.000008 loss: 2.0258 (2.0383) grad: 0.2548 (0.2542) time: 0.4534 data: 0.0054 max mem: 22448 +train: [18] [200/400] eta: 0:01:31 lr: 0.000007 loss: 2.0258 (2.0406) grad: 0.2548 (0.2541) time: 0.4443 data: 0.0050 max mem: 22448 +train: [18] [220/400] eta: 0:01:22 lr: 0.000007 loss: 2.0573 (2.0406) grad: 0.2528 (0.2543) time: 0.4647 data: 0.0051 max mem: 22448 +train: [18] [240/400] eta: 0:01:13 lr: 0.000006 loss: 2.0575 (2.0440) grad: 0.2546 (0.2545) time: 0.4563 data: 0.0049 max mem: 22448 +train: [18] [260/400] eta: 0:01:04 lr: 0.000006 loss: 2.0709 (2.0454) grad: 0.2523 (0.2544) time: 0.4354 data: 0.0047 max mem: 22448 +train: [18] [280/400] eta: 0:00:54 lr: 0.000006 loss: 2.0580 (2.0451) grad: 0.2519 (0.2544) time: 0.4414 data: 0.0049 max mem: 22448 +train: [18] [300/400] eta: 0:00:45 lr: 0.000005 loss: 2.0558 (2.0458) grad: 0.2517 (0.2542) time: 0.4579 data: 0.0052 max mem: 22448 +train: [18] [320/400] eta: 0:00:36 lr: 0.000005 loss: 2.0841 (2.0488) grad: 0.2517 (0.2541) time: 0.4484 data: 0.0051 max mem: 22448 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 2.0363 (2.0464) grad: 0.2524 (0.2537) time: 0.4492 data: 0.0051 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 2.0343 (2.0458) grad: 0.2516 (0.2536) time: 0.4461 data: 0.0049 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 2.0392 (2.0448) grad: 0.2516 (0.2536) time: 0.4432 data: 0.0050 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 2.0269 (2.0429) grad: 0.2498 (0.2538) time: 0.4491 data: 0.0050 max mem: 22448 +train: [18] Total time: 0:03:02 (0.4550 s / it) +train: [18] Summary: lr: 0.000003 loss: 2.0269 (2.0429) grad: 0.2498 (0.2538) +eval (validation): [18] [ 0/85] eta: 0:04:47 time: 3.3823 data: 3.1419 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:34 time: 0.3855 data: 0.0043 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:19 time: 0.3360 data: 0.0040 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:10 time: 0.3480 data: 0.0043 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3219 data: 0.0039 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3204 data: 0.0039 max mem: 22448 +eval (validation): [18] Total time: 0:00:32 (0.3858 s / it) +cv: [18] best hparam: (0.27, 1.0) (016) ('016_lr2.7e-01_wd1.0e+00') loss: 2.423 acc: 0.271 f1: 0.200 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:27:35 lr: nan time: 4.1396 data: 3.8006 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:54 lr: 0.000003 loss: 2.0197 (2.0603) grad: 0.2396 (0.2472) time: 0.4422 data: 0.0027 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:11 lr: 0.000003 loss: 2.0197 (2.0297) grad: 0.2472 (0.2495) time: 0.4405 data: 0.0049 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:50 lr: 0.000002 loss: 2.0304 (2.0459) grad: 0.2467 (0.2495) time: 0.4430 data: 0.0051 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:36 lr: 0.000002 loss: 2.0552 (2.0471) grad: 0.2461 (0.2485) time: 0.4476 data: 0.0049 max mem: 22448 +train: [19] [100/400] eta: 0:02:24 lr: 0.000002 loss: 2.0300 (2.0443) grad: 0.2471 (0.2492) time: 0.4479 data: 0.0051 max mem: 22448 +train: [19] [120/400] eta: 0:02:12 lr: 0.000002 loss: 2.0361 (2.0484) grad: 0.2471 (0.2503) time: 0.4358 data: 0.0050 max mem: 22448 +train: [19] [140/400] eta: 0:02:02 lr: 0.000001 loss: 2.0409 (2.0456) grad: 0.2438 (0.2495) time: 0.4517 data: 0.0051 max mem: 22448 +train: [19] [160/400] eta: 0:01:51 lr: 0.000001 loss: 2.0248 (2.0426) grad: 0.2413 (0.2486) time: 0.4360 data: 0.0052 max mem: 22448 +train: [19] [180/400] eta: 0:01:41 lr: 0.000001 loss: 2.0129 (2.0404) grad: 0.2502 (0.2500) time: 0.4438 data: 0.0053 max mem: 22448 +train: [19] [200/400] eta: 0:01:32 lr: 0.000001 loss: 2.0089 (2.0385) grad: 0.2512 (0.2496) time: 0.4468 data: 0.0054 max mem: 22448 +train: [19] [220/400] eta: 0:01:23 lr: 0.000001 loss: 2.0168 (2.0378) grad: 0.2442 (0.2490) time: 0.4548 data: 0.0053 max mem: 22448 +train: [19] [240/400] eta: 0:01:13 lr: 0.000001 loss: 2.0334 (2.0364) grad: 0.2453 (0.2496) time: 0.4532 data: 0.0051 max mem: 22448 +train: [19] [260/400] eta: 0:01:04 lr: 0.000000 loss: 2.0209 (2.0342) grad: 0.2490 (0.2495) time: 0.4384 data: 0.0051 max mem: 22448 +train: [19] [280/400] eta: 0:00:54 lr: 0.000000 loss: 2.0409 (2.0378) grad: 0.2500 (0.2497) time: 0.4401 data: 0.0051 max mem: 22448 +train: [19] [300/400] eta: 0:00:45 lr: 0.000000 loss: 2.0573 (2.0371) grad: 0.2516 (0.2496) time: 0.4467 data: 0.0052 max mem: 22448 +train: [19] [320/400] eta: 0:00:36 lr: 0.000000 loss: 2.0566 (2.0399) grad: 0.2535 (0.2501) time: 0.4439 data: 0.0051 max mem: 22448 +train: [19] [340/400] eta: 0:00:27 lr: 0.000000 loss: 2.0707 (2.0420) grad: 0.2482 (0.2500) time: 0.4310 data: 0.0048 max mem: 22448 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 2.0841 (2.0430) grad: 0.2438 (0.2497) time: 0.4477 data: 0.0051 max mem: 22448 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 2.0280 (2.0426) grad: 0.2457 (0.2496) time: 0.4390 data: 0.0050 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.0331 (2.0438) grad: 0.2460 (0.2496) time: 0.4389 data: 0.0050 max mem: 22448 +train: [19] Total time: 0:03:01 (0.4534 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.0331 (2.0438) grad: 0.2460 (0.2496) +eval (validation): [19] [ 0/85] eta: 0:04:34 time: 3.2287 data: 2.9470 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:32 time: 0.3701 data: 0.0320 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:19 time: 0.3642 data: 0.0031 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3731 data: 0.0040 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3204 data: 0.0038 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3121 data: 0.0037 max mem: 22448 +eval (validation): [19] Total time: 0:00:33 (0.3920 s / it) +cv: [19] best hparam: (0.27, 1.0) (016) ('016_lr2.7e-01_wd1.0e+00') loss: 2.422 acc: 0.272 f1: 0.201 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.27150239940937615, "hparam": [0.27, 1.0], "hparam_id": 16, "epoch": 19, "is_best": false, "best_score": 0.2737172388335179} +eval (train): [20] [ 0/509] eta: 0:26:29 time: 3.1236 data: 2.8258 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:17 time: 0.3978 data: 0.0049 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:31 time: 0.3713 data: 0.0047 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:07 time: 0.3497 data: 0.0043 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:53 time: 0.3595 data: 0.0045 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:38 time: 0.3258 data: 0.0041 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:28 time: 0.3511 data: 0.0041 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:19 time: 0.3634 data: 0.0046 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:10 time: 0.3408 data: 0.0045 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:02 time: 0.3422 data: 0.0042 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:53 time: 0.3460 data: 0.0043 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:45 time: 0.3393 data: 0.0041 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:37 time: 0.3406 data: 0.0043 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:29 time: 0.3286 data: 0.0040 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:22 time: 0.3787 data: 0.0048 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:15 time: 0.3507 data: 0.0040 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:08 time: 0.3415 data: 0.0040 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:00 time: 0.3265 data: 0.0041 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:53 time: 0.3529 data: 0.0044 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:46 time: 0.3458 data: 0.0044 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.3579 data: 0.0047 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:31 time: 0.3497 data: 0.0043 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3243 data: 0.0041 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3473 data: 0.0044 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3505 data: 0.0045 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3430 data: 0.0040 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3245 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:01 (0.3561 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:24 time: 3.1064 data: 2.8701 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:32 time: 0.3622 data: 0.0034 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3628 data: 0.0040 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3532 data: 0.0042 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3243 data: 0.0040 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3198 data: 0.0039 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3846 s / it) +eval (test): [20] [ 0/85] eta: 0:04:31 time: 3.1907 data: 2.8804 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:32 time: 0.3629 data: 0.0053 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:20 time: 0.3981 data: 0.0044 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3551 data: 0.0044 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3316 data: 0.0043 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3150 data: 0.0040 max mem: 22448 +eval (test): [20] Total time: 0:00:33 (0.3947 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:03 time: 2.9740 data: 2.6964 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3797 data: 0.0047 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3722 data: 0.0044 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3683 data: 0.0049 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3390 data: 0.0050 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3242 data: 0.0047 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.3980 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.2737172388335179, "hparam": [0.52, 1.0], "hparam_id": 20, "epoch": 6, "is_best": true, "best_score": 0.2737172388335179} +eval (train): [20] [ 0/509] eta: 0:26:20 time: 3.1046 data: 2.8597 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:12 time: 0.3876 data: 0.0145 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:22 time: 0.3426 data: 0.0034 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:01 time: 0.3502 data: 0.0041 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:48 time: 0.3534 data: 0.0044 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:41 time: 0.4094 data: 0.0048 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:34 time: 0.4093 data: 0.0050 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:24 time: 0.3532 data: 0.0046 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:15 time: 0.3644 data: 0.0046 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:06 time: 0.3588 data: 0.0046 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:58 time: 0.3683 data: 0.0046 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:50 time: 0.3745 data: 0.0046 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:42 time: 0.3753 data: 0.0046 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:34 time: 0.3575 data: 0.0044 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:26 time: 0.3507 data: 0.0044 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:18 time: 0.3545 data: 0.0046 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:10 time: 0.3409 data: 0.0039 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:03 time: 0.3613 data: 0.0042 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:55 time: 0.3600 data: 0.0042 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:48 time: 0.3876 data: 0.0046 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:40 time: 0.3679 data: 0.0045 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:33 time: 0.3357 data: 0.0041 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3744 data: 0.0043 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3457 data: 0.0037 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3630 data: 0.0043 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3646 data: 0.0045 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3427 data: 0.0038 max mem: 22448 +eval (train): [20] Total time: 0:03:08 (0.3713 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:00 time: 2.8269 data: 2.5972 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:33 time: 0.3997 data: 0.0245 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3635 data: 0.0041 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3604 data: 0.0045 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3304 data: 0.0039 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3126 data: 0.0036 max mem: 22448 +eval (validation): [20] Total time: 0:00:33 (0.3932 s / it) +eval (test): [20] [ 0/85] eta: 0:04:19 time: 3.0568 data: 2.7767 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:35 time: 0.4136 data: 0.0050 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:20 time: 0.3830 data: 0.0048 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3669 data: 0.0044 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:02 time: 0.3755 data: 0.0046 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3503 data: 0.0042 max mem: 22448 +eval (test): [20] Total time: 0:00:35 (0.4167 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:18 time: 3.1475 data: 2.8597 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:32 time: 0.3848 data: 0.0056 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3527 data: 0.0037 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3655 data: 0.0044 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3360 data: 0.0043 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3240 data: 0.0041 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.3951 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|---------:|-----:|------------:|:------------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | train | 2.2521 | 0.32767 | 0.0022201 | 0.26225 | 0.0022267 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | validation | 2.4153 | 0.27372 | 0.0051447 | 0.20614 | 0.0046339 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | test | 2.3897 | 0.27959 | 0.0053009 | 0.202 | 0.0048196 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | testid | 2.3752 | 0.27916 | 0.0055198 | 0.21536 | 0.0049827 | + + +done! total time: 1:23:07 diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..a05d312102105e8e4774219874bd97c0d2d5d3e1 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.1266173803806305, "train/grad": 0.16402100570499897, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.17427490234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.173802490234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.17290283203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.172034912109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.171219482421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.170194091796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.16894287109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.167685546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.166173095703125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.164573974609375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.163131103515625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.16118896484375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.159349365234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.157244873046875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.155401611328125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.15379150390625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.15202392578125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1502880859375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1486669921875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.147271728515625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.145986328125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.14469482421875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.143553466796875, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.142476806640625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.141541748046875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.14073974609375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.140015869140625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.139605712890625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.139237060546875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.138941650390625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.13878173828125, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.13874755859375, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.138787841796875, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.138861083984375, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.13908935546875, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.13903076171875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.1388055419921876, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.137625732421875, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.132833251953125, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1136456298828126, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.097481536865234, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0809950256347656, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0590164184570314, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.038164520263672, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.016712188720703, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.992781524658203, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.979961357116699, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9619751167297363, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.9540028190612793, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024424024671316148, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.024381232718005776, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.024311111867427827, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024243522277101873, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024178221938200295, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02408911946695298, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02399372313171625, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023890425539575516, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02376238737255335, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0236344324471429, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023517039520666003, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023355420865118503, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023213395848870277, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023033850411884486, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022885772767476737, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022763154455460607, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02263057019561529, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022500544586218894, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02238277732860297, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022293403688818218, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022201422499492765, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022125663743354382, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022058713026344778, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02200236096046865, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.021956082368269562, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021917027467861772, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.021889764978550375, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.021872572652064264, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.021856714081950487, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.021847816985100507, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.021842292295768857, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0218355278018862, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021820664564147593, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.021796933012083172, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.021745520988479258, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.021665972443297505, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.021547721466049552, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.021424752841703594, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021382748149335384, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02181693337392062, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.022677932530641556, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.023276290707290172, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02400298244319856, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02467929871287197, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.025325749474577604, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.026001818887889386, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.027032918464392423, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02808162719011307, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02920815084129572, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.165050983428955, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1635022163391113, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.160989284515381, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.158609390258789, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1564033031463623, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1535484790802, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.150527238845825, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1474764347076416, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.143948793411255, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1406688690185547, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1378633975982666, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1344404220581055, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.131826162338257, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1290090084075928, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1271352767944336, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1258695125579834, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.124765634536743, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1239163875579834, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1233301162719727, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.122929811477661, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.122605323791504, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.122434139251709, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1224045753479004, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1226394176483154, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.123197317123413, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1241304874420166, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1251494884490967, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1261532306671143, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.127241611480713, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1281535625457764, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.128131151199341, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.127270460128784, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.124711513519287, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1216626167297363, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.117708683013916, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.112616777420044, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.102417469024658, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0755324363708496, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9805383682250977, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.821201801300049, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7620978355407715, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.710529327392578, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6611826419830322, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6591598987579346, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6382052898406982, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6071255207061768, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.6246654987335205, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.6299848556518555, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.653925895690918, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.056109265411590996, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05703211517165006, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0592469545957918, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.060723514211886306, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06016980435585087, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.060538944259874494, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.060538944259874494, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.059800664451827246, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06183093392395718, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.067921742340347, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06662975267626431, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06570690291620525, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.059616094499815434, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.05906238464377999, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.05518641565153193, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.05278700627537837, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.053156146179401995, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.052417866371354746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.05444813584348468, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.058693244739756366, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.07770394979697305, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.0812107788851975, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.09782207456626062, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.1138796603912883, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.16666666666666666, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18124769287559986, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1952750092284976, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20155038759689922, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19730527870062753, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.20358065706902917, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21760797342192692, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.2131782945736434, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.20450350682908822, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.20764119601328904, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012797615972987548, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012996761612616222, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013452887533160448, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013931492393200464, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01403280846372252, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014483232052457771, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015195489806816946, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015857979650026378, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.017250999994271456, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0175629061483364, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.018514687292615465, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01765005450233507, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018235913078962704, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.017899977157803004, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.017039566398100624, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.016072229192781676, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01425073126815673, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.014092452412131237, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.014670608246721034, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.014882079335349114, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.015773621896773345, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.016574631392096743, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.016790611547713538, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.017281203206132304, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.01814492838992412, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.015786345284649863, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.013009798294050861, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.011512944568775293, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.009039759095931929, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.007627470611530303, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.008737513722569994, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.008731593162441932, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.012010779969127472, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01601407520811964, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.02017559227236381, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.022135632196548616, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.02679117805586689, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.041158079414183445, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.056064869042243226, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.09442070922612877, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.10094951623699237, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.10950282341833671, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.11838349128090118, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.12672217455401416, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14048213832554626, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15319804203231976, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.15393045851039222, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1459894462219424, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.14824239972798123, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 2.992781524658203, "validation/loss_best": 2.6071255207061768, "validation/acc_best": 0.21760797342192692, "validation/f1_best": 0.15319804203231976} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.964813303947449, "train/grad": 0.18751651115715504, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.157137451171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.15484619140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.151507568359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.148714599609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.146160888671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.14326416015625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.140662841796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13834228515625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1360888671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.134327392578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.13303466796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13171875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1308349609375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1299169921875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.129345703125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.128992919921875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.128544921875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1280126953125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.127451171875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.12701904296875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1263818359375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.12567138671875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.12486572265625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.124111328125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.12316162109375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.121949462890625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.120650634765625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.119151611328125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1151043701171877, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0995501708984374, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.063423614501953, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.0221202087402346, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.960144805908203, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.9062425231933595, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.835534439086914, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.771191749572754, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.707862377166748, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.6549607276916505, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.6066425037384033, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.5676262092590334, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.555438380241394, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.544385356903076, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.536764223575592, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5396044921875, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5616758489608764, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5815479922294617, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.607069594860077, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.628465805053711, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.6786320090293883, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022613624501973392, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022418514690361917, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022128564394079148, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02188071232289076, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021669895192608236, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02142674877308309, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02120858170557767, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02102246280759573, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0208439664542675, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020717117111198603, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02063348312396556, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020556856077164412, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0205190710676834, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020498529109172523, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020499367853626608, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020509409150108696, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020526969460770487, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02055115966591984, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020578520074486732, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020603865589946507, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020631708637811243, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020656321542337538, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020679460340179503, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.020695086694322528, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.020701114311814307, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.020678283516317605, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02062566820066422, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02054879466071725, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.020422436469234525, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.020468093124218285, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.021220224942080677, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.022232175352983177, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023558935848996043, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02476291994564235, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.026396921621635556, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.027668104330077766, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.029135042456910013, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.030722833136096597, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03244736095424741, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03406385592184961, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0352533158659935, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0362284617125988, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03670034802518785, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03770631217397749, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.039275648510083555, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03918208058923483, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04039813572540879, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.042938179513439534, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.04574765215627849, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1422696113586426, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1394877433776855, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.135711431503296, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.132835865020752, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1306586265563965, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.128459930419922, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.126819133758545, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1256186962127686, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1246650218963623, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1240673065185547, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1236629486083984, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1232383251190186, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.122882604598999, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.122386932373047, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.121870279312134, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1213366985321045, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1206085681915283, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.119723081588745, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.11879825592041, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1180267333984375, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1173057556152344, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1167995929718018, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.116515874862671, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.116452217102051, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1164491176605225, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1163878440856934, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1153411865234375, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.111337184906006, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.0898706912994385, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.956000566482544, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7796144485473633, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7079930305480957, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6137259006500244, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5620641708374023, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.548595905303955, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5372157096862793, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5080714225769043, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.493283987045288, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5156617164611816, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.524670124053955, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.516486644744873, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5335352420806885, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.5973281860351562, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6257758140563965, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.692042589187622, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.714284658432007, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.7438089847564697, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.8707072734832764, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.859907865524292, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06312292358803986, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06312292358803986, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.073827980804725, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07401255075673681, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07604282022886674, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.0753045404208195, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.07715023994093761, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.08877814691768181, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.13732004429678848, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.17220376522702105, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.18826135105204872, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2207456626061277, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22443706164636398, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23126614987080105, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24031007751937986, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23588039867109634, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.24289405684754523, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23329641934293097, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22665190107050573, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2201919527500923, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.22000738279808046, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21354743447766703, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.20358065706902917, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.18881506090808417, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.19010705057216684, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01725168054546219, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.016544415531039025, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01741079771606059, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.016963753567739725, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01679229284520684, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015724227177028507, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013765242676487764, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011383770985416491, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010161192109034066, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.009767637883754373, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010079855094579983, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01053462691010893, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010770248936721483, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.010853948240514022, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.010962403555016752, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.012304937249204895, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01282444424892034, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01297185739957086, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.014231633967150272, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01653127118973201, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.020981505030560166, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.0220581261769279, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.023590304379956534, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.024234399897264534, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.02382918242625032, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.02289778514986979, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.02159936314276464, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.02153570141694272, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.0293129174698627, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.06394896861435222, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.09524163817057489, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.11431373751476819, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1373005058528303, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1578279621185291, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16313973656982209, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1656139577426854, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17158630133858785, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17830204605081387, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17314049633042675, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16357530208212998, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1662490326820699, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15931157769166346, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16046939596432444, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15239580620302823, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.16178056615276773, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.14427319746532052, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.13266580310708712, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1311797676036194, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.12632849303553853, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 2.555438380241394, "validation/loss_best": 2.516486644744873, "validation/acc_best": 0.24289405684754523, "validation/f1_best": 0.1662490326820699} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.948859519958496, "train/grad": 0.3653836324810982, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.141119384765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13912109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13681396484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.135179443359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.134091796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1330859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13253662109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13201171875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.13144287109375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1309228515625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.130450439453125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.129781494140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12913330078125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.128116455078125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.127177734375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.126322021484375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.125208740234375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.123790283203125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.12233642578125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.12092529296875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1192236328125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1168798828125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.11326416015625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1048040771484375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.0763705444335936, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.000306396484375, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.918101119995117, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.84202693939209, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.738894386291504, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.633126964569092, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.5626304817199705, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.514662265777588, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.4657856178283692, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.4317030239105226, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3949668502807615, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.377296795845032, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.362912142276764, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3618588924407957, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.366714553833008, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3811258697509765, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.396776571273804, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.408642246723175, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4407203912734987, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.467011456489563, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5358604180812834, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.598733446598053, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.7046384906768797, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.070247242450714, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.379266786575317, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021204453087411822, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02103519511874765, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020844285637140275, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020729404194280505, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02066356752999127, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02061769265215844, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02060049220919609, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020601646187715233, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020615415200591087, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02063251681625843, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020647982535883784, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02067005377728492, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020686962478794157, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020709367864765228, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020728225447237493, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020743743083439767, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02076075255870819, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020774045633152126, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020780831212177872, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020776962819509207, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0207540768943727, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020707182050682605, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020633261669427156, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.020589476577006282, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02098445043899119, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0228428757423535, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02460836029611528, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02620704172179103, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02847064639441669, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03110799437388778, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03255855874158442, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03321021807380021, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03413846155628562, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.034991232845932245, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03586421146057546, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.036587751917541024, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03772064739838243, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.038696241909638045, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.039512205608189105, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04021040833555162, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04172019382938743, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.042591743161901834, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.043822614969685675, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04502596547827124, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04854940677061677, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05012158835306764, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05634777088649571, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.14022349759936334, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.13274894570931792, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.128654956817627, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.127246618270874, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.125781536102295, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.124938726425171, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.124399423599243, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.123936176300049, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1235404014587402, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1232025623321533, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1228249073028564, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.122480869293213, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.122225046157837, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1218268871307373, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1215507984161377, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1212704181671143, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.121107339859009, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.120957851409912, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1207268238067627, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.12030029296875, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1192467212677, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1176702976226807, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1143431663513184, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1083548069000244, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0942203998565674, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.047938346862793, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.867403030395508, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.699863910675049, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6293246746063232, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5686278343200684, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.50313401222229, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4686334133148193, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4499833583831787, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.456571578979492, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4544594287872314, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.464933395385742, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.4881114959716797, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5452685356140137, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5692880153656006, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.577803134918213, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.593675136566162, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5798983573913574, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.585641622543335, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6090023517608643, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6975045204162598, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7046923637390137, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.7690653800964355, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.857156991958618, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.9785728454589844, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08231819859726837, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.1064968623108158, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.16057585825027684, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.19324473975636766, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24953857511997046, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.260797342192691, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.25599852344038393, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26135105204872644, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25064599483204136, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24067921742340348, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23588039867109634, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22609819121447028, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22369878183831673, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22720561092654115, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20967146548541898, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.22093023255813954, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.20321151716500555, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014752262455821038, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012627345207585258, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010961188330440104, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010457356634818185, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010185234361582725, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010234532550617763, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01026926725805971, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.010226204708375404, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010777519383081205, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011137170972881269, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.011414768205857859, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01291199502315976, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013298039798430021, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013600372499408535, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.013327221057718198, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.013476834515553343, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.013346986910113429, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.013459368935519342, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013449703642696723, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01349414471348521, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.015770087290933158, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.017158498839626134, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.024705394290143765, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.040757133563078494, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.07397072538104434, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.10901352846872163, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.13315248007207223, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.15237778718201658, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1757850866397653, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18800876195513713, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18630801851598958, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17883545850738816, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18862375386811459, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18862364630139913, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1806349846862915, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17751284300079362, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1739119231326344, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1751020544890958, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17398429504836058, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17600701838864152, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18112383746157112, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1686390007330177, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15832636914835072, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16482770985307918, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.15332282745426115, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1455001666723509, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.13775736396039742, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.4657856178283692, "validation/loss_best": 2.4544594287872314, "validation/acc_best": 0.26135105204872644, "validation/f1_best": 0.18862375386811459} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.8950232315063475, "train/grad": 0.3878345040231943, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.132310791015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.131414794921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13044189453125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1297705078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12920166015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12863037109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.128001708984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.127474365234375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.126531982421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1257470703125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12490966796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.123765869140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.122467041015625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1209228515625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1192333984375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1178564453125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.115938720703125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.113240966796875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.109102783203125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.102064208984375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.078211669921875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.0034144592285155, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.8895838928222655, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.7643707275390623, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.654366760253906, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.5651084899902346, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.5024816513061525, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.4581409454345704, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.4050786209106447, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.3546216011047365, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.3202068853378295, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.297275824546814, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.2756389141082765, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.2642825412750245, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.260812101364136, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2635351276397704, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2697319555282593, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.285107629299164, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3113554739952087, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3551955044269564, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.389390333890915, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.431517918109894, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.512093189954758, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5833821046352385, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.66297189950943, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.733438225984574, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.162049511671066, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020529457796365023, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02050071441568434, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020489885285496712, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020497074155136942, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02050824843812734, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020524109387770294, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020537491915747524, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02055084948427975, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02056604959536344, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02057965560350567, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020591806368902326, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02060643641743809, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020617279028519987, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020626199478283525, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020627925167791546, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020622614533640445, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020606484431773424, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020572966574691237, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020520592262037098, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020492467232979834, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02074925279710442, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02225722559262067, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.025032560201361777, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.028012992944568396, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03069798480719328, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03233519613742828, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03350469777360558, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03415381035767496, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03522766913287342, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.036277171410620215, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.037209905432537195, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03802575820125639, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03912170009687543, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.040013048816472295, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04079259127378464, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04164475250989199, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.042903425600379706, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04343094022944569, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04437713548541069, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04536162596195936, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.046490503456443545, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0483974196203053, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05159328401088715, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.055640529170632363, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05842427179217338, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.16426345437765122, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1366490214690566, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.124640941619873, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1241698265075684, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1236367225646973, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12322998046875, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122849941253662, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122344732284546, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121777296066284, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1211414337158203, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1203036308288574, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1194186210632324, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1185715198516846, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.117288827896118, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.116124391555786, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1144535541534424, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.112936019897461, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.111461877822876, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.109382152557373, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1058177947998047, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.097471237182617, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.0757148265838623, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.965503692626953, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.7589495182037354, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6607954502105713, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5771799087524414, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5248162746429443, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4833223819732666, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4711599349975586, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4728505611419678, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4844181537628174, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4812793731689453, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4855151176452637, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4868335723876953, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4800033569335938, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.51420259475708, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.518503189086914, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.546903371810913, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.615666627883911, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6433417797088623, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6853370666503906, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.775270700454712, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6870274543762207, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.72511887550354, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.79587721824646, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9526920318603516, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.955667734146118, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07770394979697305, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.09080841638981174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.13602805463270579, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.17552602436323367, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2026578073089701, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22000738279808046, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23015873015873015, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2454780361757106, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25230712440014763, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2484311554078996, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24861572535991142, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25599852344038393, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26264304171280917, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25544481358434845, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.25212255444813586, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24012550756736803, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20837947582133629, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21963824289405684, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21502399409376152, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20321151716500555, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20801033591731266, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.18881506090808417, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011200619879036233, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010973072965435655, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011111839178671342, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010993591656466989, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.011031392145173869, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01092041407095652, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010794751314914858, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01091663634735606, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011875939841798083, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012810657978242565, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013785394001211095, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.014569234430670944, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01641600088435787, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.018127002764973484, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.020710075496475744, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.022195125137817077, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02400822240499248, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.027718883270165723, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.030581512864233575, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.04309169813412872, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.07631296543036967, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.10539410353017398, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1276203501097836, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1441841669003081, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1567899052020313, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18046507041966756, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19008467076565552, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18551249403759762, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1855346180572671, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19035640745884733, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18696137770843044, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18335800011135303, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18571777635057174, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17715100938130934, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1819356931790843, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1743528203817636, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17226653416832138, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17858336378661108, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17197888451179907, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15718691677033111, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16496555997194853, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1633180340994127, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15380284035116445, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.13067344178920795, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14488938894995293, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.2756389141082765, "validation/loss_best": 2.4800033569335938, "validation/acc_best": 0.26264304171280917, "validation/f1_best": 0.18571777635057174} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.7385277926921843, "train/grad": 0.2429221387207508, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.133216552734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1327392578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.131866455078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13124755859375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.130560302734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.129638671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1286083984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12757080078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12608642578125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.124765625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.123409423828125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12138916015625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11950927734375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.116588134765625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.113570556640625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.109759521484375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.102158203125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.078404541015625, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.9863824462890625, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.8685702514648437, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.7304312133789064, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.6307544708251953, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.544554214477539, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.471829376220703, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.413003978729248, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3556212615966796, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.3112665843963622, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2824370861053467, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2460236072540285, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.212335548400879, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.1916538763046263, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.177711081504822, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.174936590194702, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.1809600448608397, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.1909579205513, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2041236972808838, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.230388479232788, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2574828815460206, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2957912981510162, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.368400764465332, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.427916793823242, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4765157675743104, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.564676582813263, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.658742023706436, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.8658076882362367, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02032390847336501, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020332885389216244, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02034680490847677, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020356777189299465, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020364839415997267, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02037245022598654, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020379689438268542, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020387080051004886, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02039541138801724, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020402704062871636, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020407747118733822, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020415514330379664, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020415599239058793, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02040520144626498, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02038189461454749, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020350535516627133, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020324807944707573, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02051470975857228, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02211170951835811, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02486682452261448, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.028245042944326997, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.030858546495437622, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.032746521765366195, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03399407960474491, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03482795053161681, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03567225151695311, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03634144221432507, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0367272031493485, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03782388336025178, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.038865697458386424, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.039501650538295505, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.039878231808543205, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0410340128839016, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.041949652004987, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04329194210469723, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04414484564214945, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04527963768690824, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04612807096913457, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04727332239970565, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.048630294892936944, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.051609785687178375, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05446693122386932, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.057141458224505184, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06245324930176139, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0747447706758976, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123962163925171, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1237130165100098, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123335599899292, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1230273246765137, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122673511505127, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1221845149993896, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1216909885406494, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1210641860961914, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.120218515396118, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.119274377822876, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.118281841278076, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1165902614593506, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1147866249084473, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.111725091934204, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1077287197113037, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.101475954055786, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.0834238529205322, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.0008623600006104, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.7754721641540527, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.6859242916107178, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.6031041145324707, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.547868490219116, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5083346366882324, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.469614028930664, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.446472644805908, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4541099071502686, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4605958461761475, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4712891578674316, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.503643751144409, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.554161787033081, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5737383365631104, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6044039726257324, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.625922918319702, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.666365385055542, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7066187858581543, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.674267292022705, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6558501720428467, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.699054002761841, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.675379753112793, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7630972862243652, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.877192735671997, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9052059650421143, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8855817317962646, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.248483896255493, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.174722194671631, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.08748615725359911, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.12440014765596161, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.16758951642672573, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.1908453303802141, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.21354743447766703, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2279438907345884, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23809523809523808, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26245847176079734, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2589516426725729, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2606127722406792, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25230712440014763, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24067921742340348, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2351421188630491, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2294204503506829, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2220376522702104, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2349575489110373, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21908453303802142, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20284237726098192, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.18715393133997785, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.1925064599483204, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.17423403469915097, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.1790328534514581, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009964392118642319, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010012854819911958, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0101562156242493, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01020884822058075, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010164406282511885, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010634402235287013, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.011163416289143352, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012424045355383039, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014622534287795629, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01572900002472853, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01582339772502297, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.017254520075493808, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.020330627967877594, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.022837009470036074, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.024892603759069116, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.027960654165985827, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.03723517815307962, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.060520535853728775, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.09835731035718621, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.11859684353582606, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1403760637938549, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.15579671002552054, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.16848056167766798, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18246020011063746, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19298470837322867, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19358175349199133, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1956282053441811, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2020629268452908, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19941720671794552, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1950637504089542, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1939750409695414, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18579152738875052, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17917532261526423, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17390699543912982, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16571357334406442, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16688820650480998, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17324678248843137, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1698330898376641, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18020833239303688, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16841100281850233, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15883303963262257, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15337995297634105, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15798156051027643, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.12549629638063817, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.13222763264457071, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.413003978729248, "validation/loss_best": 2.446472644805908, "validation/acc_best": 0.26245847176079734, "validation/f1_best": 0.19298470837322867} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.7085488176345827, "train/grad": 0.31045504689216613, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.124698486328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124056396484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1229638671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1220068359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12111328125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1198779296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11853759765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.117138671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.115302734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.113465576171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.111724853515625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.108948974609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.105869140625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0997119140625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.08810791015625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0588250732421876, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.9432757568359373, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.7720054626464843, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.647071533203125, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.571981430053711, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.4906365203857423, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.4159828186035157, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.3506885528564454, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.2948366451263427, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.2433902168273927, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.19872932434082, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.1643371868133543, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.1409102725982665, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.109724273681641, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0909984350204467, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0867730951309205, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.084808578491211, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.0771921825408937, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.085278403759003, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.1005716371536254, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.129157826900482, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1687469840049745, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2136185646057127, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2645494031906126, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3269487005472183, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.378463121652603, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.441927398443222, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.571788954734802, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.6313339614868165, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.263318018913269, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020604416271671654, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020613176645711065, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020624805479310452, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020634722956456245, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02064357669092715, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020655576731078328, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02066837065387517, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02067884768359363, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020691025028936563, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020701113096438347, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020706492690369487, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020708099957555532, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02070129125379026, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02069104222115129, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02074703190010041, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021135961594991384, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023339760727249085, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.027044758778065444, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.030083511471748352, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03184574390761554, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.033925687689334155, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.035561761297285555, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.036486765602603555, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.037171411281451586, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03787449295632541, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03833992078900337, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03913906549103558, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.039550123475492, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04048125455155969, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0418317661434412, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04302249208092689, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.043776095826178786, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04456870906054974, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0451715636998415, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04651908488944173, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04783834369853139, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04931204177439213, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.050923112835735085, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051273949220776556, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05131404859945178, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05304885137826204, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05597954718396068, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.061547421514987946, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06299390586093068, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.10227952932938933, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.122138023376465, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.121582269668579, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1206893920898438, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.11981463432312, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1190197467803955, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.11797833442688, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1168954372406006, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1157004833221436, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1142218112945557, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1126301288604736, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.111018419265747, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.108196496963501, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1045081615448, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.094744920730591, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.0698540210723877, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.986616849899292, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.758103847503662, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.654646635055542, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5661816596984863, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.507779121398926, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4627323150634766, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.445571184158325, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4435760974884033, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4460208415985107, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4306907653808594, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.438108205795288, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4502556324005127, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4962358474731445, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.561976194381714, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5640366077423096, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.572144031524658, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5831868648529053, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.610095739364624, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.6467394828796387, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.702916383743286, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.782078504562378, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8640122413635254, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8452701568603516, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8307182788848877, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.818718910217285, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8071255683898926, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.864485025405884, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8713440895080566, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.94378924369812, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.08176448874123293, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.09634551495016612, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.12790697674418605, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.17423403469915097, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.19915097822074565, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.21853082318198597, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.23864894795127353, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2574750830564784, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26153562200073827, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2705795496493171, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2661498708010336, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2648578811369509, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2572905131044666, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2502768549280177, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25046142488002954, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24510889627168697, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23661867847914358, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1997046880767811, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2085640457733481, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19601328903654486, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20764119601328904, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19656699889258028, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19952011812476927, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19712070874861573, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008731505812289165, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.00870678870753721, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.007796741088429295, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008115594122948119, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008154876619437012, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008491566640441275, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008383758484829347, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.008918957628601532, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011403798420848881, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012077097374344592, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013246473594824226, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.014603975724657806, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018234649972552048, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.026385283464974207, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.03877256140383013, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.06039206872624081, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.09616685397820306, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.11800957460127014, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1367979367450572, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.16067809222826032, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.17576036289349214, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18460976034110713, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1921710956380567, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1872561229899384, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19619333098696626, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19819073109762564, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1997421897322107, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19354931477367512, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1887657249691471, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19423077116441512, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19055219403423326, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19168652556717872, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19782398701100598, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19256821264743448, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17848415496842632, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15875462109885477, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1524886786220912, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15481572666342056, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15462422867757272, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15990242435791494, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16164149611257833, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1524217202375953, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.166330191858169, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14908400327320118, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.2433902168273927, "validation/loss_best": 2.4306907653808594, "validation/acc_best": 0.2705795496493171, "validation/f1_best": 0.19619333098696626} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.6058802115917206, "train/grad": 0.2515591936558485, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.127711181640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.126904296875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1254736328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.124111328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.122864990234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12100830078125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11922119140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.117200927734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.114632568359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11184326171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.10906005859375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10401611328125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0967138671875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.07187744140625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.9790814208984373, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.8175643920898437, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.683145523071289, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.5912247848510743, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.5025456237792967, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.43813346862793, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.3675025939941405, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.3005370903015137, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.2444728565216066, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1945267295837403, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.1474512004852295, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.1094961833953856, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.076963324546814, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.054515452384949, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0229310464859007, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0004924654960634, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9993424582481385, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9978477764129638, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.0053583681583405, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.0214082479476927, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.0415104484558104, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.0729297041893004, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.108947387933731, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.141664981842041, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.1951014757156373, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2826798963546753, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.350919214487076, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.423932819366455, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4858964252471925, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5779956090450287, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02058455245103687, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02058876207098365, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020593973179347812, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020596914230845868, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020600444516167046, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020603880216367543, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02060618647839874, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02060791294556111, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020606324886903168, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02059913715813309, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020589790884405375, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020570028005167842, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02056400179397315, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02076148786582053, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022270903661847113, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0258168535400182, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.029026980055496098, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03127288629300892, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0337390214484185, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.035075100995600225, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0362756453640759, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03712584026157856, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03752180048264563, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03799232012592256, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03883474288508296, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03957825778052211, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0405903578735888, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04118130650371313, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04220969228073954, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04321799881756306, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.044297375045716764, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.044854807816445824, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04562699608504772, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04618030089884997, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04713840506970882, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.047912546303123235, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.048912846986204385, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04910653438419103, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.050003711096942426, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05135800717398524, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05338399447500706, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05624669935554266, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.058024434335529805, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06182327341288328, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.121178150177002, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1205523014068604, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.119619369506836, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1187634468078613, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.117987871170044, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.116945266723633, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.115818977355957, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1146175861358643, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1129543781280518, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1109671592712402, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.108591318130493, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.103053569793701, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0921630859375, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.0399341583251953, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.825115203857422, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6874988079071045, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.6012392044067383, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5212113857269287, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4674065113067627, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.441228151321411, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.415285348892212, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4149670600891113, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4245574474334717, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4322938919067383, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.438321113586426, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4736993312835693, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.503950357437134, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5318100452423096, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5634067058563232, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5948705673217773, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6043014526367188, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.639071226119995, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6620211601257324, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.665306329727173, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7031474113464355, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7289764881134033, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7412312030792236, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.747131586074829, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6998987197875977, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7876856327056885, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8693807125091553, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8010220527648926, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.878197431564331, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.985473155975342, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.08194905869324474, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.10908084163898117, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.16371354743447766, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.19472129937246216, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.21650055370985605, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.23624953857511996, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2648578811369509, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2678110003691399, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26208933185677374, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26301218161683276, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25839793281653745, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25101513473606496, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24732373569582872, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23883351790328536, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23791066814322628, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22388335179032853, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2085640457733481, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2009966777408638, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22129937246216316, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2041343669250646, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20505721668512367, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008410386126009953, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008480437389919254, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00895789885533626, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00866956241300088, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00884857574764388, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010068457275213197, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01139533971777633, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.013816837069388544, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01612691715950788, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.017929240243147463, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.020750243623566113, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.02578317038329067, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.03233430145710393, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.05175600624974641, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.08806801638456369, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.12013958829773881, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.14435793584549902, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.16434189271035776, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18513037674250776, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1979322456872544, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20614164787225583, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20161160568341355, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19786703598248093, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1954111466516725, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20273812035474947, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.204796430568131, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19564130815523642, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19361423496361893, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20067548422062229, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19514629539226327, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19056836669960245, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19410580793605747, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19411942167597163, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19159641701501343, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19070417886653077, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19116699119478306, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18150368825184568, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17529769042699658, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19021136293379814, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1668330443006636, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15891154097843682, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17890524360470828, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16185047802002417, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15565614098797162, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.3675025939941405, "validation/loss_best": 2.415285348892212, "validation/acc_best": 0.2737172388335179, "validation/f1_best": 0.20614164787225583} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.5238887786865236, "train/grad": 0.25766103677451613, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125994873046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124840087890625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.123265380859375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1215478515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.120048828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11794189453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11572021484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.113193359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.109832763671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10590087890625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.101258544921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.090220947265625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0661773681640625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9246868896484375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.726167755126953, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6358168792724608, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5435448455810548, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4562582778930664, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.3747111129760743, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.3124123001098633, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.2423004341125488, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1810316276550292, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1287864542007444, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.07982622385025, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0280236053466796, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9888173031806946, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9510640680789948, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9286724495887757, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.9050985729694367, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8910445511341094, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.88482638835907, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.883834980726242, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8929682552814484, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9114634519815445, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9385889035463333, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9652669578790665, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9990240913629531, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.0365367674827577, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0944125401973723, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.183035694360733, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.2235508620738984, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.275761970281601, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.367769640684128, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.419931048154831, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020141889099031685, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02014589906670153, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020152003895491363, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020155573398806154, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0201587358629331, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020161016415804624, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020161939547397197, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02015980925410986, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02015478893648833, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020143405334092677, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020133260362781583, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020150164971128107, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02035008423496038, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022544035078026353, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02704425949603319, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029381736740469934, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0317941758595407, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03395581804215908, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03571060233749449, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03666527010500431, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037790342597290874, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038359160367399454, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0387957675755024, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03940218804404139, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04036373160779476, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04097832564264536, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04185024373233318, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04270141927525401, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.044519676137715576, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046214789561927316, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.047028556801378726, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04758155595511198, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0481391204893589, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.048678292501717804, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04986614309251308, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05027170276269317, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.050703497454524044, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05090024594217539, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051252775453031064, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05216784302145243, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05230962943285704, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.053484134450554846, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.056081048510968685, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05757583973929286, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.120541572570801, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1198666095733643, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.118879556655884, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.117964029312134, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1170783042907715, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1159560680389404, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.114654064178467, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1132099628448486, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.111086130142212, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.108186721801758, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.103806495666504, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.089951276779175, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0492236614227295, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.8023314476013184, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.666217565536499, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5912716388702393, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5151095390319824, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.464494228363037, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4328131675720215, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.416653633117676, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4152374267578125, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4251291751861572, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.446549415588379, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4715778827667236, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4993879795074463, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.527129888534546, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5584940910339355, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.591825246810913, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.649339437484741, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.692060947418213, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.73734712600708, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7693910598754883, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7935564517974854, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.780094861984253, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8253488540649414, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9108664989471436, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9517922401428223, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9357635974884033, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9252631664276123, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9119818210601807, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.030104637145996, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.009814500808716, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9853150844573975, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1219913959503174, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08914728682170543, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11221853082318199, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.17681801402731634, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20893318567737174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.22462163159837578, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2427094868955334, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26448874123292726, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26541159099298633, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26578073089701, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26448874123292726, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.260797342192691, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.257844222960502, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24861572535991142, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2482465854558878, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24473975636766335, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23901808785529716, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23181985972683647, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23015873015873015, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21760797342192692, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2085640457733481, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20302694721299372, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21244001476559615, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19896640826873385, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20782576596530086, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20487264673311184, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20487264673311184, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009414105736335455, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009882354405798162, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010389646871191356, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011440300304214378, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.011963329004639561, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01368571300113118, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015524940590999339, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017648309438389426, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01847912016761709, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.020209036984196113, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.023910087537395208, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.03138151550470945, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.04797722122268222, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.09296853684306357, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1273970261254467, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14463709206084677, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16532193848532972, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17999215394732646, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18855945922862302, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1976555029496807, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19929257829751268, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20189021462590703, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20288395314020172, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2024973254779349, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2051241367011157, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1998565448430626, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20353540979939258, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.203583667663494, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20080744239339, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1921918590817889, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18714262755755065, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18694326362225167, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18535809760446767, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18166052684522105, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17519484232192303, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1694525151878367, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16437571376202006, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1714729239075481, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16069680204998918, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.161527899478932, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.14229165264284246, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14779007797202984, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1471273958134333, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.12603476326404792, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.1810316276550292, "validation/loss_best": 2.4251291751861572, "validation/acc_best": 0.26578073089701, "validation/f1_best": 0.20189021462590703} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.464825495481491, "train/grad": 0.26649704568088056, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121248779296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12002685546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11822509765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1164404296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11476318359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.112554931640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1100439453125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1070849609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10295654296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.097388916015625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0892138671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0624371337890626, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.969743347167969, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7199632263183595, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6170780944824217, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.537160110473633, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.449535064697266, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3705310821533203, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.289806327819824, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.228359718322754, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1612284851074217, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.100739154815674, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0499386739730836, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.0016641283035277, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9447695803642273, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.907185869216919, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8722126233577727, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.8489867901802064, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8176307374238967, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8067759364843368, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8031641191244125, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8029590529203414, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8092399936914445, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8274498403072357, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.840943174958229, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.8699094903469087, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9074175643920899, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.958400558233261, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0073407459259034, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.101514899134636, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.157892439365387, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.219852488040924, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.29926923930645, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.3460292196273804, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020734560415148734, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020738873477093876, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02074644391424954, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02075059318449348, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02075550501700491, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020762073169462384, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020764881065115332, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02076422850601375, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02076022868975997, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020756218889728187, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020775740128010513, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021012320364825428, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022334496323019266, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027752555971965194, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030781477196142076, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033110607527196405, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03555825971066952, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03700399282388389, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03837212778627872, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039147329023107885, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.039726169295608994, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04001409519463778, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04036751352250576, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04103046488016844, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.042164953984320165, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04311800623312592, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.044490108955651524, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0453688944876194, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04681882750242949, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04821444634348154, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04874843645840883, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04895970707759261, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04908559562638402, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.049406284764409066, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05004196297377348, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05058337021619082, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05089213613420725, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.051617642901837825, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0518696079775691, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.052305354699492455, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.053350353930145505, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05542183456942439, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05610389536246657, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.056710468288511034, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.119939088821411, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1192238330841064, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1180505752563477, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1169474124908447, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1158463954925537, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1142635345458984, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.112501859664917, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1103193759918213, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1067910194396973, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.101119041442871, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0911316871643066, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.050144672393799, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8875155448913574, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6760926246643066, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5859053134918213, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.516721487045288, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.46826434135437, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4440090656280518, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4333813190460205, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.438326120376587, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.459754705429077, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.488677501678467, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5255286693573, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.550217390060425, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5651438236236572, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.589524269104004, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.616729259490967, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6396405696868896, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6996231079101562, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.751633405685425, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.815751552581787, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8463921546936035, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8645589351654053, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.880401849746704, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.893176555633545, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.959498882293701, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9546914100646973, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.004009962081909, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.950422763824463, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8574752807617188, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8251938819885254, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8027453422546387, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.855590343475342, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9067726135253906, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07899593946105574, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.08748615725359911, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.11129568106312292, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1526393503137689, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.20062753783684018, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22056109265411591, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23883351790328536, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25083056478405313, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26116648209671467, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2631967515688446, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24584717607973422, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23791066814322628, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2279438907345884, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21207087486157253, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2129937246216316, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22296050203026946, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2188999630860096, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2069029162052418, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20468807678110004, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009458073940482146, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010091709225900336, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012262025408933007, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013357711173987515, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016557294362603527, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017545427645729195, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019509976488108042, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.022584241714265874, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.026650931833377328, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.031930931855862175, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.04039804161315375, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.054180064626052765, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.07930412830458185, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12664594203968985, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15006630663832599, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17062920467927944, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18463524128500408, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19647216921274943, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2020169690706184, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19994674714162897, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19951355216066038, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1946594597217514, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19425865085809058, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19137448418282696, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19363100944459435, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19331394094060025, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18960259336567264, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19320703047915624, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18592496704541794, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.183690347773982, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17842912665857025, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17490322865593141, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17326976416608986, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17077844616910057, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17429027914946973, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15790728897882864, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1612588009816817, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15673989899190346, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1635358154986278, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16894764990637223, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17033382781818837, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16928580643323543, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16766120263867404, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1526212702862279, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.289806327819824, "validation/loss_best": 2.4333813190460205, "validation/acc_best": 0.26744186046511625, "validation/f1_best": 0.2020169690706184} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.41979843378067, "train/grad": 0.26897518388926983, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118701171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.117412109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1150927734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1129443359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.110885009765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.108099365234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10482177734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.095380859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.086329345703125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.07051025390625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9988446044921875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.795427093505859, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6432960510253904, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5465308380126954, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.467845001220703, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3881393814086915, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3141863250732424, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.235015354156494, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.17718017578125, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.112059497833252, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.053198709487915, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0017461585998535, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.950991461277008, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8927550029754638, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8481350994110108, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8079483211040497, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.781854535341263, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.745714665055275, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7271124267578124, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7204597520828246, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7226939332485198, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7257124334573746, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7489420813322067, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7663251960277557, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7960604745149613, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8499242323637008, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8958800911903382, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.968650039434433, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.05218821644783, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.120037451982498, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.159785960316658, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.2550637835264205, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.299775822162628, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020629534237086772, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020632564881816508, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020637104222550988, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02064096146263182, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020642952006310226, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02064549126662314, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02064419534057379, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020641043223440646, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020638736388646065, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020657241395674646, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020772644840180873, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02167628929018974, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025519706392660737, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029813816165551545, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.032730085449293254, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03499244246631861, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03691606440581381, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0379696892388165, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.039237749967724084, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039792031534016135, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04033393105491996, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04064159136265516, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04100310353562236, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041727378871291874, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.043054437674582, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04403969740495086, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04527317516505718, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04588889580219984, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04706573249772191, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0483706340007484, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04913596890866757, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04945717560127377, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04947451006621122, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04953545041382313, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.050152703877538445, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05072466043755412, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05168790761381388, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.052114059273153546, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05299948761239648, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05250846801325679, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05317492499947548, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05380296215415001, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.055835917964577673, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05574956214055419, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.119053363800049, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1181817054748535, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1168479919433594, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.115605354309082, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1143712997436523, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1126677989959717, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.110593795776367, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1078853607177734, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.102851152420044, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.093261480331421, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.07283616065979, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9609267711639404, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.738807201385498, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6164846420288086, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5305559635162354, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.478947401046753, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4457058906555176, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.423438787460327, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4183852672576904, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.425332546234131, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4424843788146973, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.461777925491333, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4849319458007812, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.511153221130371, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.546931505203247, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5990102291107178, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6439473628997803, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.677915334701538, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.756460189819336, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.838757276535034, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8863120079040527, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.9220340251922607, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.925839900970459, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.9406251907348633, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.949331283569336, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.915262460708618, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.926828384399414, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9397528171539307, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9372751712799072, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.825148820877075, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.799868583679199, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7560620307922363, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7704226970672607, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7379658222198486, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07936507936507936, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08250276854928018, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.10095976375046142, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.14285714285714285, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18530823181985973, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21133259505352528, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2369878183831672, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2576596530084902, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26448874123292726, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2672572905131045, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2643041712809155, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25655223329641935, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2484311554078996, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23809523809523808, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23791066814322628, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22868217054263565, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2100406053894426, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21188630490956073, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2039497969730528, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21982281284606867, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21779254337393872, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22185308231819859, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2203765227021041, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009650596933522616, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009892393351501038, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013627719823461115, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.016005857188386204, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0184532592367332, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019967327373959055, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021591640224329384, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.024147140385621574, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02978635834891956, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03481797417825, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.048927515629479244, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.07083793756163309, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10490381362985851, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1310441326196434, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15749211605660696, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17341858424116205, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18489194601345837, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19236182466146687, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1934957584333946, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19112512136395884, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1934984450788586, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19397654593873162, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19710800439996637, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19649822037789613, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19404135841313397, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19689480788133915, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19399478914999516, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19275578189069478, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1898268117030549, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18390290096288311, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18005412295996637, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18477057879284944, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18210177282259807, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1767011711604504, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18212064434910102, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1814478545803984, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17197277979981296, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1685640630733056, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16054861686048313, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1773217226202524, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1777289420524298, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17792502198400265, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1804905092990853, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18390663978162522, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.235015354156494, "validation/loss_best": 2.4183852672576904, "validation/acc_best": 0.2672572905131045, "validation/f1_best": 0.1934957584333946} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.3556615591049193, "train/grad": 0.2668778163194656, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.119429931640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11793212890625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.115498046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.113170166015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1108935546875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10779296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.104339599609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.099857177734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.09226806640625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0783441162109373, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0486181640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.8828169250488282, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7019197082519533, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5834588623046875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.484939651489258, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.410084228515625, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.334434700012207, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2574540328979493, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.180379695892334, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.121360855102539, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.053282175064087, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9904251527786254, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9337339401245117, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.87859956741333, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8130610918998717, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7674511337280274, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7199946200847627, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6895486426353454, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.645604019165039, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6185359513759614, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6060107600688935, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.602322444319725, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6149774831533432, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.631482139825821, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6569336813688278, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6925511080026627, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7339500415325164, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7774516999721528, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8454039454460145, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9351794362068175, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.982755919098854, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.027188755273819, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.1084743535518644, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.178133577108383, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02035518513992429, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020357271968387068, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020360628012567758, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020363643821328877, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02036487604957074, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020367291518487037, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020367116327397527, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020365570811554788, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02037883792538196, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020459699374623597, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020766124264337122, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023227513590827583, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027625137316063045, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030931543223559857, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03396368280984461, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035921450704336166, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.037493419935926794, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03858724691905081, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.039626571983098983, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04014117887243628, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.040587987825274466, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04083715993911028, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04122163837775588, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041924332156777384, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.043040417712181804, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044050610549747946, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04510237157344818, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04578700268641114, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.047055921610444786, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.048250646367669105, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04874605640769005, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04908471396192908, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04997994687408209, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05023792121559381, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.051173587031662464, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05185227863490582, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05212319258600474, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.052444881685078146, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05169205600395799, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.051007470898330215, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05008864406496286, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.050170149449259044, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0514810012280941, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05236168690025807, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.118732452392578, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.117888927459717, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1165578365325928, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.115265369415283, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1139957904815674, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1121623516082764, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1097946166992188, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1063849925994873, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0992348194122314, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.083411455154419, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.043583393096924, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.8258144855499268, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6798717975616455, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5731499195098877, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.498657464981079, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.460848093032837, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.438629150390625, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.421279191970825, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4220681190490723, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4322292804718018, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.455996513366699, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.478337526321411, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.509298324584961, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5513429641723633, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.59810209274292, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6503655910491943, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6915507316589355, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.729680299758911, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8004958629608154, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.891080379486084, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9329800605773926, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.9612388610839844, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9585423469543457, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.9898180961608887, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0196163654327393, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.043999671936035, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.063800573348999, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.064549684524536, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0188076496124268, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0139663219451904, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.957210063934326, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0092973709106445, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8835535049438477, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.8982460498809814, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0799187892211148, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.09154669619785899, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.11148025101513473, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.16925064599483206, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2056109265411591, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2249907715023994, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2469545957918051, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26245847176079734, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2591362126245847, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25839793281653745, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24658545588778147, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23791066814322628, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22443706164636398, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2100406053894426, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20228866740494647, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20967146548541898, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20764119601328904, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20967146548541898, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20339608711701734, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22111480251015134, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2187153931339978, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010320385829039226, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011548711407245412, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012519050051229885, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01622159441475675, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01769463117136189, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019736151925768244, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02133535822875203, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.024508058802441394, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03026184549882398, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.04052889539131657, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.05324266438790418, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09229492352290104, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1288286930572991, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1507094807424082, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17100235294616684, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18456603761562387, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19030650477751257, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19468122828455595, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19693316393957794, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19469750610764777, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19317131471195179, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19816030556229516, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19620094251204176, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1933704811580744, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1882751866130672, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19505741658179718, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19114511539021373, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1921979415347835, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19102115228410985, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17964593105326718, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18223538098973735, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1753719055661828, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17787118907439115, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16728015292103038, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17535024890287307, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17390742153456662, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16775323800549455, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1690497353826542, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16372044439661648, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16667558992346043, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16360640145398056, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15870232584015942, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17678689726336347, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1715336492934877, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.2574540328979493, "validation/loss_best": 2.421279191970825, "validation/acc_best": 0.26688815060908083, "validation/f1_best": 0.19468122828455595} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.31358735203743, "train/grad": 0.273937386199832, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122783203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1212109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118616943359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11614013671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.113720703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.110318603515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10623046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.100858154296875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.090540771484375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0690362548828123, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.01396484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.7799383544921876, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6623138427734374, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5435552978515625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4438218688964843, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3694476318359374, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.292112865447998, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2133191776275636, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1353178882598876, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.074882302284241, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0040040254592895, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9407916355133057, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8837086153030396, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8261939233541489, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.757612881064415, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7096213239431381, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6608897244930267, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.626022841334343, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5819702339172363, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.5526323986053467, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.5457845902442933, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.545585306286812, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5565273350477218, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.571252024769783, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5824184536933898, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6148517221212386, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6557111859321594, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.6951523643732072, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.7593852490186692, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.86452177464962, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.9089836066961288, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9589567971229553, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.023799529671669, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.081900041103363, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020465433024801315, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02046633540187031, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02046675850637257, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0204691514139995, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020471053938381376, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020475370269268753, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02047940476797521, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02048732469789684, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020530442264862357, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020717634516768158, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021381167694926262, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025556733068078756, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029093749234452845, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03259567401371896, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.035676970696076754, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.037536615328863265, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.039006152972579006, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.040021983860060575, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04091873964294791, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04147421794012189, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04196398947387934, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04224019946530461, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042735071647912265, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043503243401646616, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.044750354327261446, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04581501834094524, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04703061934560537, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047803916577249764, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048953707832843064, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.050038819573819636, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05119152734056115, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.051868654750287534, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05281197004020214, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05272398911416531, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.052501285914331675, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05231889501214027, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05258831506595016, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05286620996892452, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05210125610232353, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05152169290930033, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.050313766989856956, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05050388228148222, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05046807734295726, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05029063493013382, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.11840558052063, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1174628734588623, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.115929365158081, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1144237518310547, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1129088401794434, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.110630512237549, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1076223850250244, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1031363010406494, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0930464267730713, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.068202257156372, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9959604740142822, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.746619701385498, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6447994709014893, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5411760807037354, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.482017755508423, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4546360969543457, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.43904972076416, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.427647113800049, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.432332754135132, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4419643878936768, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4615638256073, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.479478597640991, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5085930824279785, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.542269468307495, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.585761785507202, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.640031099319458, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6796133518218994, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7176620960235596, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.777616024017334, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.851881980895996, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9081897735595703, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.9631192684173584, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0028464794158936, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0159010887145996, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9860708713531494, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9686975479125977, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.980581283569336, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9923949241638184, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9256715774536133, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8501694202423096, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7843852043151855, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7644078731536865, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.784640312194824, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.845254421234131, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08600959763750461, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.10169804355850867, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.12975267626430417, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.18327796234772978, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.20948689553340716, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23477297895902546, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24861572535991142, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2646733111849391, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26375046142488, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26098191214470284, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.257844222960502, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2552602436323367, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24713916574381692, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24197120708748615, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23606496862310816, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2249907715023994, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22056109265411591, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22812846068660023, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2369878183831672, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2336655592469546, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2353266888150609, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2292358803986711, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01093703143751657, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011471530953680623, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012839256946953474, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01575083532921934, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017052935682848283, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01800826443673573, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.020621758775671142, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02750819776724638, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03693518639923126, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.04769513026880546, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06231876020114844, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.10435231601940037, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1298010580989983, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15585011988345424, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16994969609629854, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18626602127283762, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19064031588417665, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19320141597693496, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19629283237372977, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19695710375630174, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19917033817853494, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20043879680783938, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20359169277448497, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20148109017576835, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20180891922963243, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20172717509894442, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19682135519134594, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19539538180837782, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19163431271020528, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1916169169064721, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18896585518290054, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18463928047007197, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1838671313897301, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18234415453204153, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1900160406478613, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18949219363852907, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18159591335766945, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17982897633752326, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18458973476642612, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1835630273783807, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19496952251867636, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19198044228019717, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19749516054653768, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18983814269895416, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.2133191776275636, "validation/loss_best": 2.427647113800049, "validation/acc_best": 0.2646733111849391, "validation/f1_best": 0.19320141597693496} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.258929190635681, "train/grad": 0.2705794834345579, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.114512939453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.112764892578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.110069580078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.107305908203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10455322265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.100859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.096114501953125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.08946044921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.07563232421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.042940673828125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.947901611328125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.7108709716796877, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.614374008178711, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.494568328857422, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4002519607543946, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3296197128295897, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.252172908782959, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1755871963500977, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1008973217010496, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.040900263786316, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.970240077972412, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9044507598876954, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.844155511856079, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7846124243736268, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7130267143249511, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6591172862052916, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6050776767730712, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5639344215393067, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.504437306523323, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4613718938827516, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4406559079885484, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4296249437332154, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4335609364509583, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.4470832216739655, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.465723188519478, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4987808972597123, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5421170741319656, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5708402341604233, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6486055266857147, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7389202785491944, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8058447974920273, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.854037191271782, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.929488542675972, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.9910928118228912, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020436717858538032, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02043872794602066, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020442359670996665, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02044675066135824, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020450235633179544, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020454349010251463, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020459318268112838, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020473501030355692, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020549110779538752, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020861812010407446, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022067976677790285, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027182203689590095, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030134941982105375, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03388139182701707, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.036824604347348217, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0384851433429867, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03977779194712639, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04052426777780056, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04121368385851383, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.041699418760836125, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.042075987290591005, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04235404700040817, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042855716794729236, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04375015335157514, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04495389871299267, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04563160726800561, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04676047019660473, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047254567872732876, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04814717417582869, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.049015219435095785, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04963116154074669, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.049848295245319606, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05024299871176481, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05012106226757169, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05088934618979692, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051291657332330945, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.051560679245740176, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05172840008512139, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05091779103502631, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04963144505396486, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04919641928747296, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04955989070236683, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.048942637983709576, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04957819676026702, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.118173360824585, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1172091960906982, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1156363487243652, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1141183376312256, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1124932765960693, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.110036611557007, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1066508293151855, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1012372970581055, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0879385471343994, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.051886558532715, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9388298988342285, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7073190212249756, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6188666820526123, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5210659503936768, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4749162197113037, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4549617767333984, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4448068141937256, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4430599212646484, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4562625885009766, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4731802940368652, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4983315467834473, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5216100215911865, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.559588670730591, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6060078144073486, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.66146183013916, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.727299690246582, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7787575721740723, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.822474956512451, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8955836296081543, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9843037128448486, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0373692512512207, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0791261196136475, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0996696949005127, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1496126651763916, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1607561111450195, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1405885219573975, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0689022541046143, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1011757850646973, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.03936505317688, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.943573474884033, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.905681610107422, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.846669912338257, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.84605073928833, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.888782501220703, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08822443706164636, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.11129568106312292, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.14304171280915468, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1921373200442968, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21096345514950166, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2369878183831672, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24713916574381692, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2593207825765965, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2561830933923957, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24880029531192321, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2469545957918051, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2412329272794389, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2369878183831672, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2320044296788483, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22905131044665927, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22443706164636398, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22720561092654115, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2364341085271318, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010185196451059647, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010676736508003755, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01183032545900258, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013697073719721098, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015502946831287256, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016570186510536256, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.022382001465155395, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.027756076529910124, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03805671404628009, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0526310036253683, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07045081486398526, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1131085159720463, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1301477716015422, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15907215573859032, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17148785302240574, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18331813384977566, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19042094982513624, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1953044079202074, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19465992990801126, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19160020926856172, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19344898578046754, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1946019731645795, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19452965576686077, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18918391306455784, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18739362982780527, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1823507273969346, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18241224685297722, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17944892989187564, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17981869024310235, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17736387571508558, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17402597038675727, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1724618145979214, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17839445300927917, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17186723434486573, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17307789872649848, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17381429754158462, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1791810640155506, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18054448835901735, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17907209231934398, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1829347053975545, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18010751831724234, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19146326448159767, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19302262180393345, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18562447090613943, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.1755871963500977, "validation/loss_best": 2.4430599212646484, "validation/acc_best": 0.26393503137689184, "validation/f1_best": 0.1953044079202074} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.2120706498622895, "train/grad": 0.27276968739926816, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118778076171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1169873046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11404541015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.111138916015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.108341064453125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.104315185546875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.099031982421875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.09148681640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.074715576171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.031318359375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.8989193725585936, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6892054748535155, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.596857147216797, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.476492652893066, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.381655387878418, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3083130073547364, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2262257957458496, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1467372798919677, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0681412029266357, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0050848007202147, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9314547061920166, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8621495056152344, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.79859765291214, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7348654925823213, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6602776634693146, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5950386673212051, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5355835723876954, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.492113608121872, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4314373129606246, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.383304825425148, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3597648131847382, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3458311647176742, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3429909288883208, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.3540397435426712, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.3589117157459258, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3871483969688416, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.421858851313591, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.457971717119217, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.5325620514154434, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.62975989818573, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.7005468702316284, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7490000122785567, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.819278100132942, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.879810700416565, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020779694435186683, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020781803289428354, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02078513444401324, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0207870648894459, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020788990873843432, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020792543455027044, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020796561790630223, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020815005833283065, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020923306639306246, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021373200602829455, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02316826238296926, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028310270849615335, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031059547709301114, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03482163444161415, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03758649272844195, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.039136741366237404, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04039506765082478, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.041026295721530916, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.041686849910765884, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04215728959068656, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04253417119383812, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04272884957492352, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04310846140608191, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04387544594705105, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.045116941314190626, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04606094500049949, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.047332631200551985, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04805920336395502, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04915137441828847, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04988373404368758, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05038091765716672, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05063389535993337, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05096701992675662, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051064842380583286, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.051345885023474695, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051924751121550795, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05198414864018559, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05152715712785721, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05102055624127388, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.049386232439428565, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.048646276891231535, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04854841021820903, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04833346743136644, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04807806307449937, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.117902994155884, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.116865634918213, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.115164041519165, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.11346697807312, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1116526126861572, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.108874559402466, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1049201488494873, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.098381519317627, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.081660270690918, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0331332683563232, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8830626010894775, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6818127632141113, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.597248077392578, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.503319263458252, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4607043266296387, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.440385341644287, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4292759895324707, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.428398847579956, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4420769214630127, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.460224151611328, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.487833261489868, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5148935317993164, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5549240112304688, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.600982189178467, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.658501625061035, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7185451984405518, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7719149589538574, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8209400177001953, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.917814016342163, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9966230392456055, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0778186321258545, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1315925121307373, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1760761737823486, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.231022834777832, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.2119107246398926, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2239625453948975, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1663260459899902, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1903693675994873, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1420345306396484, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.029534339904785, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.960439920425415, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9429283142089844, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.929560899734497, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9286110401153564, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07918050941306755, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08896271686969362, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.11627906976744186, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1568844592100406, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2024732373569583, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21926910299003322, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24713916574381692, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2572905131044666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2633813215208564, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2702104097452935, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2646733111849391, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2596899224806202, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2543373938722776, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25101513473606496, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24732373569582872, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22056109265411591, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20911775562938353, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20764119601328904, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21207087486157253, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23385012919896642, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23735695828719083, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22591362126245848, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2353266888150609, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011253060631380799, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012907609569689676, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015635136654332124, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.017649366386962125, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018313963397532263, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02110411378348664, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.024636291065019415, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.030895006012033956, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03958406225251917, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05437027525413748, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07905430578536787, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12204722288034603, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14123580765885715, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17071324728911008, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18377699570296682, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19216817556386676, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19800439162511566, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19561714030583946, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1964517222358746, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1951908120559871, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19502841458330225, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1958257639389258, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19849070435981983, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19763107320450393, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19442413205361128, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19055433417453027, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19326760538409649, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19277028704650237, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18461355735946597, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18519838615296033, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1823918131200268, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17863244072232232, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17407138309671363, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1697452639976672, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17621425364874024, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17276484358120073, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17855096335476575, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1817288601986723, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17076528542644512, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18032067064532065, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1850533829590518, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1877765054978934, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18015092311825573, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1852397701630283, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 16, "lr_best": 8.1e-05, "wd_best": 0.05, "train/loss_best": 2.2262257957458496, "validation/loss_best": 2.4292759895324707, "validation/acc_best": 0.2702104097452935, "validation/f1_best": 0.19800439162511566} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.164377524256706, "train/grad": 0.2695275468379259, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11517578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11317138671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11008056640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.10707763671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10393798828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.09958251953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.09369873046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0849090576171876, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0643414306640624, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0074859619140626, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.842593994140625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6561912536621093, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5619849395751952, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.439968490600586, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3485507583618164, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2779191017150877, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1987392997741697, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1234504795074463, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0461437797546385, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9819181728363038, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9065488529205323, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8356000661849976, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.768563449382782, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.702780692577362, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.624851324558258, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5582874095439911, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4900177466869353, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4418836158514023, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.376136944293976, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3115236884355546, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2799299621582032, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2600781852006913, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2523162442445754, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.2576276987791062, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2582652434706687, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.269752233028412, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2967381912469864, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.3184724873304368, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.409190185070038, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5120320701599121, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.5789758455753327, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.635800850391388, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7076876950263977, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.7651442432403563, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020372426644898953, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02037332943174988, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02037554757669568, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02037871832959354, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020381877832114696, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02038719673641026, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020398696633055807, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020431450260803105, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020589929474517703, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02121078041382134, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02360763951204717, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02852133902721107, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031207003574818374, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03494709946215153, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03763556122779846, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03917170250788331, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.040442117433995006, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.041001693941652775, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.041634348873049024, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04209265779703855, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04238431636244058, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04258454563096166, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042996799591928724, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043771691247820856, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.044953336641192436, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.045870290417224166, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.046730656679719686, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04725082540884614, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04828302856534719, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04912978248670697, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049675889052450656, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05014020064845681, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04999647943302989, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.050189746785908936, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.050162205938249826, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0500074603036046, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05008075658231974, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.049934666287153956, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.050415806286036965, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04990638295188546, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04863576510921121, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04832195393741131, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04761778680607676, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.046709695272147654, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1172783374786377, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.11618971824646, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1143627166748047, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1125473976135254, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1106324195861816, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.107656478881836, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1033129692077637, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.095973014831543, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0763893127441406, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.016206741333008, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8424856662750244, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.665862560272217, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5818302631378174, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4922592639923096, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4532580375671387, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.433985710144043, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4244484901428223, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4242324829101562, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4379937648773193, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.455068826675415, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4811911582946777, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5082879066467285, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.546663284301758, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5941672325134277, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6623616218566895, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7304203510284424, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.793456554412842, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8402726650238037, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9419705867767334, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0474863052368164, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.116586685180664, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.176858425140381, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2227118015289307, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.272373676300049, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3021554946899414, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3296072483062744, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.328164577484131, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3039283752441406, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.21453595161438, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.106205701828003, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0353639125823975, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.994964122772217, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9856057167053223, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.008599281311035, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09542266519010704, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.12513842746400886, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.16758951642672573, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20487264673311184, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22406792174234036, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24732373569582872, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2591362126245847, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26707272056109266, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27205610926541157, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2646733111849391, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2622739018087855, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2547065337763012, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24898486526393504, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24584717607973422, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23661867847914358, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2292358803986711, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2087486157253599, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2041343669250646, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20837947582133629, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1969361387966039, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1954595791805094, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20764119601328904, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21963824289405684, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21945367294204504, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22406792174234036, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011377025526071638, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013276316889492169, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015537535738798243, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.017683807152900164, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01819021252605883, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02022961676260706, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.025756801549732578, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03155109750034369, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04300195183591984, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.059020237845808045, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.08726408172487617, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.125212942122383, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14655036139340674, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.16998006394692103, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18472938516062884, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.195468248457031, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20050589811279884, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1986982376771663, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19711205017541555, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19822925270285133, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19712094882860295, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2002422493765669, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20054748818338508, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19814951021037483, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19258395017959085, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19206661317358784, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18972514948809463, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18624441944578465, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17987101370816005, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17862575542634565, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17481182977218138, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17417610196641056, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1744115461611274, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17583637842210487, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17300733797636758, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1665789804001814, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1682057161226708, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17771890145467284, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17143760049444987, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17673513621613468, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18684566019117588, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1855004579487989, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1841306383803267, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17911863907849504, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 16, "lr_best": 8.1e-05, "wd_best": 0.05, "train/loss_best": 2.1987392997741697, "validation/loss_best": 2.4244484901428223, "validation/acc_best": 0.27205610926541157, "validation/f1_best": 0.20050589811279884} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.127118589282036, "train/grad": 0.2669114263355732, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.111220703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.109229736328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.106058349609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.103031005859375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.0998876953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.095386962890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0894287109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.08025634765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.057962646484375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9926190185546875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.813456573486328, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6487887573242186, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5542828369140627, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4319435119628907, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3395103454589843, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2670282173156737, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1868643379211425, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.110911474227905, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0321542739868166, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9672132444381714, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8903835701942444, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8199466753005982, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7523897993564606, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.682979038953781, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6033924221992493, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5304622304439546, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4612344306707383, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4094613283872603, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.337005518078804, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2668689805269242, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.225155957341194, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.194448117017746, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1791491308808326, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1767298474907875, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1613845109939576, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1674454474449159, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1834095054864884, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2086964434385299, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2949233627319336, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.3978963857889175, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.467266390323639, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5143028193712234, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.5952586925029755, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.6520124030113221, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020803891718387604, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020805799653753638, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020809425450861455, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020811831392347813, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020813298840075732, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020815192759037017, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020822146162390708, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02085431112907827, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02103244467638433, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021752557195723058, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024438593806698918, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02913541367277503, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031877580201253294, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.035697801792994144, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03833160493522882, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03981339225545526, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04098266484215855, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04142432756721973, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.041963500715792176, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.042377655748277905, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.042622272409498695, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04275478014722466, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.043146995157003404, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04383436309173703, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04492260461673141, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04557655563578009, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04649853365495801, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04681162791326642, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04738531643524766, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0478184756077826, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04799859875813126, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04795749170705676, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04829124821349978, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.048275014609098436, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04843164732679725, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04859701588749885, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04866492725908756, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04958990428596735, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04904586531221867, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0488224802352488, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.047902388777583835, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.047367239519953724, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.046874965596944096, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04645838879048824, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1171677112579346, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1160354614257812, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.114238739013672, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.112410306930542, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1104907989501953, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1074399948120117, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1029276847839355, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0950253009796143, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0732979774475098, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.004054307937622, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8186910152435303, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6576123237609863, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5741448402404785, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4878389835357666, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.451907157897949, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4342520236968994, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.426093339920044, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4274449348449707, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.443087339401245, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4610884189605713, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4889016151428223, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5152881145477295, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5544958114624023, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.600339412689209, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.662804365158081, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7327516078948975, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7885618209838867, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.835794448852539, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.94315242767334, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0541460514068604, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1344046592712402, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.199188470840454, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2199714183807373, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.258646011352539, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.2910473346710205, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.320032835006714, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.3368775844573975, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3583343029022217, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.281697988510132, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.189624071121216, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0691494941711426, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0285074710845947, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.992447853088379, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.983489751815796, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09726836471022518, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.12975267626430417, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17331118493909192, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2085640457733481, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22591362126245848, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24898486526393504, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25858250276854927, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2643041712809155, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2692875599852344, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2547065337763012, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25101513473606496, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23717238833517904, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2292358803986711, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20893318567737174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20210409745293467, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2011812476928756, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2072720561092654, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2041343669250646, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1937984496124031, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20136581764488742, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1998892580287929, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.18973791066814322, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20671834625322996, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2144702842377261, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21428571428571427, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2129937246216316, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2201919527500923, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011548769120303982, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013030862180841457, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01623729213609885, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01762411654743438, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018230020656970786, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02107292756626312, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.027033716486644804, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03375424981522327, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.045992139416556645, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06264794983682186, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.0947649283395005, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12956046262014895, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14952045068445785, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17212552221948077, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1859795991178017, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19326864470858465, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19786087086256568, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19942844479521774, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1993696937230746, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19728320623427717, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1937765969310005, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1978637831542611, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.199368791359106, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19840369487499368, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19481176653474128, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19231886844038323, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19229566671940976, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19226767918307197, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18277017785585423, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18112494484203068, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1820094104672941, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17420124585263053, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17570788364291198, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18148889000646448, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1787282778545598, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17288556908311636, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1790676465529956, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18171361408267825, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17187836499342043, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18192907110173928, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1882459113682444, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1901605360108466, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18751918533016942, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18830362435650674, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 16, "lr_best": 8.1e-05, "wd_best": 0.05, "train/loss_best": 2.1868643379211425, "validation/loss_best": 2.426093339920044, "validation/acc_best": 0.2692875599852344, "validation/f1_best": 0.19786087086256568} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.0975087344646455, "train/grad": 0.264221508577466, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118756103515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.116727294921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11041015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.107286376953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10255126953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.096346435546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0863385009765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0615899658203123, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9876470947265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.8015536499023437, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6506808471679686, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5595358276367186, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.440400390625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.349526824951172, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2777834510803223, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1977998352050783, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.121164617538452, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0408000850677492, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9734695196151733, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8932852745056152, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8187311458587647, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7459593260288238, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6728909003734589, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5878008222579956, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5118781274557114, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.433647211790085, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3781123292446136, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3045544958114623, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2295987835526467, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1771672669053077, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1414687156677246, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1115004539489746, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0976469340920447, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0831540396809578, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0807051500678062, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0881191065907478, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0945926064252853, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.173152160346508, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2660191962122918, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.3392683202028275, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.3906323504447937, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.4622509378194808, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.5224574375152589, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02076077403035015, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020761051438748835, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020765464347787203, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020767263444140552, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02077032535802573, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020772318062372504, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020780951092019675, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020821493505500256, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021023247111588716, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021864948528818786, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024880873132497074, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029550478383898734, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03233718421310186, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.036188308969140055, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03876416520215571, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.040197171289473775, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.041309265308082106, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04172187309712171, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.042225644513964655, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04261408807709813, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04279209835454822, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.042911730781197546, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04328068988397717, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04392691768705845, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04485764717683196, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04554326802492142, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04623960997909307, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04649335853755474, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04698525784537196, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0473615207336843, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0473593807220459, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04725699918344617, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04719155333936215, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04693289164453745, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04720344351604581, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.047366635557264086, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04770035859197378, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04738440714776516, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04747145375236869, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04709877412766218, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04667459202930331, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04644361704587936, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04614999085664749, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.045362199023365975, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1170926094055176, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1159512996673584, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.11413311958313, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1122729778289795, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1102936267852783, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1071829795837402, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.102491855621338, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.09419584274292, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0709588527679443, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9955697059631348, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.804939031600952, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6515750885009766, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5678205490112305, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4834911823272705, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.448568344116211, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.431105852127075, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4240667819976807, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4264731407165527, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.443887948989868, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.464031457901001, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4953885078430176, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5246002674102783, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.565639019012451, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.615960121154785, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6828722953796387, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.755798101425171, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.8214502334594727, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8751254081726074, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.990203380584717, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.10224986076355, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.195233106613159, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.270822525024414, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3026771545410156, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3504364490509033, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3879148960113525, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.403290033340454, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.431511163711548, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.455653429031372, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.3646719455718994, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2843141555786133, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.158193349838257, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1169543266296387, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0774919986724854, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.044370651245117, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07604282022886674, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08324104835732743, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09819121447028424, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1330749354005168, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17534145441122184, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20967146548541898, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22812846068660023, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24953857511997046, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25950535252860835, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2646733111849391, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27205610926541157, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.266703580657069, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26245847176079734, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2500922849760059, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24769287559985234, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2279438907345884, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2235142118863049, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21040974529346623, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20671834625322996, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2085640457733481, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20358065706902917, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19878183831672203, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20062753783684018, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20302694721299372, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19638242894056848, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2026578073089701, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21114802510151348, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2129937246216316, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2146548541897379, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010735455798809197, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012828793013244758, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016271954880306295, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0171435742181903, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0179434011002, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.020232790494164307, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.025620113881057904, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0330863269189059, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04642298910789245, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06501082139425642, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09630479736989446, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1293889830400525, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15083716000473013, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17278248844892943, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18815665416910168, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19385524958370925, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20029460714874622, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.197651804905031, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1992688588434172, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19802683076916905, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19596864491130553, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1949761016155964, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1989019127425445, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1997525014194825, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19337442641125327, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19081923453687133, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1892871417158523, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1857530916814761, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17515461496147622, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17789984369383113, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17099225598749382, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16965547733599654, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17513144961779706, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17669836339065914, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17193774654719193, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16984262292811095, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1704142004969297, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17511885697128873, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1712195660929483, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1686033627625487, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18383056638882864, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.179984129806485, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.179885004391628, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1818673059242979, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 16, "lr_best": 8.1e-05, "wd_best": 0.05, "train/loss_best": 2.1977998352050783, "validation/loss_best": 2.4240667819976807, "validation/acc_best": 0.27205610926541157, "validation/f1_best": 0.20029460714874622} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 2.066263191103935, "train/grad": 0.2565155360847712, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11481201171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.112862548828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1097216796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1066015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10342529296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.098780517578125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0924462890625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.08238525390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0571148681640623, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.979444580078125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.789217224121094, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.637034454345703, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5428659820556643, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.423571090698242, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.333025016784668, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2610161972045897, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1815822601318358, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.105273714065552, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.025398817062378, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.958618950843811, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8795259809494018, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8043458318710328, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7325117778778076, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6595816922187805, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5716487097740173, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4920470118522644, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4146017253398895, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3571380347013473, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2751346784830093, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1916036432981492, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1419932666420936, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.101008362174034, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.070283289849758, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0543008503317832, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.028928809463978, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.013266542851925, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.013353483080864, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0085013949871062, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0773520949482918, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1582118967175483, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2303214120864867, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2773466005921363, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3339945939183235, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.38532295525074, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020542218619957565, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02054192646406591, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020543598928488793, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02054439151659608, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020546899209730327, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02054799594450742, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020558641985990108, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020603068443015217, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020821899650618435, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0217117456253618, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02476635843515396, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029225654751062393, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031976531418040396, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03575174381956458, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03827222301624715, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.039693477172404526, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0407785876840353, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04111294746398926, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.041590708587318656, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04198729328811169, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04218990912660956, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04225572483614087, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042537042051553724, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04311689928174019, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0440015571936965, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044672314543277025, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045338116083294154, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04549248928204179, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045959658958017825, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04614122910425067, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.046000495310872795, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04579131191596389, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04545086750760675, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04523885292932391, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0451754535920918, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04507118333131075, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.045172323267906905, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04493695616722107, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04486884659156203, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.044553956426680086, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0443525449745357, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04409500189125538, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0438847447745502, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04322464419528842, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1170156002044678, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1159017086029053, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.114070415496826, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1121723651885986, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1102163791656494, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.107034683227539, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1022300720214844, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.093695878982544, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.069661855697632, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.991147994995117, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7986111640930176, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.648590326309204, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5647575855255127, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4807136058807373, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.445972442626953, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.427842617034912, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4199793338775635, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.421361207962036, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4369425773620605, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4551963806152344, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.485184907913208, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5146021842956543, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5571112632751465, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.608293294906616, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6758792400360107, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7455148696899414, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.810875177383423, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.864217519760132, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9805748462677, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.098796844482422, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1841416358947754, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2622082233428955, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3009397983551025, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3656513690948486, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4177513122558594, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4532523155212402, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.4744129180908203, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.5080935955047607, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4172866344451904, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3293001651763916, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2059857845306396, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.16279673576355, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1071724891662598, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.080120325088501, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07733480989294943, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.082687338501292, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.10040605389442599, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.13344407530454042, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17626430417128092, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21096345514950166, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22849760059062385, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2497231450719823, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2652270210409745, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.271686969361388, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2497231450719823, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2441860465116279, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20579549649317092, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2074566260612772, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2011812476928756, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19896640826873385, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19878183831672203, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1997046880767811, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19767441860465115, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20634920634920634, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21705426356589147, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21705426356589147, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21391657438169065, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21797711332595054, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011557268191026612, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012895058528623165, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016187706029627425, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.017539127001899343, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017975619754089307, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.021708963619558805, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.027151708087954612, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03438507329828, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04777609731360385, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06560740929139758, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09690347553277096, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1310056895178866, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15188620855381604, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17310394578117527, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1896131302381948, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1946167509241061, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20139328674334012, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19843894655281905, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20150724833572686, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19853202858901697, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19664504081655934, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20000216807118118, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2011648815107782, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19801423409475447, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19464675264010187, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19524991613225773, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19142104069594387, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1893116963356296, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18136057580786621, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17778024087962419, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17458014193706498, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1722928158459349, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1722005791681691, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17599807332484393, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16857547410387672, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16881816596541452, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1705177401508464, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1737915068998844, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16746805330933592, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17236001161394032, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18471922693305962, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1849424284603424, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18141078503168337, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18355470144220684, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 16, "lr_best": 8.1e-05, "wd_best": 0.05, "train/loss_best": 2.1815822601318358, "validation/loss_best": 2.4199793338775635, "validation/acc_best": 0.271686969361388, "validation/f1_best": 0.20139328674334012} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 2.0429483205080032, "train/grad": 0.2538204224407673, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11611083984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1141748046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1109228515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.107794189453125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.104625244140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.09989013671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.09350341796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0832958984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0574224853515624, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.97801513671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.788893127441406, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.63955078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.544894790649414, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.42260196685791, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.329197998046875, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2547276496887205, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1742798614501955, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0963821983337403, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.014575357437134, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9461713123321533, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8663912391662598, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7902705883979797, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7171362960338592, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6425522065162659, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5525454008579254, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4756357043981552, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3939070188999176, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3340191698074342, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2507523372769356, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1636639320850373, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1096524134278298, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0646464201807975, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0283085131645202, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0145699620246886, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9820393058657646, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.96851049721241, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9643598574399949, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.9521316316723824, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0081936848163604, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0750437626242637, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.1355840477347374, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1803020343184472, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.224956351518631, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.272892463207245, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020640835212543607, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020639266776852308, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02063898623920977, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020638413317501544, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02063777884002775, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02063933643978089, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02064777513965964, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02069033701904118, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020913642668165266, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021828265977092087, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02490610469132662, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029373658755794167, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03219488295726478, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03597187272273004, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03847030459903181, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03989727492444217, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04098506756126881, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04136686877347529, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04187245296314359, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.042234745640307665, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04243969973176718, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04249136952683329, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04274682022631168, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04330736069008708, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04413798704743385, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04464175743982196, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0451732530631125, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045306508764624595, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04552318271249533, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.045449567884206774, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04532791763544083, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.045035407543182374, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04452244307845831, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04415325054898858, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04408744869753718, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04392832981422543, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04379183376207948, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04339948732405901, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.043474833350628614, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04320793230086565, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04297123668715358, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04267334999516606, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0423066764883697, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.041992961708456275, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1170077323913574, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.115886926651001, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1140248775482178, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1121482849121094, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1101644039154053, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.106980562210083, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.102144718170166, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0935330390930176, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0692100524902344, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9895565509796143, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.796494722366333, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.647793769836426, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.564042806625366, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4809131622314453, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4467689990997314, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4294326305389404, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4226605892181396, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.424994945526123, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4417693614959717, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4605305194854736, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.490281581878662, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.519313097000122, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.560967445373535, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6104347705841064, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6771059036254883, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7458579540252686, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.808809995651245, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.862316370010376, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9753525257110596, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.096360206604004, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.183509111404419, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.261418104171753, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3042593002319336, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.360464334487915, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4188849925994873, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4615392684936523, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.4829254150390625, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.5198614597320557, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.440206527709961, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.357013702392578, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.238997220993042, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.206563949584961, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.172109365463257, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1555330753326416, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08324104835732743, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1005906238464378, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.13362864525655224, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17718715393134, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21077888519748986, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22812846068660023, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2500922849760059, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25858250276854927, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2707641196013289, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26578073089701, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2622739018087855, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25489110372831303, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2526762643041713, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24455518641565152, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23403469915097821, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23145071982281284, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22462163159837578, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20967146548541898, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2041343669250646, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20210409745293467, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20302694721299372, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20007382798080472, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.1906607604282023, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1954595791805094, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1967515688445921, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19287559985234404, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20339608711701734, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2172388335179033, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2056109265411591, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011310074197660244, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013009200938940079, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01635539611419418, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.017236827781013524, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018212596151941988, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02210798106823356, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02780176740083361, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03449068404990124, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.047681047850875485, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0653794742293953, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09825235931105296, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1312513877059406, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1515171457675794, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17288360079302315, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18754701483373182, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1956494616170992, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19981981882685385, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19945440561655134, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19991066637633492, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19903339071059278, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19573914534302939, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19737577883427085, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20134717923461695, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19686612408439066, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19281812365655834, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.193705998773794, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19062814096383218, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18869183249236912, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17983627864921545, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17891700889464643, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17310180006504572, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17187218528604578, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17444483524941337, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17434317482158943, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17110415523029476, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1642783889841072, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17006059568701296, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17339016153166906, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16671960788276322, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17183716191714662, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.182894951851203, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18846973838223324, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17526378388591116, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1798314618463576, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 16, "lr_best": 8.1e-05, "wd_best": 0.05, "train/loss_best": 2.1742798614501955, "validation/loss_best": 2.4226605892181396, "validation/acc_best": 0.2707641196013289, "validation/f1_best": 0.19981981882685385} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 2.0437963724136354, "train/grad": 0.2496294429153204, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11775634765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11593994140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11291015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.110028076171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10698974609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10251708984375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.096446533203125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0865869140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.06107421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9822088623046876, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.790812683105469, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.643025665283203, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5488595581054687, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.428095817565918, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.336940956115723, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2641225242614746, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.184377670288086, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1069023418426513, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.025745553970337, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9566916275024413, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8776857137680054, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8021058750152588, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7277477955818177, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6524158215522766, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5646725237369536, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4808157932758332, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.401968876719475, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3411184173822404, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.259232558608055, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1801112347841263, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1187716507911682, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0741621533036232, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0330452147126197, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0146878808736801, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9863722661137581, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.9579059627652168, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9465895926952362, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.937001103758812, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.9996217930316925, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0613089764118195, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.1108499020338058, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.152452449798584, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.197219035923481, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2307548558712005, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020281159342266618, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020280161537230015, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020279381298460065, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02027779399883002, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020277336086146534, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020277319862507283, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02028537943493575, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02032820682041347, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02054678671527654, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021450207959860565, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024495147494599225, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02887486347928643, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03163798962719738, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.035382277183234695, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03785783206112683, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03925192563794553, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.04032401267439127, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04071168972179293, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04125157264992595, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04167423993349075, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04188383903354406, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04194831544533372, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042172599658370016, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04274936573579907, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0435757651925087, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044199308678507805, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04471563939005137, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04477607062086463, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045071139652282, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04502284547314048, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04469073737040162, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04441214056685567, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04409553490579128, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04359161531552672, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04334220442920923, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04303589591756463, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04294803222641349, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04256385033950209, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.042522434554994104, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04217591369524598, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04172877004370093, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04151007741689682, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04122182201594114, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04067459313198924, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1169986724853516, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.115872383117676, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.11401629447937, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.112133502960205, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.11014723777771, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1069531440734863, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1021101474761963, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.093505620956421, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.069143772125244, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.989297389984131, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7961292266845703, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.647583246231079, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5638933181762695, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4807240962982178, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.446570873260498, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.429158926010132, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4222354888916016, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4245376586914062, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4411911964416504, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4599287509918213, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.489762783050537, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.519012212753296, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5609326362609863, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6111464500427246, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6782960891723633, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.747246026992798, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.8100714683532715, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8636856079101562, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.97758150100708, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.098320245742798, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.184886932373047, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2624268531799316, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.302835702896118, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3600234985351562, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4175992012023926, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4600489139556885, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.4871270656585693, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.526933431625366, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.439591884613037, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.366823673248291, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.246868848800659, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2140719890594482, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.176880121231079, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1661176681518555, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07290513104466592, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07678110003691399, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08342561830933924, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.10114433370247324, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1343669250645995, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17644887412329271, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21077888519748986, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2277593207825766, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25064599483204136, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25950535252860835, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26578073089701, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.266703580657069, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26578073089701, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2552602436323367, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2511997046880768, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24307862679955702, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2349575489110373, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22388335179032853, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21133259505352528, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20930232558139536, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20450350682908822, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20228866740494647, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20173495754891105, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20228866740494647, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19158361018826134, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19582871908453303, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19933554817275748, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.1921373200442968, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20468807678110004, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21188630490956073, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21244001476559615, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21262458471760798, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011382152928417053, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01300919099370986, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01635742266588179, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01722456996057401, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018531675837945055, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.021997263841390714, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.027459718299235703, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03469183749824157, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04811381698214263, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06587317267229308, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09735331105844547, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13131156603801333, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15124194613382885, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17332632823870597, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18863353270951086, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1957896885522715, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20057228105758518, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19832054017751063, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20051791560592705, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19811272542231487, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19561982003044873, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19795357858339488, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2014976157971727, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19580311570478678, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19338204282115065, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19331768012641748, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1899619182187394, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18808415995519998, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1786748753631474, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1783678807557971, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17404190943772893, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17142180634915818, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17342352267545272, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1758935444540122, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1734220308443717, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16490486014056074, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17022959147765016, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17493771344043219, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16617416749391725, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17394143417759844, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1827932986411287, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18432420336350522, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17922589328296665, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1816368975491649, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 16, "lr_best": 8.1e-05, "wd_best": 0.05, "train/loss_best": 2.184377670288086, "validation/loss_best": 2.4222354888916016, "validation/acc_best": 0.27150239940937615, "validation/f1_best": 0.20057228105758518} diff --git a/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cdc0f384ed5b83dac216e5a629b81254606bd557 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:48 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:14:38 time: 3.7856 data: 2.9313 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:16 time: 0.1890 data: 0.0601 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:51 time: 0.1671 data: 0.0498 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:40 time: 0.1758 data: 0.0546 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:33 time: 0.1709 data: 0.0519 max mem: 2851 +extract (train) [100/232] eta: 0:00:27 time: 0.1754 data: 0.0571 max mem: 2851 +extract (train) [120/232] eta: 0:00:22 time: 0.1531 data: 0.0443 max mem: 2851 +extract (train) [140/232] eta: 0:00:18 time: 0.1666 data: 0.0528 max mem: 2851 +extract (train) [160/232] eta: 0:00:13 time: 0.1595 data: 0.0470 max mem: 2851 +extract (train) [180/232] eta: 0:00:09 time: 0.1664 data: 0.0525 max mem: 2851 +extract (train) [200/232] eta: 0:00:05 time: 0.1620 data: 0.0506 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1453 data: 0.0412 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1378 data: 0.0391 max mem: 2851 +extract (train) Total time: 0:00:42 (0.1819 s / it) +extract (validation) [ 0/50] eta: 0:02:20 time: 2.8169 data: 2.6676 max mem: 2851 +extract (validation) [20/50] eta: 0:00:09 time: 0.2090 data: 0.0740 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1357 data: 0.0350 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1339 data: 0.0350 max mem: 2851 +extract (validation) Total time: 0:00:11 (0.2245 s / it) +extract (test) [ 0/50] eta: 0:02:29 time: 2.9931 data: 2.8634 max mem: 2851 +extract (test) [20/50] eta: 0:00:09 time: 0.1930 data: 0.0628 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1538 data: 0.0456 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1464 data: 0.0428 max mem: 2851 +extract (test) Total time: 0:00:11 (0.2312 s / it) +feature extraction time: 0:01:05 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | train | 0.70819 | 0.018364 | 0.66683 | 0.022029 | 0.6623 | 0.0203 | +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | test | 0.62 | 0.038342 | 0.52877 | 0.049457 | 0.54226 | 0.041356 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.000774263682681127, "split": "test", "acc": 0.68, "acc_std": 0.030736629613540897, "f1": 0.5733333333333333, "f1_std": 0.05189001976482154, "bacc": 0.5942275042444822, "bacc_std": 0.03683512850271709} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04277136892829127, "f1": 0.6155585707824514, "f1_std": 0.04936262247944019, "bacc": 0.6137521222410866, "bacc_std": 0.04616418432116035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04848987935641828, "f1": 0.6179966044142615, "f1_std": 0.05050006009842691, "bacc": 0.6179966044142615, "bacc_std": 0.050408197170613095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.044444464222217824, "f1": 0.6178622120318812, "f1_std": 0.0483860205983203, "bacc": 0.615874363327674, "bacc_std": 0.04680937673319964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04224012784071563, "f1": 0.5324918186068257, "f1_std": 0.04990448951082515, "bacc": 0.5398981324278438, "bacc_std": 0.044283867469402716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04451380010738243, "f1": 0.5324918186068257, "f1_std": 0.05261052105748719, "bacc": 0.5398981324278438, "bacc_std": 0.046844995667879634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 2.782559402207126, "split": "test", "acc": 0.47, "acc_std": 0.05139017804989589, "f1": 0.4403970013726111, "f1_std": 0.051665368136301855, "bacc": 0.4401528013582343, "bacc_std": 0.05200238970440739} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.044051220187413656, "f1": 0.5863970588235294, "f1_std": 0.052056350631273174, "bacc": 0.5874363327674024, "bacc_std": 0.04724368307900424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.73, "acc_std": 0.03768944679880564, "f1": 0.6754417598269022, "f1_std": 0.050577116921077185, "bacc": 0.6702037351443124, "bacc_std": 0.04406554199505446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04443790724145322, "f1": 0.6043956043956044, "f1_std": 0.049265180585515815, "bacc": 0.6027164685908319, "bacc_std": 0.04750955857849844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.040212689539497345, "f1": 0.6408295678368672, "f1_std": 0.0509312152451608, "bacc": 0.6379456706281834, "bacc_std": 0.04584673033014523} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04825188493727473, "f1": 0.5577607593571352, "f1_std": 0.0518659397048125, "bacc": 0.5573005093378608, "bacc_std": 0.050764171992111} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.040304893003207436, "f1": 0.5872154735228211, "f1_std": 0.04987334119805604, "bacc": 0.5904074702886248, "bacc_std": 0.04374388828264086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04498719373332815, "f1": 0.5766488413547237, "f1_std": 0.05078639753526829, "bacc": 0.5764006791171477, "bacc_std": 0.04804089903406483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04330683086996784, "f1": 0.6349153667441089, "f1_std": 0.048321722942161256, "bacc": 0.6320033955857385, "bacc_std": 0.04625283487390942} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 2.782559402207126, "split": "test", "acc": 0.59, "acc_std": 0.05059563617546478, "f1": 0.5777983729790959, "f1_std": 0.05142044196964082, "bacc": 0.58276740237691, "bacc_std": 0.0526037340494783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03856329342781812, "f1": 0.5628946090335114, "f1_std": 0.04917020115973005, "bacc": 0.5721561969439728, "bacc_std": 0.04170609521593963} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.042491617996965, "f1": 0.5071523019593701, "f1_std": 0.05090138759945219, "bacc": 0.5216468590831919, "bacc_std": 0.044021137704484334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 21.54434690031882, "split": "test", "acc": 0.52, "acc_std": 0.05258934873146843, "f1": 0.5073891625615763, "f1_std": 0.05291226820519527, "bacc": 0.5110356536502546, "bacc_std": 0.05422756704125392} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04369902058399021, "f1": 0.5634191176470589, "f1_std": 0.05118711544378891, "bacc": 0.566213921901528, "bacc_std": 0.046604896158455617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04264118197236094, "f1": 0.609375, "f1_std": 0.050920675764732984, "bacc": 0.6086587436332768, "bacc_std": 0.04633493208437767} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.046937486085217646, "f1": 0.5623386825272135, "f1_std": 0.05279943125632097, "bacc": 0.5632427843803056, "bacc_std": 0.04940461508775626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.050275640224665456, "f1": 0.5384615384615385, "f1_std": 0.05410545452032071, "bacc": 0.5390492359932089, "bacc_std": 0.05216279865791108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.039359242878896944, "f1": 0.587178241864983, "f1_std": 0.0518995362724847, "bacc": 0.5933786078098472, "bacc_std": 0.043844258623806844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.03874296839427769, "f1": 0.5872154735228211, "f1_std": 0.04856378598021259, "bacc": 0.5904074702886248, "bacc_std": 0.04255879245264194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.0442093021885666, "f1": 0.6570417081535569, "f1_std": 0.04929526034755114, "bacc": 0.6532258064516129, "bacc_std": 0.04723120308719199} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04159736049318513, "f1": 0.6553308823529411, "f1_std": 0.05078575885749312, "bacc": 0.6511035653650254, "bacc_std": 0.04659728183366576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.67, "acc_std": 0.03067796603427286, "f1": 0.553993782943641, "f1_std": 0.051359156581592735, "bacc": 0.5810696095076401, "bacc_std": 0.03623461740012693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.042535161925164926, "f1": 0.5952380952380952, "f1_std": 0.05439996012862267, "bacc": 0.5984719864176571, "bacc_std": 0.04721323439084075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.037467185642906235, "f1": 0.5862068965517242, "f1_std": 0.05314493459309705, "bacc": 0.5963497453310695, "bacc_std": 0.04261170123691426} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.000774263682681127, "split": "test", "acc": 0.68, "acc_std": 0.03890572708483931, "f1": 0.6114618746964546, "f1_std": 0.05213949409250786, "bacc": 0.6146010186757216, "bacc_std": 0.0438386360921965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.04585163464915945, "f1": 0.5384615384615385, "f1_std": 0.05025710180174014, "bacc": 0.5390492359932089, "bacc_std": 0.04839080570630248} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04169225827416883, "f1": 0.66078697421981, "f1_std": 0.04900294942351862, "bacc": 0.6561969439728353, "bacc_std": 0.04594011927019174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.037143241646361444, "f1": 0.627359057579036, "f1_std": 0.04924439057761353, "bacc": 0.6277589134125636, "bacc_std": 0.04238580014377241} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.52, "acc_std": 0.04902917906716367, "f1": 0.5104039167686658, "f1_std": 0.04935047167202559, "bacc": 0.5161290322580645, "bacc_std": 0.05097404658807995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.0467050318488276, "f1": 0.6043956043956044, "f1_std": 0.05175280086752021, "bacc": 0.6027164685908319, "bacc_std": 0.04994603542896979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.040771382120305906, "f1": 0.5792426367461431, "f1_std": 0.049714054848068435, "bacc": 0.5823429541595926, "bacc_std": 0.04407053423483324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.039408202191929545, "f1": 0.5238095238095238, "f1_std": 0.04776189835226844, "bacc": 0.5348047538200339, "bacc_std": 0.04162718831390681} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.04189844388518504, "f1": 0.6521153630344518, "f1_std": 0.04886775212455069, "bacc": 0.6481324278438031, "bacc_std": 0.04623444912076014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.040883106535585086, "f1": 0.5944849959448499, "f1_std": 0.04888557286750138, "bacc": 0.5955008488964346, "bacc_std": 0.04405137536803788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.56, "acc_std": 0.04783058017628471, "f1": 0.537620849096259, "f1_std": 0.04993789794468509, "bacc": 0.5382003395585738, "bacc_std": 0.050065650091611735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.03861988606922604, "f1": 0.6428571428571428, "f1_std": 0.05049855592888827, "bacc": 0.6409168081494058, "bacc_std": 0.0441068985962559} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.04438772803377078, "f1": 0.5960257670051315, "f1_std": 0.048085910881573234, "bacc": 0.5946519524617997, "bacc_std": 0.04668779065305063} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.039695133202950705, "f1": 0.6493688639551192, "f1_std": 0.05053104375039742, "bacc": 0.6460101867572157, "bacc_std": 0.04523567723721884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.77, "acc_std": 0.03939796441442122, "f1": 0.7335187116209014, "f1_std": 0.04950828575556397, "bacc": 0.7228353140916808, "bacc_std": 0.04632578261857936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.042296453752058226, "f1": 0.5634191176470589, "f1_std": 0.05003778234165454, "bacc": 0.566213921901528, "bacc_std": 0.04529927427406702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 21.54434690031882, "split": "test", "acc": 0.58, "acc_std": 0.049160353945023626, "f1": 0.565936337329475, "f1_std": 0.050333300825817016, "bacc": 0.5696095076400679, "bacc_std": 0.051501506448355466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.042002338030162074, "f1": 0.6521153630344518, "f1_std": 0.04890805221489297, "bacc": 0.6481324278438031, "bacc_std": 0.04612783881202852} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04315617684642605, "f1": 0.592944369063772, "f1_std": 0.05008514708316441, "bacc": 0.5925297113752122, "bacc_std": 0.04653431799624525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04186960233868958, "f1": 0.5872154735228211, "f1_std": 0.052089441479292606, "bacc": 0.5904074702886248, "bacc_std": 0.0457184701150498} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.04394858814569587, "f1": 0.525101763907734, "f1_std": 0.04925723519474069, "bacc": 0.5288624787775891, "bacc_std": 0.04576065646434931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04594526743855128, "f1": 0.6043956043956044, "f1_std": 0.05108141746199053, "bacc": 0.6027164685908319, "bacc_std": 0.04921371355729635} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.046641423648941076, "f1": 0.5755517826825127, "f1_std": 0.04922807942378391, "bacc": 0.5755517826825127, "bacc_std": 0.04909963904002723} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.04654825882887565, "f1": 0.47456914670029426, "f1_std": 0.04571767923692783, "bacc": 0.47453310696095075, "bacc_std": 0.045906918194169734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04858992076552502, "f1": 0.5960257670051315, "f1_std": 0.053327716404404285, "bacc": 0.5946519524617997, "bacc_std": 0.05170670299797897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04263674940705495, "f1": 0.5481404240528328, "f1_std": 0.050460640504954794, "bacc": 0.5530560271646858, "bacc_std": 0.045366017278115796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 166.81005372000556, "split": "test", "acc": 0.59, "acc_std": 0.047769714254954465, "f1": 0.5710848415106182, "f1_std": 0.04885226079628632, "bacc": 0.5725806451612903, "bacc_std": 0.04926112876344077} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04806986165987999, "f1": 0.6216897856242118, "f1_std": 0.05020571911418118, "bacc": 0.6230899830220713, "bacc_std": 0.05049275220263842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.038603191577899355, "f1": 0.6514004087029691, "f1_std": 0.05191555313728436, "bacc": 0.648981324278438, "bacc_std": 0.04456835347795613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.56, "acc_std": 0.04742544043021635, "f1": 0.5225694444444444, "f1_std": 0.051029139827030244, "bacc": 0.5229202037351443, "bacc_std": 0.049639849423037244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.040202666578225875, "f1": 0.6695156695156695, "f1_std": 0.04840326159540572, "bacc": 0.6642614601018676, "bacc_std": 0.04503985354922326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.036247460600709665, "f1": 0.5783730158730158, "f1_std": 0.05049094614126628, "bacc": 0.5882852292020373, "bacc_std": 0.04075973323452815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 21.54434690031882, "split": "test", "acc": 0.55, "acc_std": 0.04771942581381297, "f1": 0.5248653785239151, "f1_std": 0.04885202447274437, "bacc": 0.5250424448217317, "bacc_std": 0.04900030924280954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.042978743583311045, "f1": 0.5713127099988413, "f1_std": 0.05176255931612501, "bacc": 0.5742784380305602, "bacc_std": 0.046450711647883274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.046169682693299935, "f1": 0.565936337329475, "f1_std": 0.0469895086396627, "bacc": 0.5696095076400679, "bacc_std": 0.04818929973274407} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.043131686727972976, "f1": 0.5558672276764843, "f1_std": 0.0523967815617926, "bacc": 0.5611205432937181, "bacc_std": 0.04641474866162193} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04479919195699852, "f1": 0.5847828526540231, "f1_std": 0.05143843886985339, "bacc": 0.5844651952461799, "bacc_std": 0.04847250386261593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04474319613080853, "f1": 0.6011396011396011, "f1_std": 0.05305899889450785, "bacc": 0.6005942275042444, "bacc_std": 0.048770019244813355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04067628301602791, "f1": 0.5944849959448499, "f1_std": 0.04928671416219566, "bacc": 0.5955008488964346, "bacc_std": 0.0444172881789824} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.000774263682681127, "split": "test", "acc": 0.67, "acc_std": 0.03445126412775008, "f1": 0.5764343473238351, "f1_std": 0.050888489064860036, "bacc": 0.5912563667232598, "bacc_std": 0.03929187207403891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04122567646503814, "f1": 0.5143273433705683, "f1_std": 0.04924436446425617, "bacc": 0.5297113752122241, "bacc_std": 0.04259209948312931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04190751722543343, "f1": 0.6323529411764706, "f1_std": 0.049305480374412375, "bacc": 0.6298811544991512, "bacc_std": 0.045275786603852244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 1291.5496650148827, "split": "test", "acc": 0.57, "acc_std": 0.05017910322036454, "f1": 0.5664885573142454, "f1_std": 0.04985104033358967, "bacc": 0.581918505942275, "bacc_std": 0.05150845717111757} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.043248024232327646, "f1": 0.50997150997151, "f1_std": 0.04921508989038315, "bacc": 0.515704584040747, "bacc_std": 0.04522025283875993} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.046275743105864874, "f1": 0.554367201426025, "f1_std": 0.05114936117160911, "bacc": 0.5551782682512734, "bacc_std": 0.04860033491428074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.000774263682681127, "split": "test", "acc": 0.69, "acc_std": 0.035506866941480475, "f1": 0.6112852664576802, "f1_std": 0.05146837049809206, "bacc": 0.6175721561969439, "bacc_std": 0.041218261308588126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04395857595509664, "f1": 0.5863970588235294, "f1_std": 0.051798751158720534, "bacc": 0.5874363327674024, "bacc_std": 0.04739767414166249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.03937946673077226, "f1": 0.6033177064551027, "f1_std": 0.049849766014654365, "bacc": 0.6065365025466893, "bacc_std": 0.04306558442341393} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04121473522904156, "f1": 0.6108031607500884, "f1_std": 0.051745442356282666, "bacc": 0.6116298811544991, "bacc_std": 0.04574801539268233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.0399095777978169, "f1": 0.6026180458158018, "f1_std": 0.049791065432573195, "bacc": 0.6035653650254669, "bacc_std": 0.044132056201035434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04566519462347665, "f1": 0.5989304812834224, "f1_std": 0.05039282945270579, "bacc": 0.597623089983022, "bacc_std": 0.04779268998359048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.039005763676667075, "f1": 0.5792763553311696, "f1_std": 0.050154649859558226, "bacc": 0.5853140916808149, "bacc_std": 0.04275125937926583} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.049621995123130626, "f1": 0.5796553173602353, "f1_std": 0.05164147892762746, "bacc": 0.5806451612903225, "bacc_std": 0.052138683193298986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04225444828654139, "f1": 0.5783475783475784, "f1_std": 0.04927786317854687, "bacc": 0.5793718166383701, "bacc_std": 0.04561249419529883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03957314240744599, "f1": 0.5714285714285714, "f1_std": 0.0492308765102415, "bacc": 0.5772495755517827, "bacc_std": 0.04288495908842678} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 166.81005372000556, "split": "test", "acc": 0.56, "acc_std": 0.04400586779055719, "f1": 0.5098039215686274, "f1_std": 0.04914286928388947, "bacc": 0.5127334465195246, "bacc_std": 0.046033859951775294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04589185548656755, "f1": 0.5555555555555556, "f1_std": 0.05325900343796785, "bacc": 0.5581494057724957, "bacc_std": 0.048876738658343286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.044693247812169566, "f1": 0.64349376114082, "f1_std": 0.050921199274246945, "bacc": 0.6400679117147707, "bacc_std": 0.04834830926838689} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.74, "acc_std": 0.03629377908126958, "f1": 0.6843127731908694, "f1_std": 0.05108378237636309, "bacc": 0.6782682512733447, "bacc_std": 0.04379747710619452} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04499066569856462, "f1": 0.5523528769516323, "f1_std": 0.04785661818701442, "bacc": 0.5522071307300509, "bacc_std": 0.04626587123696536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.000774263682681127, "split": "test", "acc": 0.67, "acc_std": 0.03502371196775121, "f1": 0.5764343473238351, "f1_std": 0.05233653378290198, "bacc": 0.5912563667232598, "bacc_std": 0.04002633085863833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 1291.5496650148827, "split": "test", "acc": 0.63, "acc_std": 0.05081862257086471, "f1": 0.6009060511271707, "f1_std": 0.053810753861437015, "bacc": 0.5997453310696095, "bacc_std": 0.05276452961869997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04602699642601068, "f1": 0.5577607593571352, "f1_std": 0.04998317101197874, "bacc": 0.5573005093378608, "bacc_std": 0.0490519133303839} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.03923965341335215, "f1": 0.6343908479773559, "f1_std": 0.050090822011478915, "bacc": 0.6328522920203735, "bacc_std": 0.04416126339782681} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.03705590911042393, "f1": 0.587178241864983, "f1_std": 0.04962565319413015, "bacc": 0.5933786078098472, "bacc_std": 0.041286367439623155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.044387457687955045, "f1": 0.5024875621890548, "f1_std": 0.048658932682766505, "bacc": 0.5076400679117148, "bacc_std": 0.045479593773532924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 21.54434690031882, "split": "test", "acc": 0.56, "acc_std": 0.04806538879484904, "f1": 0.5452666391070691, "f1_std": 0.04831039666654839, "bacc": 0.5483870967741935, "bacc_std": 0.04918423056231293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04396021383023517, "f1": 0.6263736263736264, "f1_std": 0.04962542459903583, "bacc": 0.6239388794567062, "bacc_std": 0.048158213756542334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 166.81005372000556, "split": "test", "acc": 0.55, "acc_std": 0.04653043735019047, "f1": 0.529239460194581, "f1_std": 0.048117163702210176, "bacc": 0.5301358234295416, "bacc_std": 0.04879885908818956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04733426243219598, "f1": 0.5876736111111112, "f1_std": 0.05216983789690622, "bacc": 0.5865874363327674, "bacc_std": 0.05078974637886583} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 31.834 | 183.13 | 0.79861 | 0.10142 | 0.76515 | 0.1232 | 0.76035 | 0.12253 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 31.834 | 183.13 | 0.6326 | 0.052486 | 0.58374 | 0.048046 | 0.58595 | 0.045347 | + + +done! total time: 0:05:11 diff --git a/data_scaling/n200_2/pretrain/config.yaml b/data_scaling/n200_2/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..255fe8435fb391a2900facc319ab4efc9b07e49f --- /dev/null +++ b/data_scaling/n200_2/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n200_2/pretrain +notes: data scaling experiment n200_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n200_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..00999}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n200_2/pretrain/log.json b/data_scaling/n200_2/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..6282a791ac32487c182c31b876e139a934e78e69 --- /dev/null +++ b/data_scaling/n200_2/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.06074886535599828, "train/loss": 0.9929967404460907, "eval/hcp-train-subset/loss": 0.9907431679387246, "eval/hcp-val/loss": 0.9907587920465777, "eval/nsd-val/loss": 0.9919356838349374} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.09351523154258728, "train/loss": 0.986495768699646, "eval/hcp-train-subset/loss": 0.9877115382302192, "eval/hcp-val/loss": 0.9871609307104542, "eval/nsd-val/loss": 0.9874657959707321} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.14525675498695612, "train/loss": 0.9820738704299927, "eval/hcp-train-subset/loss": 0.9808463252359821, "eval/hcp-val/loss": 0.980232952102538, "eval/nsd-val/loss": 0.9813966395393494} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.22075390657766333, "train/loss": 0.9706496034145355, "eval/hcp-train-subset/loss": 0.9609758171343035, "eval/hcp-val/loss": 0.9621732955978762, "eval/nsd-val/loss": 0.946446277441517} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.25123899827846036, "train/loss": 0.9345017951488495, "eval/hcp-train-subset/loss": 0.9174794506642127, "eval/hcp-val/loss": 0.9159393512433575, "eval/nsd-val/loss": 0.884861292377595} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.17747241761091137, "train/loss": 0.8922654366493226, "eval/hcp-train-subset/loss": 0.8803248453524805, "eval/hcp-val/loss": 0.8790773566692106, "eval/nsd-val/loss": 0.8424928832438684} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.12238330470347984, "train/loss": 0.867705241613388, "eval/hcp-train-subset/loss": 0.8673066727576717, "eval/hcp-val/loss": 0.8655408351652084, "eval/nsd-val/loss": 0.8303181577113367} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.1037894368042884, "train/loss": 0.8538292635154724, "eval/hcp-train-subset/loss": 0.8628166587122025, "eval/hcp-val/loss": 0.8607290762086068, "eval/nsd-val/loss": 0.8269688756235184} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.09880859165890059, "train/loss": 0.8468218855571746, "eval/hcp-train-subset/loss": 0.8601642731697329, "eval/hcp-val/loss": 0.8583529245468878, "eval/nsd-val/loss": 0.8248861797394291} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.10141315121380771, "train/loss": 0.8383862003993988, "eval/hcp-train-subset/loss": 0.858305658063581, "eval/hcp-val/loss": 0.8568743332739799, "eval/nsd-val/loss": 0.8243883915485875} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.10129307399125066, "train/loss": 0.8328803121948242, "eval/hcp-train-subset/loss": 0.8574367694316372, "eval/hcp-val/loss": 0.8551280931118996, "eval/nsd-val/loss": 0.825301754859186} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.09790804282322087, "train/loss": 0.8301758789920807, "eval/hcp-train-subset/loss": 0.857351605930636, "eval/hcp-val/loss": 0.8552730717966633, "eval/nsd-val/loss": 0.8271028716717997} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.10205107842358581, "train/loss": 0.8263192761421203, "eval/hcp-train-subset/loss": 0.855712209017046, "eval/hcp-val/loss": 0.8536175806676188, "eval/nsd-val/loss": 0.8211562056695262} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.10727709414742687, "train/loss": 0.8184426688480377, "eval/hcp-train-subset/loss": 0.8572292212517031, "eval/hcp-val/loss": 0.8547284324322978, "eval/nsd-val/loss": 0.8278160325942501} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.11076085550755872, "train/loss": 0.8117992891883851, "eval/hcp-train-subset/loss": 0.8562669840551191, "eval/hcp-val/loss": 0.8550113699128551, "eval/nsd-val/loss": 0.8245106733614399} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.1141433127931696, "train/loss": 0.8045218962097168, "eval/hcp-train-subset/loss": 0.8558692653332988, "eval/hcp-val/loss": 0.854046288036531, "eval/nsd-val/loss": 0.8244515176742307} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.11490780322975866, "train/loss": 0.8031361094474793, "eval/hcp-train-subset/loss": 0.8552336000627087, "eval/hcp-val/loss": 0.8533100553097264, "eval/nsd-val/loss": 0.8275022872032658} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.11752134790218702, "train/loss": 0.7948549908924103, "eval/hcp-train-subset/loss": 0.8574261367321014, "eval/hcp-val/loss": 0.8549363670810577, "eval/nsd-val/loss": 0.8386709565116514} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.11806112254488386, "train/loss": 0.7936267003154754, "eval/hcp-train-subset/loss": 0.8547361589247181, "eval/hcp-val/loss": 0.853979435659224, "eval/nsd-val/loss": 0.8342916850120791} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.12073047730651959, "train/loss": 0.7897758152580261, "eval/hcp-train-subset/loss": 0.8555082063521108, "eval/hcp-val/loss": 0.8539005767914557, "eval/nsd-val/loss": 0.8277239213066716} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.11888711814780092, "train/loss": 0.7894636356163025, "eval/hcp-train-subset/loss": 0.8570529222488403, "eval/hcp-val/loss": 0.854197072405969, "eval/nsd-val/loss": 0.8252206761990825} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.12250903351859849, "train/loss": 0.7834287502002716, "eval/hcp-train-subset/loss": 0.8577828772606388, "eval/hcp-val/loss": 0.8554161806260386, "eval/nsd-val/loss": 0.8294835311751212} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.12603224357551313, "train/loss": 0.7777083412647248, "eval/hcp-train-subset/loss": 0.8552338903950106, "eval/hcp-val/loss": 0.8539669282974736, "eval/nsd-val/loss": 0.8302801559048314} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.1259937200483016, "train/loss": 0.777787874212265, "eval/hcp-train-subset/loss": 0.8568591398577536, "eval/hcp-val/loss": 0.8549495243257091, "eval/nsd-val/loss": 0.8314102638152338} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.12925501137246853, "train/loss": 0.7733322998523712, "eval/hcp-train-subset/loss": 0.8569119726457903, "eval/hcp-val/loss": 0.8549876761051917, "eval/nsd-val/loss": 0.8321533443466309} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.1301940694759089, "train/loss": 0.7696429798603058, "eval/hcp-train-subset/loss": 0.8567405496874163, "eval/hcp-val/loss": 0.8540384827121612, "eval/nsd-val/loss": 0.8328149049512802} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.1288295753405128, "train/loss": 0.7737991156959534, "eval/hcp-train-subset/loss": 0.8570112778294471, "eval/hcp-val/loss": 0.8556579293743256, "eval/nsd-val/loss": 0.8308307278540826} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.13354393598598532, "train/loss": 0.7642915752506256, "eval/hcp-train-subset/loss": 0.8561610098808042, "eval/hcp-val/loss": 0.8544871143756374, "eval/nsd-val/loss": 0.8286346418242301} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.13351944186257347, "train/loss": 0.7627652764129639, "eval/hcp-train-subset/loss": 0.8574909615901208, "eval/hcp-val/loss": 0.8555521897731289, "eval/nsd-val/loss": 0.832959400069329} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.13470303897802707, "train/loss": 0.7668015116500855, "eval/hcp-train-subset/loss": 0.856824601850202, "eval/hcp-val/loss": 0.8546492236275827, "eval/nsd-val/loss": 0.8305300464553218} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.1373441127124874, "train/loss": 0.7615819299697876, "eval/hcp-train-subset/loss": 0.8593883033721678, "eval/hcp-val/loss": 0.8568122329250458, "eval/nsd-val/loss": 0.8302586838122337} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.1374305129592592, "train/loss": 0.7591948459815979, "eval/hcp-train-subset/loss": 0.8580583333969116, "eval/hcp-val/loss": 0.8546640949864541, "eval/nsd-val/loss": 0.8360447777855781} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.13892441383690318, "train/loss": 0.7577868027496338, "eval/hcp-train-subset/loss": 0.8576850814204062, "eval/hcp-val/loss": 0.855865998614219, "eval/nsd-val/loss": 0.8296882241002975} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.1419349339823281, "train/loss": 0.7534558692550659, "eval/hcp-train-subset/loss": 0.8592259355129734, "eval/hcp-val/loss": 0.855732322700562, "eval/nsd-val/loss": 0.8300859274402741} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.143895031402774, "train/loss": 0.7519091509437561, "eval/hcp-train-subset/loss": 0.856386472140589, "eval/hcp-val/loss": 0.8546181513417151, "eval/nsd-val/loss": 0.830078270166151} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.14372638611673888, "train/loss": 0.7531642698287964, "eval/hcp-train-subset/loss": 0.8588362432295277, "eval/hcp-val/loss": 0.8562913344752404, "eval/nsd-val/loss": 0.8313780330842541} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.14384142661087987, "train/loss": 0.750057115983963, "eval/hcp-train-subset/loss": 0.8600366577025382, "eval/hcp-val/loss": 0.8589841233145806, "eval/nsd-val/loss": 0.8332039425449986} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.14616839765963383, "train/loss": 0.7520381041717529, "eval/hcp-train-subset/loss": 0.857975838645812, "eval/hcp-val/loss": 0.8556927115686478, "eval/nsd-val/loss": 0.8268031708655819} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.1505200284176653, "train/loss": 0.7466013880825043, "eval/hcp-train-subset/loss": 0.8612049894948159, "eval/hcp-val/loss": 0.8601584280690839, "eval/nsd-val/loss": 0.8306797665934409} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.15134367883548958, "train/loss": 0.7425296100139618, "eval/hcp-train-subset/loss": 0.862225366215552, "eval/hcp-val/loss": 0.8605233094384593, "eval/nsd-val/loss": 0.8346813744114291} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.15440325881026581, "train/loss": 0.7406586199569702, "eval/hcp-train-subset/loss": 0.8603614318755365, "eval/hcp-val/loss": 0.8590858299886027, "eval/nsd-val/loss": 0.8317492700392201} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.15227809418636767, "train/loss": 0.7426287366676331, "eval/hcp-train-subset/loss": 0.8601657411744518, "eval/hcp-val/loss": 0.8584105603156551, "eval/nsd-val/loss": 0.8297559445904147} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.1562015736516131, "train/loss": 0.741456170244217, "eval/hcp-train-subset/loss": 0.8620805855720274, "eval/hcp-val/loss": 0.8601814921825163, "eval/nsd-val/loss": 0.8320642584754575} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.15440719326138402, "train/loss": 0.7408219782924652, "eval/hcp-train-subset/loss": 0.8630445263078136, "eval/hcp-val/loss": 0.8605247668681606, "eval/nsd-val/loss": 0.8352487500636808} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.15839646718966824, "train/loss": 0.7380263511753082, "eval/hcp-train-subset/loss": 0.8628127123078992, "eval/hcp-val/loss": 0.8615153076187256, "eval/nsd-val/loss": 0.8343590786380153} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.15937190388762196, "train/loss": 0.7351642228794097, "eval/hcp-train-subset/loss": 0.8627195829345334, "eval/hcp-val/loss": 0.8612916882961027, "eval/nsd-val/loss": 0.8365419987709292} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.16109139839289818, "train/loss": 0.7339894597244263, "eval/hcp-train-subset/loss": 0.8622432293430451, "eval/hcp-val/loss": 0.8592312259058799, "eval/nsd-val/loss": 0.8326808339165103} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.16124788359661377, "train/loss": 0.7399765062332153, "eval/hcp-train-subset/loss": 0.8648987047133907, "eval/hcp-val/loss": 0.8632527647479888, "eval/nsd-val/loss": 0.8369248759362006} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.1616229197050942, "train/loss": 0.735530675201416, "eval/hcp-train-subset/loss": 0.8628119037997338, "eval/hcp-val/loss": 0.8614514393191184, "eval/nsd-val/loss": 0.8336973747899455} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.16531834598944933, "train/loss": 0.7309800907564163, "eval/hcp-train-subset/loss": 0.8645809100520226, "eval/hcp-val/loss": 0.862122526091914, "eval/nsd-val/loss": 0.8330993479298007} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.16626524612088134, "train/loss": 0.7342979122543335, "eval/hcp-train-subset/loss": 0.863880826580909, "eval/hcp-val/loss": 0.862056827352893, "eval/nsd-val/loss": 0.8326942872616553} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.16825580714539462, "train/loss": 0.7308739564037323, "eval/hcp-train-subset/loss": 0.8655327645040327, "eval/hcp-val/loss": 0.8624400054254839, "eval/nsd-val/loss": 0.834556172932348} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.16845951939458026, "train/loss": 0.7318064731693268, "eval/hcp-train-subset/loss": 0.8651785216023845, "eval/hcp-val/loss": 0.8621255361264751, "eval/nsd-val/loss": 0.833506760097319} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.1715095791341859, "train/loss": 0.7311238735961914, "eval/hcp-train-subset/loss": 0.8672118927201917, "eval/hcp-val/loss": 0.8654560456352849, "eval/nsd-val/loss": 0.8398959213687528} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.17294580892269698, "train/loss": 0.7287598222446442, "eval/hcp-train-subset/loss": 0.8661532527016055, "eval/hcp-val/loss": 0.8639100501614232, "eval/nsd-val/loss": 0.837516192466982} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.17358474203081534, "train/loss": 0.7297283543205261, "eval/hcp-train-subset/loss": 0.8651725111469146, "eval/hcp-val/loss": 0.8636563747159897, "eval/nsd-val/loss": 0.8346559107303619} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.17487595142217094, "train/loss": 0.7304398733901978, "eval/hcp-train-subset/loss": 0.868542968265472, "eval/hcp-val/loss": 0.8662177362749653, "eval/nsd-val/loss": 0.8387845687327846} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.1788934983346473, "train/loss": 0.7268931568813324, "eval/hcp-train-subset/loss": 0.8664373063272045, "eval/hcp-val/loss": 0.864177510623009, "eval/nsd-val/loss": 0.8342412960144782} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.1820407150501401, "train/loss": 0.724999898147583, "eval/hcp-train-subset/loss": 0.8662707978679288, "eval/hcp-val/loss": 0.8643075093146293, "eval/nsd-val/loss": 0.8361720519681131} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.18057002734140076, "train/loss": 0.7259486714363098, "eval/hcp-train-subset/loss": 0.8676299289349587, "eval/hcp-val/loss": 0.864532700469417, "eval/nsd-val/loss": 0.8378282564301645} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.1837256980408013, "train/loss": 0.7241565581226349, "eval/hcp-train-subset/loss": 0.8668681950338425, "eval/hcp-val/loss": 0.8644066981730922, "eval/nsd-val/loss": 0.8358535439737381} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.18283198898832256, "train/loss": 0.7249583155345917, "eval/hcp-train-subset/loss": 0.8678699529940083, "eval/hcp-val/loss": 0.8661888828200679, "eval/nsd-val/loss": 0.8391736457424779} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.18456242503534226, "train/loss": 0.7249735138511658, "eval/hcp-train-subset/loss": 0.871081055172028, "eval/hcp-val/loss": 0.8678340171614001, "eval/nsd-val/loss": 0.8378261837267107} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.18991707668497942, "train/loss": 0.7197164055347443, "eval/hcp-train-subset/loss": 0.8694705203656228, "eval/hcp-val/loss": 0.8660907774202286, "eval/nsd-val/loss": 0.8371644452694924} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.18921554228589035, "train/loss": 0.7204726678657531, "eval/hcp-train-subset/loss": 0.8705275683633743, "eval/hcp-val/loss": 0.8681311367019531, "eval/nsd-val/loss": 0.83871970253606} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.1895571667879877, "train/loss": 0.7221404563713074, "eval/hcp-train-subset/loss": 0.8718270919015331, "eval/hcp-val/loss": 0.8685564177651559, "eval/nsd-val/loss": 0.8460862732702686} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.19051936196423255, "train/loss": 0.7215649707508087, "eval/hcp-train-subset/loss": 0.8701012240302178, "eval/hcp-val/loss": 0.8664481437975361, "eval/nsd-val/loss": 0.8381536699110462} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.19591290835009723, "train/loss": 0.7148278578662872, "eval/hcp-train-subset/loss": 0.8713854628224527, "eval/hcp-val/loss": 0.8682702299087278, "eval/nsd-val/loss": 0.8397899981467954} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.19391465681472678, "train/loss": 0.7174831116771698, "eval/hcp-train-subset/loss": 0.8710751966122658, "eval/hcp-val/loss": 0.8685679368434414, "eval/nsd-val/loss": 0.8402931286442664} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.19518698169096824, "train/loss": 0.7163275263023376, "eval/hcp-train-subset/loss": 0.870512455701828, "eval/hcp-val/loss": 0.867338785240727, "eval/nsd-val/loss": 0.8393718744477918} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.19657022177829633, "train/loss": 0.7163009504032135, "eval/hcp-train-subset/loss": 0.8719363049153359, "eval/hcp-val/loss": 0.8699121225264764, "eval/nsd-val/loss": 0.8437373215152372} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.19868654596038574, "train/loss": 0.7157794020652771, "eval/hcp-train-subset/loss": 0.8731889157525955, "eval/hcp-val/loss": 0.8693707037356592, "eval/nsd-val/loss": 0.8405706324884968} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.19881315912669117, "train/loss": 0.7153051083278656, "eval/hcp-train-subset/loss": 0.8710730969905853, "eval/hcp-val/loss": 0.868619813073066, "eval/nsd-val/loss": 0.8423014427385023} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.20231571043608226, "train/loss": 0.7095257316207886, "eval/hcp-train-subset/loss": 0.8719962383470228, "eval/hcp-val/loss": 0.8693943331318517, "eval/nsd-val/loss": 0.8413551940071967} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.20074898637396027, "train/loss": 0.7185145177268982, "eval/hcp-train-subset/loss": 0.8739026602237455, "eval/hcp-val/loss": 0.8716033639446381, "eval/nsd-val/loss": 0.8412466251081036} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.20581496126111343, "train/loss": 0.7118735445785522, "eval/hcp-train-subset/loss": 0.8721510864073231, "eval/hcp-val/loss": 0.8696432094420156, "eval/nsd-val/loss": 0.8432149733266523} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.2044664387055762, "train/loss": 0.7115664150524139, "eval/hcp-train-subset/loss": 0.8722263901464401, "eval/hcp-val/loss": 0.8695706196369664, "eval/nsd-val/loss": 0.8433404141856778} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.20386270371307885, "train/loss": 0.7118117702579498, "eval/hcp-train-subset/loss": 0.8739733119164744, "eval/hcp-val/loss": 0.8728330366073116, "eval/nsd-val/loss": 0.8437683851488175} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.20352018478442635, "train/loss": 0.7160713409042359, "eval/hcp-train-subset/loss": 0.8724139102043644, "eval/hcp-val/loss": 0.8698414112291029, "eval/nsd-val/loss": 0.8450444465683352} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.20562076799906545, "train/loss": 0.7119615220165253, "eval/hcp-train-subset/loss": 0.8747318137076593, "eval/hcp-val/loss": 0.8716151637415732, "eval/nsd-val/loss": 0.844193696975708} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.20649914063687436, "train/loss": 0.7110919276618958, "eval/hcp-train-subset/loss": 0.8739719967688283, "eval/hcp-val/loss": 0.8717914041011564, "eval/nsd-val/loss": 0.843709175625155} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.20632213215068193, "train/loss": 0.7137980181980133, "eval/hcp-train-subset/loss": 0.874406042598909, "eval/hcp-val/loss": 0.8710521055806068, "eval/nsd-val/loss": 0.8447390910117857} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.20756376241470917, "train/loss": 0.7112266501331329, "eval/hcp-train-subset/loss": 0.8749512558983218, "eval/hcp-val/loss": 0.8725365534905465, "eval/nsd-val/loss": 0.8450656956241976} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.20902254831386372, "train/loss": 0.7091298444461822, "eval/hcp-train-subset/loss": 0.874635495485798, "eval/hcp-val/loss": 0.8719231563229715, "eval/nsd-val/loss": 0.8439989013056601} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.20842173441765077, "train/loss": 0.7131246441555024, "eval/hcp-train-subset/loss": 0.8733978136893241, "eval/hcp-val/loss": 0.8721965022625462, "eval/nsd-val/loss": 0.8446195760080891} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.20892425406063092, "train/loss": 0.7118115135288239, "eval/hcp-train-subset/loss": 0.8749923196531111, "eval/hcp-val/loss": 0.8721272830040224, "eval/nsd-val/loss": 0.8438200095007496} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.21078284344160106, "train/loss": 0.7100567783260345, "eval/hcp-train-subset/loss": 0.8739077996823096, "eval/hcp-val/loss": 0.871945938756389, "eval/nsd-val/loss": 0.8460658077270754} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.21001632502075956, "train/loss": 0.7114262472438813, "eval/hcp-train-subset/loss": 0.8740562565865055, "eval/hcp-val/loss": 0.8712447535607123, "eval/nsd-val/loss": 0.8460225443686208} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.20902271229104077, "train/loss": 0.7131079891490937, "eval/hcp-train-subset/loss": 0.8759983720317963, "eval/hcp-val/loss": 0.8730242540759425, "eval/nsd-val/loss": 0.8468314092005452} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.21100131321397925, "train/loss": 0.7082633395290375, "eval/hcp-train-subset/loss": 0.8761817668714831, "eval/hcp-val/loss": 0.8725108206272125, "eval/nsd-val/loss": 0.8460712115610799} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.21295957351711586, "train/loss": 0.7054266449785233, "eval/hcp-train-subset/loss": 0.8761070361060481, "eval/hcp-val/loss": 0.8739212607183764, "eval/nsd-val/loss": 0.8470285813654622} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.21217555239169297, "train/loss": 0.7091550577688217, "eval/hcp-train-subset/loss": 0.8751362014201379, "eval/hcp-val/loss": 0.872117583790133, "eval/nsd-val/loss": 0.8472357536515882} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.2120047974902254, "train/loss": 0.7074762072086335, "eval/hcp-train-subset/loss": 0.8745175619279185, "eval/hcp-val/loss": 0.872350683135371, "eval/nsd-val/loss": 0.847239847144773} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.21079717397394085, "train/loss": 0.7093232870435715, "eval/hcp-train-subset/loss": 0.8745136626305119, "eval/hcp-val/loss": 0.8721338539354263, "eval/nsd-val/loss": 0.8467171124873623} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.2118202142452007, "train/loss": 0.7079074987983703, "eval/hcp-train-subset/loss": 0.8746398187452747, "eval/hcp-val/loss": 0.8725971262301168, "eval/nsd-val/loss": 0.8466979649759108} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.21210052014790473, "train/loss": 0.7090149361419678, "eval/hcp-train-subset/loss": 0.8753533920934123, "eval/hcp-val/loss": 0.872240723140778, "eval/nsd-val/loss": 0.8467560920023149} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.2098160737817785, "train/loss": 0.7156093966960907, "eval/hcp-train-subset/loss": 0.8753257903360552, "eval/hcp-val/loss": 0.8723211663384591, "eval/nsd-val/loss": 0.8467711863979217} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.21401807334025383, "train/loss": 0.7060976057100296, "eval/hcp-train-subset/loss": 0.8748501702662437, "eval/hcp-val/loss": 0.8720592654520466, "eval/nsd-val/loss": 0.8469197596273115} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.211841072680321, "train/loss": 0.7058749144601821, "eval/hcp-train-subset/loss": 0.8750185783832304, "eval/hcp-val/loss": 0.8720644250992806, "eval/nsd-val/loss": 0.8466727656702842} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.21050761424233988, "train/loss": 0.7094936176300048, "eval/hcp-train-subset/loss": 0.8747062904219474, "eval/hcp-val/loss": 0.8730447455759971, "eval/nsd-val/loss": 0.8471881987587098} diff --git a/data_scaling/n200_2/pretrain/log.txt b/data_scaling/n200_2/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0676c3c65eced3b41ba3eb691196f35247f288ee --- /dev/null +++ b/data_scaling/n200_2/pretrain/log.txt @@ -0,0 +1,8236 @@ +pretraining fmri mae +start: 2026-01-17 20:35:51 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n200_2/pretrain +notes: data scaling experiment n200_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n200_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..00999}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..00999}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 10:21:09 lr: 0.000000 grad: 0.0118 (0.0118) loss: 0.9967 (0.9967) time: 5.9631 data: 4.8795 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:19:42 lr: 0.000000 grad: 0.0151 (0.0160) loss: 0.9956 (0.9960) time: 0.1488 data: 0.0716 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:16:57 lr: 0.000001 grad: 0.0140 (0.0152) loss: 0.9958 (0.9960) time: 0.1557 data: 0.0758 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:16:00 lr: 0.000001 grad: 0.0130 (0.0146) loss: 0.9963 (0.9961) time: 0.1263 data: 0.0400 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:15:34 lr: 0.000002 grad: 0.0128 (0.0142) loss: 0.9957 (0.9960) time: 0.1690 data: 0.0807 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:14:56 lr: 0.000002 grad: 0.0131 (0.0139) loss: 0.9959 (0.9960) time: 0.1452 data: 0.0606 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:14:36 lr: 0.000002 grad: 0.0131 (0.0138) loss: 0.9959 (0.9960) time: 0.1670 data: 0.0794 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:14:10 lr: 0.000003 grad: 0.0129 (0.0137) loss: 0.9965 (0.9959) time: 0.1496 data: 0.0736 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:13:43 lr: 0.000003 grad: 0.0131 (0.0136) loss: 0.9957 (0.9959) time: 0.1156 data: 0.0322 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:13:21 lr: 0.000004 grad: 0.0133 (0.0136) loss: 0.9958 (0.9959) time: 0.1302 data: 0.0431 max mem: 9377 +Train: [0] [1000/6250] eta: 0:12:56 lr: 0.000004 grad: 0.0139 (0.0137) loss: 0.9961 (0.9959) time: 0.1270 data: 0.0477 max mem: 9377 +Train: [0] [1100/6250] eta: 0:12:35 lr: 0.000004 grad: 0.0159 (0.0138) loss: 0.9958 (0.9959) time: 0.1401 data: 0.0541 max mem: 9377 +Train: [0] [1200/6250] eta: 0:12:14 lr: 0.000005 grad: 0.0190 (0.0142) loss: 0.9958 (0.9959) time: 0.1215 data: 0.0374 max mem: 9377 +Train: [0] [1300/6250] eta: 0:11:52 lr: 0.000005 grad: 0.0183 (0.0147) loss: 0.9956 (0.9959) time: 0.1205 data: 0.0335 max mem: 9377 +Train: [0] [1400/6250] eta: 0:11:32 lr: 0.000006 grad: 0.0262 (0.0156) loss: 0.9964 (0.9959) time: 0.1298 data: 0.0457 max mem: 9377 +Train: [0] [1500/6250] eta: 0:11:13 lr: 0.000006 grad: 0.0281 (0.0166) loss: 0.9956 (0.9958) time: 0.1312 data: 0.0500 max mem: 9377 +Train: [0] [1600/6250] eta: 0:10:54 lr: 0.000006 grad: 0.0281 (0.0172) loss: 0.9955 (0.9958) time: 0.1167 data: 0.0347 max mem: 9377 +Train: [0] [1700/6250] eta: 0:10:37 lr: 0.000007 grad: 0.0273 (0.0180) loss: 0.9955 (0.9958) time: 0.1250 data: 0.0409 max mem: 9377 +Train: [0] [1800/6250] eta: 0:10:21 lr: 0.000007 grad: 0.0330 (0.0188) loss: 0.9954 (0.9958) time: 0.1269 data: 0.0446 max mem: 9377 +Train: [0] [1900/6250] eta: 0:10:04 lr: 0.000008 grad: 0.0306 (0.0199) loss: 0.9953 (0.9958) time: 0.1116 data: 0.0175 max mem: 9377 +Train: [0] [2000/6250] eta: 0:09:48 lr: 0.000008 grad: 0.0565 (0.0214) loss: 0.9940 (0.9957) time: 0.1593 data: 0.0778 max mem: 9377 +Train: [0] [2100/6250] eta: 0:09:31 lr: 0.000008 grad: 0.0484 (0.0231) loss: 0.9947 (0.9957) time: 0.1362 data: 0.0451 max mem: 9377 +Train: [0] [2200/6250] eta: 0:09:16 lr: 0.000009 grad: 0.0535 (0.0246) loss: 0.9940 (0.9956) time: 0.1300 data: 0.0435 max mem: 9377 +Train: [0] [2300/6250] eta: 0:09:00 lr: 0.000009 grad: 0.0500 (0.0258) loss: 0.9942 (0.9956) time: 0.1218 data: 0.0344 max mem: 9377 +Train: [0] [2400/6250] eta: 0:08:44 lr: 0.000010 grad: 0.0443 (0.0272) loss: 0.9941 (0.9955) time: 0.1132 data: 0.0315 max mem: 9377 +Train: [0] [2500/6250] eta: 0:08:31 lr: 0.000010 grad: 0.0760 (0.0288) loss: 0.9934 (0.9954) time: 0.1534 data: 0.0731 max mem: 9377 +Train: [0] [2600/6250] eta: 0:08:16 lr: 0.000010 grad: 0.0482 (0.0303) loss: 0.9941 (0.9954) time: 0.1430 data: 0.0614 max mem: 9377 +Train: [0] [2700/6250] eta: 0:08:01 lr: 0.000011 grad: 0.0564 (0.0312) loss: 0.9938 (0.9953) time: 0.1285 data: 0.0451 max mem: 9377 +Train: [0] [2800/6250] eta: 0:07:47 lr: 0.000011 grad: 0.0547 (0.0325) loss: 0.9933 (0.9953) time: 0.1425 data: 0.0605 max mem: 9377 +Train: [0] [2900/6250] eta: 0:07:33 lr: 0.000012 grad: 0.0511 (0.0336) loss: 0.9933 (0.9952) time: 0.1350 data: 0.0505 max mem: 9377 +Train: [0] [3000/6250] eta: 0:07:19 lr: 0.000012 grad: 0.0655 (0.0346) loss: 0.9944 (0.9952) time: 0.1336 data: 0.0489 max mem: 9377 +Train: [0] [3100/6250] eta: 0:07:06 lr: 0.000012 grad: 0.0672 (0.0354) loss: 0.9921 (0.9951) time: 0.1257 data: 0.0391 max mem: 9377 +Train: [0] [3200/6250] eta: 0:06:51 lr: 0.000013 grad: 0.0608 (0.0365) loss: 0.9933 (0.9951) time: 0.1365 data: 0.0484 max mem: 9377 +Train: [0] [3300/6250] eta: 0:06:37 lr: 0.000013 grad: 0.0678 (0.0376) loss: 0.9929 (0.9950) time: 0.1263 data: 0.0354 max mem: 9377 +Train: [0] [3400/6250] eta: 0:06:24 lr: 0.000014 grad: 0.0675 (0.0386) loss: 0.9930 (0.9949) time: 0.1566 data: 0.0749 max mem: 9377 +Train: [0] [3500/6250] eta: 0:06:11 lr: 0.000014 grad: 0.0664 (0.0394) loss: 0.9938 (0.9949) time: 0.1493 data: 0.0568 max mem: 9377 +Train: [0] [3600/6250] eta: 0:05:58 lr: 0.000014 grad: 0.0648 (0.0404) loss: 0.9926 (0.9948) time: 0.1311 data: 0.0467 max mem: 9377 +Train: [0] [3700/6250] eta: 0:05:44 lr: 0.000015 grad: 0.0665 (0.0414) loss: 0.9935 (0.9948) time: 0.1153 data: 0.0221 max mem: 9377 +Train: [0] [3800/6250] eta: 0:05:31 lr: 0.000015 grad: 0.0758 (0.0423) loss: 0.9919 (0.9947) time: 0.1210 data: 0.0351 max mem: 9377 +Train: [0] [3900/6250] eta: 0:05:18 lr: 0.000016 grad: 0.0736 (0.0433) loss: 0.9919 (0.9946) time: 0.1310 data: 0.0428 max mem: 9377 +Train: [0] [4000/6250] eta: 0:05:04 lr: 0.000016 grad: 0.0901 (0.0444) loss: 0.9919 (0.9946) time: 0.1362 data: 0.0506 max mem: 9377 +Train: [0] [4100/6250] eta: 0:04:51 lr: 0.000016 grad: 0.0743 (0.0453) loss: 0.9919 (0.9945) time: 0.1483 data: 0.0637 max mem: 9377 +Train: [0] [4200/6250] eta: 0:04:37 lr: 0.000017 grad: 0.0684 (0.0462) loss: 0.9912 (0.9944) time: 0.1326 data: 0.0488 max mem: 9377 +Train: [0] [4300/6250] eta: 0:04:24 lr: 0.000017 grad: 0.0861 (0.0470) loss: 0.9919 (0.9944) time: 0.1480 data: 0.0637 max mem: 9377 +Train: [0] [4400/6250] eta: 0:04:10 lr: 0.000018 grad: 0.0818 (0.0478) loss: 0.9915 (0.9943) time: 0.1330 data: 0.0520 max mem: 9377 +Train: [0] [4500/6250] eta: 0:03:56 lr: 0.000018 grad: 0.0868 (0.0487) loss: 0.9925 (0.9942) time: 0.1249 data: 0.0368 max mem: 9377 +Train: [0] [4600/6250] eta: 0:03:43 lr: 0.000018 grad: 0.0834 (0.0495) loss: 0.9909 (0.9942) time: 0.1478 data: 0.0651 max mem: 9377 +Train: [0] [4700/6250] eta: 0:03:29 lr: 0.000019 grad: 0.0760 (0.0503) loss: 0.9916 (0.9941) time: 0.1390 data: 0.0606 max mem: 9377 +Train: [0] [4800/6250] eta: 0:03:16 lr: 0.000019 grad: 0.0713 (0.0510) loss: 0.9917 (0.9940) time: 0.1443 data: 0.0561 max mem: 9377 +Train: [0] [4900/6250] eta: 0:03:02 lr: 0.000020 grad: 0.0758 (0.0516) loss: 0.9907 (0.9940) time: 0.1202 data: 0.0392 max mem: 9377 +Train: [0] [5000/6250] eta: 0:02:49 lr: 0.000020 grad: 0.0746 (0.0523) loss: 0.9905 (0.9939) time: 0.1271 data: 0.0400 max mem: 9377 +Train: [0] [5100/6250] eta: 0:02:35 lr: 0.000020 grad: 0.0845 (0.0530) loss: 0.9915 (0.9938) time: 0.1237 data: 0.0397 max mem: 9377 +Train: [0] [5200/6250] eta: 0:02:21 lr: 0.000021 grad: 0.0744 (0.0538) loss: 0.9905 (0.9937) time: 0.1292 data: 0.0488 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:08 lr: 0.000021 grad: 0.0729 (0.0546) loss: 0.9898 (0.9937) time: 0.1418 data: 0.0576 max mem: 9377 +Train: [0] [5400/6250] eta: 0:01:54 lr: 0.000022 grad: 0.1048 (0.0554) loss: 0.9895 (0.9936) time: 0.1603 data: 0.0780 max mem: 9377 +Train: [0] [5500/6250] eta: 0:01:41 lr: 0.000022 grad: 0.0833 (0.0561) loss: 0.9880 (0.9935) time: 0.1324 data: 0.0470 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:27 lr: 0.000022 grad: 0.0834 (0.0567) loss: 0.9900 (0.9934) time: 0.1255 data: 0.0386 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:14 lr: 0.000023 grad: 0.0844 (0.0574) loss: 0.9900 (0.9934) time: 0.1462 data: 0.0705 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:00 lr: 0.000023 grad: 0.0848 (0.0580) loss: 0.9902 (0.9933) time: 0.1255 data: 0.0413 max mem: 9377 +Train: [0] [5900/6250] eta: 0:00:47 lr: 0.000024 grad: 0.0796 (0.0585) loss: 0.9890 (0.9932) time: 0.1347 data: 0.0532 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:33 lr: 0.000024 grad: 0.0956 (0.0591) loss: 0.9890 (0.9932) time: 0.1205 data: 0.0370 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:20 lr: 0.000024 grad: 0.0798 (0.0597) loss: 0.9900 (0.9931) time: 0.1351 data: 0.0512 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:06 lr: 0.000025 grad: 0.0893 (0.0604) loss: 0.9889 (0.9930) time: 0.1474 data: 0.0681 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1168 (0.0607) loss: 0.9862 (0.9930) time: 0.1187 data: 0.0334 max mem: 9377 +Train: [0] Total time: 0:14:12 (0.1364 s / it) +Averaged stats: lr: 0.000025 grad: 0.1168 (0.0607) loss: 0.9862 (0.9930) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:02:51 loss: 0.9863 (0.9863) time: 2.7582 data: 2.6883 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9906 (0.9907) time: 0.1470 data: 0.1162 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:13 (0.2129 s / it) +Averaged stats (hcp-train-subset): loss: 0.9906 (0.9907) +Eval (hcp-val): [0] [ 0/62] eta: 0:05:46 loss: 0.9882 (0.9882) time: 5.5822 data: 5.4581 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9903 (0.9908) time: 0.2176 data: 0.1929 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (hcp-val): loss: 0.9903 (0.9908) +Eval (nsd-val): [0] [ 0/62] eta: 0:03:40 loss: 0.9935 (0.9935) time: 3.5644 data: 3.4917 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9915 (0.9919) time: 0.1291 data: 0.1036 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (nsd-val): loss: 0.9915 (0.9919) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 7:15:34 lr: 0.000025 grad: 0.0934 (0.0934) loss: 0.9893 (0.9893) time: 4.1816 data: 3.9252 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:19:10 lr: 0.000025 grad: 0.1019 (0.1005) loss: 0.9881 (0.9893) time: 0.1395 data: 0.0531 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:16:40 lr: 0.000026 grad: 0.1094 (0.1037) loss: 0.9899 (0.9879) time: 0.1427 data: 0.0556 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:15:37 lr: 0.000026 grad: 0.0762 (0.1009) loss: 0.9872 (0.9876) time: 0.1612 data: 0.0756 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:15:04 lr: 0.000027 grad: 0.0802 (0.1008) loss: 0.9881 (0.9874) time: 0.1195 data: 0.0341 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:14:25 lr: 0.000027 grad: 0.0921 (0.1005) loss: 0.9877 (0.9875) time: 0.1176 data: 0.0175 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:13:54 lr: 0.000027 grad: 0.1030 (0.1011) loss: 0.9880 (0.9875) time: 0.1358 data: 0.0357 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:13:49 lr: 0.000028 grad: 0.0925 (0.1005) loss: 0.9881 (0.9875) time: 0.1997 data: 0.1129 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:14:01 lr: 0.000028 grad: 0.1006 (0.0996) loss: 0.9877 (0.9876) time: 0.1966 data: 0.1170 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:14:05 lr: 0.000029 grad: 0.0833 (0.0991) loss: 0.9881 (0.9877) time: 0.2102 data: 0.1252 max mem: 9377 +Train: [1] [1000/6250] eta: 0:14:03 lr: 0.000029 grad: 0.0879 (0.0980) loss: 0.9882 (0.9877) time: 0.1991 data: 0.1063 max mem: 9377 +Train: [1] [1100/6250] eta: 0:13:54 lr: 0.000029 grad: 0.0789 (0.0973) loss: 0.9892 (0.9877) time: 0.1310 data: 0.0459 max mem: 9377 +Train: [1] [1200/6250] eta: 0:13:39 lr: 0.000030 grad: 0.0840 (0.0970) loss: 0.9902 (0.9878) time: 0.1503 data: 0.0683 max mem: 9377 +Train: [1] [1300/6250] eta: 0:13:20 lr: 0.000030 grad: 0.0805 (0.0967) loss: 0.9881 (0.9878) time: 0.1528 data: 0.0672 max mem: 9377 +Train: [1] [1400/6250] eta: 0:13:00 lr: 0.000031 grad: 0.0856 (0.0969) loss: 0.9861 (0.9877) time: 0.1544 data: 0.0757 max mem: 9377 +Train: [1] [1500/6250] eta: 0:12:42 lr: 0.000031 grad: 0.0889 (0.0966) loss: 0.9878 (0.9877) time: 0.1613 data: 0.0768 max mem: 9377 +Train: [1] [1600/6250] eta: 0:12:23 lr: 0.000031 grad: 0.0737 (0.0964) loss: 0.9895 (0.9877) time: 0.1603 data: 0.0824 max mem: 9377 +Train: [1] [1700/6250] eta: 0:12:04 lr: 0.000032 grad: 0.0958 (0.0964) loss: 0.9883 (0.9876) time: 0.1552 data: 0.0704 max mem: 9377 +Train: [1] [1800/6250] eta: 0:11:44 lr: 0.000032 grad: 0.0961 (0.0965) loss: 0.9865 (0.9876) time: 0.1327 data: 0.0494 max mem: 9377 +Train: [1] [1900/6250] eta: 0:11:26 lr: 0.000033 grad: 0.1042 (0.0962) loss: 0.9864 (0.9876) time: 0.1594 data: 0.0754 max mem: 9377 +Train: [1] [2000/6250] eta: 0:11:08 lr: 0.000033 grad: 0.0914 (0.0961) loss: 0.9881 (0.9876) time: 0.1746 data: 0.0949 max mem: 9377 +Train: [1] [2100/6250] eta: 0:10:49 lr: 0.000033 grad: 0.0847 (0.0956) loss: 0.9874 (0.9876) time: 0.1585 data: 0.0835 max mem: 9377 +Train: [1] [2200/6250] eta: 0:10:32 lr: 0.000034 grad: 0.0809 (0.0954) loss: 0.9873 (0.9876) time: 0.1381 data: 0.0579 max mem: 9377 +Train: [1] [2300/6250] eta: 0:10:14 lr: 0.000034 grad: 0.0820 (0.0950) loss: 0.9894 (0.9876) time: 0.1372 data: 0.0572 max mem: 9377 +Train: [1] [2400/6250] eta: 0:09:56 lr: 0.000035 grad: 0.0800 (0.0949) loss: 0.9871 (0.9875) time: 0.1360 data: 0.0587 max mem: 9377 +Train: [1] [2500/6250] eta: 0:09:39 lr: 0.000035 grad: 0.0802 (0.0945) loss: 0.9865 (0.9875) time: 0.1588 data: 0.0779 max mem: 9377 +Train: [1] [2600/6250] eta: 0:09:22 lr: 0.000035 grad: 0.0851 (0.0946) loss: 0.9870 (0.9875) time: 0.1505 data: 0.0665 max mem: 9377 +Train: [1] [2700/6250] eta: 0:09:05 lr: 0.000036 grad: 0.0863 (0.0944) loss: 0.9861 (0.9875) time: 0.1330 data: 0.0523 max mem: 9377 +Train: [1] [2800/6250] eta: 0:08:48 lr: 0.000036 grad: 0.0831 (0.0942) loss: 0.9864 (0.9874) time: 0.1332 data: 0.0537 max mem: 9377 +Train: [1] [2900/6250] eta: 0:08:32 lr: 0.000037 grad: 0.0723 (0.0939) loss: 0.9883 (0.9874) time: 0.1436 data: 0.0698 max mem: 9377 +Train: [1] [3000/6250] eta: 0:08:16 lr: 0.000037 grad: 0.0840 (0.0938) loss: 0.9866 (0.9874) time: 0.1581 data: 0.0725 max mem: 9377 +Train: [1] [3100/6250] eta: 0:07:59 lr: 0.000037 grad: 0.0852 (0.0936) loss: 0.9869 (0.9874) time: 0.1120 data: 0.0295 max mem: 9377 +Train: [1] [3200/6250] eta: 0:07:43 lr: 0.000038 grad: 0.0817 (0.0935) loss: 0.9871 (0.9873) time: 0.1604 data: 0.0791 max mem: 9377 +Train: [1] [3300/6250] eta: 0:07:27 lr: 0.000038 grad: 0.0893 (0.0933) loss: 0.9861 (0.9873) time: 0.1447 data: 0.0523 max mem: 9377 +Train: [1] [3400/6250] eta: 0:07:10 lr: 0.000039 grad: 0.0903 (0.0932) loss: 0.9847 (0.9873) time: 0.1305 data: 0.0489 max mem: 9377 +Train: [1] [3500/6250] eta: 0:06:54 lr: 0.000039 grad: 0.0866 (0.0931) loss: 0.9862 (0.9873) time: 0.1490 data: 0.0702 max mem: 9377 +Train: [1] [3600/6250] eta: 0:06:39 lr: 0.000039 grad: 0.0800 (0.0929) loss: 0.9877 (0.9873) time: 0.1462 data: 0.0596 max mem: 9377 +Train: [1] [3700/6250] eta: 0:06:24 lr: 0.000040 grad: 0.0761 (0.0927) loss: 0.9870 (0.9873) time: 0.1585 data: 0.0762 max mem: 9377 +Train: [1] [3800/6250] eta: 0:06:09 lr: 0.000040 grad: 0.0872 (0.0927) loss: 0.9874 (0.9872) time: 0.1540 data: 0.0644 max mem: 9377 +Train: [1] [3900/6250] eta: 0:05:54 lr: 0.000041 grad: 0.0853 (0.0925) loss: 0.9869 (0.9872) time: 0.1710 data: 0.0901 max mem: 9377 +Train: [1] [4000/6250] eta: 0:05:40 lr: 0.000041 grad: 0.0855 (0.0925) loss: 0.9848 (0.9872) time: 0.1674 data: 0.0804 max mem: 9377 +Train: [1] [4100/6250] eta: 0:05:24 lr: 0.000041 grad: 0.0807 (0.0925) loss: 0.9869 (0.9872) time: 0.1355 data: 0.0567 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:09 lr: 0.000042 grad: 0.0891 (0.0924) loss: 0.9858 (0.9871) time: 0.1407 data: 0.0535 max mem: 9377 +Train: [1] [4300/6250] eta: 0:04:54 lr: 0.000042 grad: 0.0787 (0.0924) loss: 0.9874 (0.9871) time: 0.1543 data: 0.0723 max mem: 9377 +Train: [1] [4400/6250] eta: 0:04:39 lr: 0.000043 grad: 0.0831 (0.0924) loss: 0.9864 (0.9871) time: 0.1585 data: 0.0803 max mem: 9377 +Train: [1] [4500/6250] eta: 0:04:24 lr: 0.000043 grad: 0.0804 (0.0924) loss: 0.9871 (0.9871) time: 0.1418 data: 0.0525 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:09 lr: 0.000043 grad: 0.0840 (0.0925) loss: 0.9864 (0.9870) time: 0.1411 data: 0.0611 max mem: 9377 +Train: [1] [4700/6250] eta: 0:03:53 lr: 0.000044 grad: 0.0889 (0.0925) loss: 0.9851 (0.9870) time: 0.1615 data: 0.0750 max mem: 9377 +Train: [1] [4800/6250] eta: 0:03:38 lr: 0.000044 grad: 0.0783 (0.0926) loss: 0.9865 (0.9869) time: 0.1725 data: 0.0812 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:23 lr: 0.000045 grad: 0.0868 (0.0927) loss: 0.9852 (0.9869) time: 0.1551 data: 0.0630 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:08 lr: 0.000045 grad: 0.0886 (0.0928) loss: 0.9857 (0.9869) time: 0.1617 data: 0.0816 max mem: 9377 +Train: [1] [5100/6250] eta: 0:02:53 lr: 0.000045 grad: 0.0907 (0.0928) loss: 0.9862 (0.9868) time: 0.1248 data: 0.0477 max mem: 9377 +Train: [1] [5200/6250] eta: 0:02:38 lr: 0.000046 grad: 0.0899 (0.0929) loss: 0.9857 (0.9868) time: 0.1287 data: 0.0437 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:22 lr: 0.000046 grad: 0.0821 (0.0929) loss: 0.9861 (0.9868) time: 0.1379 data: 0.0454 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:07 lr: 0.000047 grad: 0.0759 (0.0928) loss: 0.9856 (0.9868) time: 0.1489 data: 0.0646 max mem: 9377 +Train: [1] [5500/6250] eta: 0:01:52 lr: 0.000047 grad: 0.0807 (0.0929) loss: 0.9850 (0.9867) time: 0.1500 data: 0.0655 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:37 lr: 0.000047 grad: 0.0946 (0.0929) loss: 0.9859 (0.9867) time: 0.1557 data: 0.0731 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:22 lr: 0.000048 grad: 0.0855 (0.0930) loss: 0.9854 (0.9867) time: 0.1204 data: 0.0343 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:07 lr: 0.000048 grad: 0.0846 (0.0930) loss: 0.9835 (0.9867) time: 0.1340 data: 0.0443 max mem: 9377 +Train: [1] [5900/6250] eta: 0:00:52 lr: 0.000049 grad: 0.0882 (0.0931) loss: 0.9847 (0.9866) time: 0.1349 data: 0.0438 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:37 lr: 0.000049 grad: 0.0895 (0.0932) loss: 0.9860 (0.9866) time: 0.1486 data: 0.0632 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:22 lr: 0.000049 grad: 0.0918 (0.0933) loss: 0.9838 (0.9866) time: 0.1584 data: 0.0706 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:07 lr: 0.000050 grad: 0.0893 (0.0935) loss: 0.9863 (0.9865) time: 0.1448 data: 0.0525 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0956 (0.0935) loss: 0.9853 (0.9865) time: 0.1591 data: 0.0714 max mem: 9377 +Train: [1] Total time: 0:15:47 (0.1516 s / it) +Averaged stats: lr: 0.000050 grad: 0.0956 (0.0935) loss: 0.9853 (0.9865) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:05:38 loss: 0.9913 (0.9913) time: 5.4646 data: 5.4323 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9881 (0.9877) time: 0.1093 data: 0.0841 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.9881 (0.9877) +Eval (hcp-val): [1] [ 0/62] eta: 0:03:01 loss: 0.9851 (0.9851) time: 2.9297 data: 2.8682 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9880 (0.9872) time: 0.1885 data: 0.1607 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:17 (0.2782 s / it) +Averaged stats (hcp-val): loss: 0.9880 (0.9872) +Eval (nsd-val): [1] [ 0/62] eta: 0:05:07 loss: 0.9844 (0.9844) time: 4.9610 data: 4.9259 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9890 (0.9875) time: 0.1280 data: 0.1025 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:14 (0.2336 s / it) +Averaged stats (nsd-val): loss: 0.9890 (0.9875) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 6:22:33 lr: 0.000050 grad: 0.1217 (0.1217) loss: 0.9840 (0.9840) time: 3.6725 data: 3.4434 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:19:58 lr: 0.000050 grad: 0.0817 (0.0969) loss: 0.9878 (0.9854) time: 0.1568 data: 0.0780 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:17:12 lr: 0.000051 grad: 0.0874 (0.0967) loss: 0.9837 (0.9844) time: 0.1178 data: 0.0336 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:16:33 lr: 0.000051 grad: 0.0784 (0.0961) loss: 0.9844 (0.9845) time: 0.1813 data: 0.0943 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:15:53 lr: 0.000052 grad: 0.0813 (0.0948) loss: 0.9869 (0.9848) time: 0.1503 data: 0.0573 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:15:29 lr: 0.000052 grad: 0.0839 (0.0950) loss: 0.9861 (0.9848) time: 0.1526 data: 0.0582 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:15:10 lr: 0.000052 grad: 0.0897 (0.0948) loss: 0.9833 (0.9848) time: 0.1325 data: 0.0308 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:14:42 lr: 0.000053 grad: 0.0949 (0.0956) loss: 0.9844 (0.9847) time: 0.1530 data: 0.0624 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:14:11 lr: 0.000053 grad: 0.1027 (0.0970) loss: 0.9839 (0.9845) time: 0.1299 data: 0.0356 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:14:01 lr: 0.000054 grad: 0.0985 (0.0979) loss: 0.9823 (0.9842) time: 0.1666 data: 0.0758 max mem: 9377 +Train: [2] [1000/6250] eta: 0:14:10 lr: 0.000054 grad: 0.0964 (0.0988) loss: 0.9834 (0.9840) time: 0.2199 data: 0.1440 max mem: 9377 +Train: [2] [1100/6250] eta: 0:14:04 lr: 0.000054 grad: 0.0879 (0.0988) loss: 0.9832 (0.9839) time: 0.1804 data: 0.0965 max mem: 9377 +Train: [2] [1200/6250] eta: 0:14:01 lr: 0.000055 grad: 0.1039 (0.0997) loss: 0.9825 (0.9837) time: 0.2327 data: 0.1615 max mem: 9377 +Train: [2] [1300/6250] eta: 0:13:52 lr: 0.000055 grad: 0.0842 (0.1000) loss: 0.9831 (0.9837) time: 0.1482 data: 0.0659 max mem: 9377 +Train: [2] [1400/6250] eta: 0:13:27 lr: 0.000056 grad: 0.0950 (0.1003) loss: 0.9816 (0.9836) time: 0.1808 data: 0.0921 max mem: 9377 +Train: [2] [1500/6250] eta: 0:13:10 lr: 0.000056 grad: 0.0991 (0.1001) loss: 0.9836 (0.9836) time: 0.1375 data: 0.0436 max mem: 9377 +Train: [2] [1600/6250] eta: 0:12:53 lr: 0.000056 grad: 0.0902 (0.1000) loss: 0.9818 (0.9836) time: 0.1569 data: 0.0731 max mem: 9377 +Train: [2] [1700/6250] eta: 0:12:37 lr: 0.000057 grad: 0.0943 (0.1000) loss: 0.9843 (0.9835) time: 0.1685 data: 0.0847 max mem: 9377 +Train: [2] [1800/6250] eta: 0:12:21 lr: 0.000057 grad: 0.0792 (0.0999) loss: 0.9863 (0.9835) time: 0.1635 data: 0.0821 max mem: 9377 +Train: [2] [1900/6250] eta: 0:11:59 lr: 0.000058 grad: 0.0864 (0.0999) loss: 0.9847 (0.9835) time: 0.1260 data: 0.0412 max mem: 9377 +Train: [2] [2000/6250] eta: 0:11:41 lr: 0.000058 grad: 0.1014 (0.0998) loss: 0.9845 (0.9835) time: 0.1930 data: 0.1056 max mem: 9377 +Train: [2] [2100/6250] eta: 0:11:24 lr: 0.000058 grad: 0.0970 (0.0998) loss: 0.9831 (0.9835) time: 0.1675 data: 0.0758 max mem: 9377 +Train: [2] [2200/6250] eta: 0:11:06 lr: 0.000059 grad: 0.0936 (0.1002) loss: 0.9836 (0.9834) time: 0.1609 data: 0.0687 max mem: 9377 +Train: [2] [2300/6250] eta: 0:10:47 lr: 0.000059 grad: 0.1094 (0.1004) loss: 0.9829 (0.9834) time: 0.1222 data: 0.0301 max mem: 9377 +Train: [2] [2400/6250] eta: 0:10:25 lr: 0.000060 grad: 0.0928 (0.1007) loss: 0.9817 (0.9833) time: 0.1286 data: 0.0418 max mem: 9377 +Train: [2] [2500/6250] eta: 0:10:08 lr: 0.000060 grad: 0.0991 (0.1010) loss: 0.9801 (0.9833) time: 0.1575 data: 0.0737 max mem: 9377 +Train: [2] [2600/6250] eta: 0:09:50 lr: 0.000060 grad: 0.0945 (0.1012) loss: 0.9822 (0.9833) time: 0.1442 data: 0.0558 max mem: 9377 +Train: [2] [2700/6250] eta: 0:09:32 lr: 0.000061 grad: 0.0894 (0.1011) loss: 0.9826 (0.9833) time: 0.1343 data: 0.0437 max mem: 9377 +Train: [2] [2800/6250] eta: 0:09:15 lr: 0.000061 grad: 0.0983 (0.1013) loss: 0.9845 (0.9833) time: 0.1467 data: 0.0638 max mem: 9377 +Train: [2] [2900/6250] eta: 0:08:57 lr: 0.000062 grad: 0.0985 (0.1013) loss: 0.9838 (0.9833) time: 0.1406 data: 0.0483 max mem: 9377 +Train: [2] [3000/6250] eta: 0:08:39 lr: 0.000062 grad: 0.0878 (0.1013) loss: 0.9855 (0.9833) time: 0.1454 data: 0.0645 max mem: 9377 +Train: [2] [3100/6250] eta: 0:08:21 lr: 0.000062 grad: 0.0865 (0.1014) loss: 0.9833 (0.9833) time: 0.1544 data: 0.0676 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:04 lr: 0.000063 grad: 0.1049 (0.1014) loss: 0.9838 (0.9833) time: 0.1488 data: 0.0596 max mem: 9377 +Train: [2] [3300/6250] eta: 0:07:49 lr: 0.000063 grad: 0.1021 (0.1016) loss: 0.9833 (0.9833) time: 0.1730 data: 0.0877 max mem: 9377 +Train: [2] [3400/6250] eta: 0:07:32 lr: 0.000064 grad: 0.1115 (0.1019) loss: 0.9829 (0.9833) time: 0.1150 data: 0.0321 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:15 lr: 0.000064 grad: 0.0886 (0.1020) loss: 0.9854 (0.9833) time: 0.1332 data: 0.0539 max mem: 9377 +Train: [2] [3600/6250] eta: 0:06:58 lr: 0.000064 grad: 0.1047 (0.1022) loss: 0.9834 (0.9833) time: 0.1570 data: 0.0686 max mem: 9377 +Train: [2] [3700/6250] eta: 0:06:42 lr: 0.000065 grad: 0.1030 (0.1026) loss: 0.9837 (0.9833) time: 0.1555 data: 0.0720 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:25 lr: 0.000065 grad: 0.1112 (0.1035) loss: 0.9823 (0.9833) time: 0.1609 data: 0.0762 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:09 lr: 0.000066 grad: 0.0933 (0.1041) loss: 0.9850 (0.9833) time: 0.1548 data: 0.0691 max mem: 9377 +Train: [2] [4000/6250] eta: 0:05:53 lr: 0.000066 grad: 0.1446 (0.1051) loss: 0.9816 (0.9832) time: 0.1375 data: 0.0509 max mem: 9377 +Train: [2] [4100/6250] eta: 0:05:37 lr: 0.000066 grad: 0.1413 (0.1058) loss: 0.9818 (0.9832) time: 0.1496 data: 0.0612 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:21 lr: 0.000067 grad: 0.2170 (0.1074) loss: 0.9827 (0.9831) time: 0.1647 data: 0.0807 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:05 lr: 0.000067 grad: 0.1743 (0.1090) loss: 0.9819 (0.9831) time: 0.1732 data: 0.0850 max mem: 9377 +Train: [2] [4400/6250] eta: 0:04:49 lr: 0.000068 grad: 0.1813 (0.1106) loss: 0.9800 (0.9830) time: 0.1572 data: 0.0653 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:33 lr: 0.000068 grad: 0.1318 (0.1128) loss: 0.9787 (0.9829) time: 0.1540 data: 0.0761 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:17 lr: 0.000068 grad: 0.1974 (0.1150) loss: 0.9798 (0.9829) time: 0.1478 data: 0.0636 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:01 lr: 0.000069 grad: 0.1961 (0.1172) loss: 0.9816 (0.9828) time: 0.1695 data: 0.0892 max mem: 9377 +Train: [2] [4800/6250] eta: 0:03:46 lr: 0.000069 grad: 0.3040 (0.1199) loss: 0.9832 (0.9828) time: 0.1598 data: 0.0702 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:30 lr: 0.000070 grad: 0.1670 (0.1219) loss: 0.9787 (0.9828) time: 0.2055 data: 0.1195 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:14 lr: 0.000070 grad: 0.1418 (0.1232) loss: 0.9814 (0.9827) time: 0.1530 data: 0.0724 max mem: 9377 +Train: [2] [5100/6250] eta: 0:02:59 lr: 0.000070 grad: 0.2236 (0.1253) loss: 0.9810 (0.9827) time: 0.1688 data: 0.0852 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:43 lr: 0.000071 grad: 0.2019 (0.1277) loss: 0.9799 (0.9826) time: 0.1503 data: 0.0707 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:27 lr: 0.000071 grad: 0.1234 (0.1295) loss: 0.9790 (0.9826) time: 0.1518 data: 0.0637 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:12 lr: 0.000072 grad: 0.2178 (0.1315) loss: 0.9800 (0.9825) time: 0.1751 data: 0.0781 max mem: 9377 +Train: [2] [5500/6250] eta: 0:01:56 lr: 0.000072 grad: 0.2334 (0.1334) loss: 0.9801 (0.9825) time: 0.1519 data: 0.0623 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:41 lr: 0.000072 grad: 0.1665 (0.1348) loss: 0.9792 (0.9824) time: 0.1516 data: 0.0672 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:25 lr: 0.000073 grad: 0.1428 (0.1368) loss: 0.9780 (0.9824) time: 0.1422 data: 0.0654 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:09 lr: 0.000073 grad: 0.1955 (0.1386) loss: 0.9800 (0.9823) time: 0.1784 data: 0.0853 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:54 lr: 0.000074 grad: 0.1527 (0.1403) loss: 0.9817 (0.9823) time: 0.1731 data: 0.0872 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:38 lr: 0.000074 grad: 0.2814 (0.1415) loss: 0.9793 (0.9822) time: 0.1695 data: 0.0834 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:23 lr: 0.000074 grad: 0.1386 (0.1433) loss: 0.9784 (0.9822) time: 0.1289 data: 0.0381 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:07 lr: 0.000075 grad: 0.1568 (0.1446) loss: 0.9781 (0.9821) time: 0.1407 data: 0.0576 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1613 (0.1453) loss: 0.9781 (0.9821) time: 0.1309 data: 0.0441 max mem: 9377 +Train: [2] Total time: 0:16:14 (0.1559 s / it) +Averaged stats: lr: 0.000075 grad: 0.1613 (0.1453) loss: 0.9781 (0.9821) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:04:53 loss: 0.9862 (0.9862) time: 4.7403 data: 4.7097 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9815 (0.9808) time: 0.1482 data: 0.1231 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (hcp-train-subset): loss: 0.9815 (0.9808) +Eval (hcp-val): [2] [ 0/62] eta: 0:05:30 loss: 0.9760 (0.9760) time: 5.3256 data: 5.2953 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9800 (0.9802) time: 0.2269 data: 0.1985 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:16 (0.2700 s / it) +Averaged stats (hcp-val): loss: 0.9800 (0.9802) +Eval (nsd-val): [2] [ 0/62] eta: 0:04:20 loss: 0.9747 (0.9747) time: 4.2032 data: 4.1152 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9826 (0.9814) time: 0.1184 data: 0.0928 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (nsd-val): loss: 0.9826 (0.9814) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 9:19:02 lr: 0.000075 grad: 0.1363 (0.1363) loss: 0.9828 (0.9828) time: 5.3669 data: 5.2547 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:21:50 lr: 0.000075 grad: 0.2252 (0.2148) loss: 0.9802 (0.9819) time: 0.1535 data: 0.0714 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:18:25 lr: 0.000076 grad: 0.1816 (0.2245) loss: 0.9768 (0.9806) time: 0.1579 data: 0.0705 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:17:54 lr: 0.000076 grad: 0.2074 (0.2314) loss: 0.9795 (0.9804) time: 0.2016 data: 0.1215 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:16:46 lr: 0.000077 grad: 0.1908 (0.2269) loss: 0.9803 (0.9798) time: 0.1761 data: 0.0795 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:16:25 lr: 0.000077 grad: 0.2115 (0.2302) loss: 0.9795 (0.9796) time: 0.1693 data: 0.0741 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:16:00 lr: 0.000077 grad: 0.1873 (0.2267) loss: 0.9780 (0.9791) time: 0.1752 data: 0.0895 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:15:45 lr: 0.000078 grad: 0.2451 (0.2246) loss: 0.9762 (0.9787) time: 0.2135 data: 0.1221 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:15:21 lr: 0.000078 grad: 0.2482 (0.2256) loss: 0.9761 (0.9784) time: 0.1525 data: 0.0485 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:15:06 lr: 0.000079 grad: 0.2638 (0.2247) loss: 0.9766 (0.9781) time: 0.1620 data: 0.0628 max mem: 9377 +Train: [3] [1000/6250] eta: 0:14:41 lr: 0.000079 grad: 0.1471 (0.2235) loss: 0.9743 (0.9779) time: 0.1701 data: 0.0804 max mem: 9377 +Train: [3] [1100/6250] eta: 0:14:11 lr: 0.000079 grad: 0.2516 (0.2261) loss: 0.9750 (0.9778) time: 0.1366 data: 0.0518 max mem: 9377 +Train: [3] [1200/6250] eta: 0:13:49 lr: 0.000080 grad: 0.2570 (0.2261) loss: 0.9779 (0.9776) time: 0.1746 data: 0.0868 max mem: 9377 +Train: [3] [1300/6250] eta: 0:13:25 lr: 0.000080 grad: 0.2382 (0.2274) loss: 0.9766 (0.9776) time: 0.1132 data: 0.0260 max mem: 9377 +Train: [3] [1400/6250] eta: 0:13:05 lr: 0.000081 grad: 0.1612 (0.2269) loss: 0.9761 (0.9774) time: 0.1560 data: 0.0660 max mem: 9377 +Train: [3] [1500/6250] eta: 0:12:54 lr: 0.000081 grad: 0.1204 (0.2261) loss: 0.9759 (0.9772) time: 0.2000 data: 0.1201 max mem: 9377 +Train: [3] [1600/6250] eta: 0:12:37 lr: 0.000081 grad: 0.2823 (0.2272) loss: 0.9743 (0.9771) time: 0.1607 data: 0.0744 max mem: 9377 +Train: [3] [1700/6250] eta: 0:12:19 lr: 0.000082 grad: 0.1703 (0.2271) loss: 0.9769 (0.9770) time: 0.1481 data: 0.0626 max mem: 9377 +Train: [3] [1800/6250] eta: 0:12:05 lr: 0.000082 grad: 0.3576 (0.2274) loss: 0.9775 (0.9769) time: 0.1641 data: 0.0837 max mem: 9377 +Train: [3] [1900/6250] eta: 0:11:47 lr: 0.000083 grad: 0.1489 (0.2270) loss: 0.9757 (0.9768) time: 0.1686 data: 0.0855 max mem: 9377 +Train: [3] [2000/6250] eta: 0:11:32 lr: 0.000083 grad: 0.1953 (0.2286) loss: 0.9731 (0.9767) time: 0.1700 data: 0.0907 max mem: 9377 +Train: [3] [2100/6250] eta: 0:11:14 lr: 0.000083 grad: 0.1544 (0.2274) loss: 0.9760 (0.9765) time: 0.1068 data: 0.0130 max mem: 9377 +Train: [3] [2200/6250] eta: 0:10:56 lr: 0.000084 grad: 0.1802 (0.2259) loss: 0.9708 (0.9763) time: 0.1603 data: 0.0775 max mem: 9377 +Train: [3] [2300/6250] eta: 0:10:37 lr: 0.000084 grad: 0.1627 (0.2249) loss: 0.9745 (0.9763) time: 0.1363 data: 0.0402 max mem: 9377 +Train: [3] [2400/6250] eta: 0:10:22 lr: 0.000085 grad: 0.1938 (0.2244) loss: 0.9756 (0.9762) time: 0.2383 data: 0.1550 max mem: 9377 +Train: [3] [2500/6250] eta: 0:10:01 lr: 0.000085 grad: 0.1450 (0.2231) loss: 0.9745 (0.9761) time: 0.1158 data: 0.0272 max mem: 9377 +Train: [3] [2600/6250] eta: 0:09:43 lr: 0.000085 grad: 0.1530 (0.2234) loss: 0.9711 (0.9760) time: 0.1865 data: 0.1054 max mem: 9377 +Train: [3] [2700/6250] eta: 0:09:24 lr: 0.000086 grad: 0.1713 (0.2228) loss: 0.9753 (0.9759) time: 0.1455 data: 0.0605 max mem: 9377 +Train: [3] [2800/6250] eta: 0:09:07 lr: 0.000086 grad: 0.1761 (0.2223) loss: 0.9711 (0.9757) time: 0.1512 data: 0.0718 max mem: 9377 +Train: [3] [2900/6250] eta: 0:08:50 lr: 0.000087 grad: 0.1641 (0.2212) loss: 0.9722 (0.9756) time: 0.1464 data: 0.0589 max mem: 9377 +Train: [3] [3000/6250] eta: 0:08:33 lr: 0.000087 grad: 0.1691 (0.2209) loss: 0.9706 (0.9754) time: 0.1452 data: 0.0650 max mem: 9377 +Train: [3] [3100/6250] eta: 0:08:17 lr: 0.000087 grad: 0.1598 (0.2206) loss: 0.9711 (0.9753) time: 0.1721 data: 0.0897 max mem: 9377 +Train: [3] [3200/6250] eta: 0:08:00 lr: 0.000088 grad: 0.1711 (0.2199) loss: 0.9703 (0.9752) time: 0.1611 data: 0.0793 max mem: 9377 +Train: [3] [3300/6250] eta: 0:07:44 lr: 0.000088 grad: 0.1924 (0.2202) loss: 0.9732 (0.9752) time: 0.1223 data: 0.0336 max mem: 9377 +Train: [3] [3400/6250] eta: 0:07:29 lr: 0.000089 grad: 0.1295 (0.2196) loss: 0.9718 (0.9751) time: 0.1745 data: 0.0815 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:14 lr: 0.000089 grad: 0.1949 (0.2197) loss: 0.9720 (0.9750) time: 0.1523 data: 0.0632 max mem: 9377 +Train: [3] [3600/6250] eta: 0:06:58 lr: 0.000089 grad: 0.1872 (0.2195) loss: 0.9734 (0.9749) time: 0.1479 data: 0.0627 max mem: 9377 +Train: [3] [3700/6250] eta: 0:06:42 lr: 0.000090 grad: 0.1642 (0.2192) loss: 0.9712 (0.9748) time: 0.1562 data: 0.0692 max mem: 9377 +Train: [3] [3800/6250] eta: 0:06:27 lr: 0.000090 grad: 0.1635 (0.2190) loss: 0.9719 (0.9747) time: 0.1755 data: 0.0910 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:10 lr: 0.000091 grad: 0.2057 (0.2187) loss: 0.9700 (0.9746) time: 0.1504 data: 0.0622 max mem: 9377 +Train: [3] [4000/6250] eta: 0:05:54 lr: 0.000091 grad: 0.2171 (0.2186) loss: 0.9728 (0.9746) time: 0.1574 data: 0.0778 max mem: 9377 +Train: [3] [4100/6250] eta: 0:05:39 lr: 0.000091 grad: 0.1851 (0.2184) loss: 0.9707 (0.9745) time: 0.1623 data: 0.0763 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:22 lr: 0.000092 grad: 0.1394 (0.2182) loss: 0.9693 (0.9744) time: 0.1390 data: 0.0523 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:06 lr: 0.000092 grad: 0.1742 (0.2186) loss: 0.9682 (0.9743) time: 0.1551 data: 0.0712 max mem: 9377 +Train: [3] [4400/6250] eta: 0:04:50 lr: 0.000093 grad: 0.1984 (0.2189) loss: 0.9687 (0.9741) time: 0.1653 data: 0.0802 max mem: 9377 +Train: [3] [4500/6250] eta: 0:04:34 lr: 0.000093 grad: 0.1295 (0.2185) loss: 0.9665 (0.9740) time: 0.1463 data: 0.0643 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:18 lr: 0.000093 grad: 0.2122 (0.2190) loss: 0.9688 (0.9739) time: 0.1420 data: 0.0559 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:03 lr: 0.000094 grad: 0.1664 (0.2186) loss: 0.9664 (0.9737) time: 0.1525 data: 0.0628 max mem: 9377 +Train: [3] [4800/6250] eta: 0:03:47 lr: 0.000094 grad: 0.1655 (0.2186) loss: 0.9684 (0.9736) time: 0.1635 data: 0.0785 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:31 lr: 0.000095 grad: 0.1639 (0.2187) loss: 0.9676 (0.9734) time: 0.1442 data: 0.0577 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:15 lr: 0.000095 grad: 0.1955 (0.2185) loss: 0.9639 (0.9732) time: 0.1503 data: 0.0696 max mem: 9377 +Train: [3] [5100/6250] eta: 0:02:59 lr: 0.000095 grad: 0.1821 (0.2183) loss: 0.9644 (0.9730) time: 0.1518 data: 0.0736 max mem: 9377 +Train: [3] [5200/6250] eta: 0:02:43 lr: 0.000096 grad: 0.2271 (0.2186) loss: 0.9656 (0.9728) time: 0.1609 data: 0.0814 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:28 lr: 0.000096 grad: 0.1844 (0.2187) loss: 0.9644 (0.9726) time: 0.1630 data: 0.0779 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:12 lr: 0.000097 grad: 0.2748 (0.2192) loss: 0.9641 (0.9725) time: 0.1587 data: 0.0789 max mem: 9377 +Train: [3] [5500/6250] eta: 0:01:57 lr: 0.000097 grad: 0.3473 (0.2192) loss: 0.9624 (0.9723) time: 0.1777 data: 0.0947 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:41 lr: 0.000097 grad: 0.1782 (0.2197) loss: 0.9618 (0.9721) time: 0.1437 data: 0.0580 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:25 lr: 0.000098 grad: 0.1519 (0.2194) loss: 0.9598 (0.9719) time: 0.1487 data: 0.0622 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:10 lr: 0.000098 grad: 0.1906 (0.2200) loss: 0.9585 (0.9717) time: 0.1385 data: 0.0502 max mem: 9377 +Train: [3] [5900/6250] eta: 0:00:54 lr: 0.000099 grad: 0.2120 (0.2200) loss: 0.9600 (0.9715) time: 0.1381 data: 0.0570 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:38 lr: 0.000099 grad: 0.1622 (0.2202) loss: 0.9613 (0.9713) time: 0.1523 data: 0.0708 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:23 lr: 0.000099 grad: 0.1588 (0.2203) loss: 0.9568 (0.9710) time: 0.1614 data: 0.0801 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:07 lr: 0.000100 grad: 0.2148 (0.2207) loss: 0.9579 (0.9708) time: 0.1454 data: 0.0625 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.2142 (0.2208) loss: 0.9563 (0.9706) time: 0.1783 data: 0.0955 max mem: 9377 +Train: [3] Total time: 0:16:17 (0.1565 s / it) +Averaged stats: lr: 0.000100 grad: 0.2142 (0.2208) loss: 0.9563 (0.9706) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:03:23 loss: 0.9615 (0.9615) time: 3.2833 data: 3.2021 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9603 (0.9610) time: 0.1448 data: 0.1198 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-train-subset): loss: 0.9603 (0.9610) +Eval (hcp-val): [3] [ 0/62] eta: 0:05:21 loss: 0.9609 (0.9609) time: 5.1782 data: 5.1482 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9607 (0.9622) time: 0.1545 data: 0.1267 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:15 (0.2466 s / it) +Averaged stats (hcp-val): loss: 0.9607 (0.9622) +Eval (nsd-val): [3] [ 0/62] eta: 0:03:42 loss: 0.9385 (0.9385) time: 3.5928 data: 3.5044 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9476 (0.9464) time: 0.1308 data: 0.1053 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:13 (0.2219 s / it) +Averaged stats (nsd-val): loss: 0.9476 (0.9464) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 9:38:46 lr: 0.000100 grad: 0.2355 (0.2355) loss: 0.9555 (0.9555) time: 5.5562 data: 5.4072 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:22:39 lr: 0.000100 grad: 0.3368 (0.2554) loss: 0.9570 (0.9597) time: 0.1692 data: 0.0829 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:19:42 lr: 0.000101 grad: 0.2410 (0.2721) loss: 0.9547 (0.9583) time: 0.1580 data: 0.0642 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:19:20 lr: 0.000101 grad: 0.1699 (0.2659) loss: 0.9564 (0.9563) time: 0.2535 data: 0.1767 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:18:13 lr: 0.000102 grad: 0.2467 (0.2626) loss: 0.9574 (0.9556) time: 0.1650 data: 0.0782 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:17:28 lr: 0.000102 grad: 0.1874 (0.2638) loss: 0.9506 (0.9550) time: 0.1948 data: 0.1152 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:16:42 lr: 0.000102 grad: 0.2351 (0.2635) loss: 0.9562 (0.9543) time: 0.1349 data: 0.0499 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:16:17 lr: 0.000103 grad: 0.1895 (0.2616) loss: 0.9531 (0.9538) time: 0.1621 data: 0.0625 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:15:58 lr: 0.000103 grad: 0.3053 (0.2633) loss: 0.9545 (0.9535) time: 0.1627 data: 0.0463 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:15:37 lr: 0.000104 grad: 0.2571 (0.2643) loss: 0.9532 (0.9533) time: 0.1786 data: 0.0781 max mem: 9377 +Train: [4] [1000/6250] eta: 0:15:09 lr: 0.000104 grad: 0.2045 (0.2656) loss: 0.9505 (0.9531) time: 0.1210 data: 0.0260 max mem: 9377 +Train: [4] [1100/6250] eta: 0:14:39 lr: 0.000104 grad: 0.2268 (0.2662) loss: 0.9429 (0.9527) time: 0.1304 data: 0.0400 max mem: 9377 +Train: [4] [1200/6250] eta: 0:14:12 lr: 0.000105 grad: 0.2629 (0.2650) loss: 0.9498 (0.9524) time: 0.1382 data: 0.0489 max mem: 9377 +Train: [4] [1300/6250] eta: 0:13:48 lr: 0.000105 grad: 0.2268 (0.2664) loss: 0.9453 (0.9520) time: 0.1600 data: 0.0744 max mem: 9377 +Train: [4] [1400/6250] eta: 0:13:23 lr: 0.000106 grad: 0.3793 (0.2680) loss: 0.9494 (0.9517) time: 0.1234 data: 0.0293 max mem: 9377 +Train: [4] [1500/6250] eta: 0:12:59 lr: 0.000106 grad: 0.2898 (0.2709) loss: 0.9457 (0.9513) time: 0.1539 data: 0.0693 max mem: 9377 +Train: [4] [1600/6250] eta: 0:12:38 lr: 0.000106 grad: 0.2136 (0.2727) loss: 0.9441 (0.9509) time: 0.1454 data: 0.0643 max mem: 9377 +Train: [4] [1700/6250] eta: 0:12:19 lr: 0.000107 grad: 0.2781 (0.2750) loss: 0.9380 (0.9505) time: 0.1327 data: 0.0499 max mem: 9377 +Train: [4] [1800/6250] eta: 0:12:05 lr: 0.000107 grad: 0.3092 (0.2748) loss: 0.9450 (0.9501) time: 0.1964 data: 0.1194 max mem: 9377 +Train: [4] [1900/6250] eta: 0:11:51 lr: 0.000108 grad: 0.3141 (0.2764) loss: 0.9404 (0.9498) time: 0.1821 data: 0.0992 max mem: 9377 +Train: [4] [2000/6250] eta: 0:11:31 lr: 0.000108 grad: 0.2690 (0.2762) loss: 0.9417 (0.9494) time: 0.1442 data: 0.0599 max mem: 9377 +Train: [4] [2100/6250] eta: 0:11:10 lr: 0.000108 grad: 0.2043 (0.2752) loss: 0.9375 (0.9491) time: 0.1375 data: 0.0369 max mem: 9377 +Train: [4] [2200/6250] eta: 0:10:52 lr: 0.000109 grad: 0.2497 (0.2746) loss: 0.9411 (0.9487) time: 0.1538 data: 0.0711 max mem: 9377 +Train: [4] [2300/6250] eta: 0:10:37 lr: 0.000109 grad: 0.2218 (0.2751) loss: 0.9404 (0.9484) time: 0.1599 data: 0.0717 max mem: 9377 +Train: [4] [2400/6250] eta: 0:10:22 lr: 0.000110 grad: 0.3665 (0.2747) loss: 0.9391 (0.9481) time: 0.1575 data: 0.0768 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:06 lr: 0.000110 grad: 0.3611 (0.2754) loss: 0.9418 (0.9477) time: 0.1660 data: 0.0796 max mem: 9377 +Train: [4] [2600/6250] eta: 0:09:49 lr: 0.000110 grad: 0.2270 (0.2752) loss: 0.9314 (0.9474) time: 0.1412 data: 0.0628 max mem: 9377 +Train: [4] [2700/6250] eta: 0:09:31 lr: 0.000111 grad: 0.2391 (0.2736) loss: 0.9386 (0.9470) time: 0.1356 data: 0.0459 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:13 lr: 0.000111 grad: 0.2748 (0.2737) loss: 0.9378 (0.9467) time: 0.1600 data: 0.0727 max mem: 9377 +Train: [4] [2900/6250] eta: 0:08:57 lr: 0.000112 grad: 0.2041 (0.2740) loss: 0.9359 (0.9463) time: 0.1553 data: 0.0730 max mem: 9377 +Train: [4] [3000/6250] eta: 0:08:41 lr: 0.000112 grad: 0.2200 (0.2735) loss: 0.9296 (0.9459) time: 0.1454 data: 0.0616 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:24 lr: 0.000112 grad: 0.2027 (0.2732) loss: 0.9301 (0.9455) time: 0.1503 data: 0.0675 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:07 lr: 0.000113 grad: 0.2515 (0.2725) loss: 0.9321 (0.9451) time: 0.1457 data: 0.0644 max mem: 9377 +Train: [4] [3300/6250] eta: 0:07:50 lr: 0.000113 grad: 0.2910 (0.2719) loss: 0.9338 (0.9447) time: 0.1450 data: 0.0649 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:33 lr: 0.000114 grad: 0.2263 (0.2715) loss: 0.9335 (0.9444) time: 0.1620 data: 0.0754 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:15 lr: 0.000114 grad: 0.2966 (0.2709) loss: 0.9314 (0.9440) time: 0.1528 data: 0.0659 max mem: 9377 +Train: [4] [3600/6250] eta: 0:06:59 lr: 0.000114 grad: 0.2432 (0.2710) loss: 0.9320 (0.9436) time: 0.1643 data: 0.0825 max mem: 9377 +Train: [4] [3700/6250] eta: 0:06:42 lr: 0.000115 grad: 0.2025 (0.2702) loss: 0.9301 (0.9433) time: 0.1404 data: 0.0509 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:26 lr: 0.000115 grad: 0.2564 (0.2700) loss: 0.9322 (0.9430) time: 0.1617 data: 0.0772 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:10 lr: 0.000116 grad: 0.2130 (0.2699) loss: 0.9303 (0.9426) time: 0.1670 data: 0.0862 max mem: 9377 +Train: [4] [4000/6250] eta: 0:05:53 lr: 0.000116 grad: 0.2015 (0.2689) loss: 0.9297 (0.9423) time: 0.1690 data: 0.0840 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:38 lr: 0.000116 grad: 0.1593 (0.2678) loss: 0.9298 (0.9419) time: 0.1620 data: 0.0794 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:22 lr: 0.000117 grad: 0.1867 (0.2668) loss: 0.9297 (0.9416) time: 0.1557 data: 0.0688 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:06 lr: 0.000117 grad: 0.1988 (0.2659) loss: 0.9252 (0.9413) time: 0.1393 data: 0.0606 max mem: 9377 +Train: [4] [4400/6250] eta: 0:04:50 lr: 0.000118 grad: 0.2644 (0.2654) loss: 0.9279 (0.9410) time: 0.1492 data: 0.0599 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:34 lr: 0.000118 grad: 0.2224 (0.2647) loss: 0.9260 (0.9407) time: 0.1416 data: 0.0524 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:19 lr: 0.000118 grad: 0.2388 (0.2640) loss: 0.9249 (0.9404) time: 0.1526 data: 0.0620 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:03 lr: 0.000119 grad: 0.2451 (0.2632) loss: 0.9222 (0.9400) time: 0.1423 data: 0.0622 max mem: 9377 +Train: [4] [4800/6250] eta: 0:03:47 lr: 0.000119 grad: 0.1859 (0.2623) loss: 0.9214 (0.9397) time: 0.1382 data: 0.0453 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:31 lr: 0.000120 grad: 0.1894 (0.2614) loss: 0.9226 (0.9393) time: 0.1631 data: 0.0740 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:15 lr: 0.000120 grad: 0.2197 (0.2607) loss: 0.9207 (0.9389) time: 0.1501 data: 0.0653 max mem: 9377 +Train: [4] [5100/6250] eta: 0:02:59 lr: 0.000120 grad: 0.1674 (0.2598) loss: 0.9181 (0.9385) time: 0.1491 data: 0.0605 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:44 lr: 0.000121 grad: 0.1756 (0.2592) loss: 0.9138 (0.9381) time: 0.1381 data: 0.0517 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:28 lr: 0.000121 grad: 0.2186 (0.2585) loss: 0.9175 (0.9378) time: 0.1407 data: 0.0567 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:12 lr: 0.000122 grad: 0.2216 (0.2580) loss: 0.9156 (0.9374) time: 0.1295 data: 0.0495 max mem: 9377 +Train: [4] [5500/6250] eta: 0:01:56 lr: 0.000122 grad: 0.1717 (0.2571) loss: 0.9179 (0.9370) time: 0.1438 data: 0.0447 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:41 lr: 0.000122 grad: 0.2015 (0.2563) loss: 0.9155 (0.9366) time: 0.1359 data: 0.0433 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:25 lr: 0.000123 grad: 0.1868 (0.2555) loss: 0.9134 (0.9363) time: 0.1773 data: 0.0905 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:09 lr: 0.000123 grad: 0.1926 (0.2547) loss: 0.9162 (0.9359) time: 0.1081 data: 0.0252 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:54 lr: 0.000124 grad: 0.1975 (0.2538) loss: 0.9190 (0.9356) time: 0.1616 data: 0.0780 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:38 lr: 0.000124 grad: 0.1977 (0.2532) loss: 0.9146 (0.9353) time: 0.1118 data: 0.0337 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:23 lr: 0.000124 grad: 0.2037 (0.2524) loss: 0.9157 (0.9350) time: 0.1222 data: 0.0378 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.1710 (0.2517) loss: 0.9202 (0.9346) time: 0.1551 data: 0.0610 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1913 (0.2512) loss: 0.9157 (0.9345) time: 0.1516 data: 0.0588 max mem: 9377 +Train: [4] Total time: 0:16:12 (0.1556 s / it) +Averaged stats: lr: 0.000125 grad: 0.1913 (0.2512) loss: 0.9157 (0.9345) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:03:16 loss: 0.9181 (0.9181) time: 3.1660 data: 3.0943 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9191 (0.9175) time: 0.1619 data: 0.1363 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:14 (0.2367 s / it) +Averaged stats (hcp-train-subset): loss: 0.9191 (0.9175) +Making plots (hcp-train-subset): example=27 +Eval (hcp-val): [4] [ 0/62] eta: 0:05:05 loss: 0.9148 (0.9148) time: 4.9242 data: 4.8612 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9150 (0.9159) time: 0.1469 data: 0.1213 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:16 (0.2724 s / it) +Averaged stats (hcp-val): loss: 0.9150 (0.9159) +Making plots (hcp-val): example=1 +Eval (nsd-val): [4] [ 0/62] eta: 0:03:51 loss: 0.8750 (0.8750) time: 3.7362 data: 3.6708 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.8837 (0.8849) time: 0.1551 data: 0.1295 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:14 (0.2344 s / it) +Averaged stats (nsd-val): loss: 0.8837 (0.8849) +Making plots (nsd-val): example=28 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 7:45:33 lr: 0.000125 grad: 0.2019 (0.2019) loss: 0.9274 (0.9274) time: 4.4693 data: 4.2083 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:20:50 lr: 0.000125 grad: 0.1623 (0.2117) loss: 0.9123 (0.9143) time: 0.1582 data: 0.0722 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:18:34 lr: 0.000125 grad: 0.1780 (0.2189) loss: 0.9158 (0.9127) time: 0.1673 data: 0.0745 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:17:46 lr: 0.000125 grad: 0.1762 (0.2163) loss: 0.9128 (0.9123) time: 0.1726 data: 0.0838 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:17:12 lr: 0.000125 grad: 0.2167 (0.2114) loss: 0.9092 (0.9115) time: 0.1890 data: 0.1033 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:17:12 lr: 0.000125 grad: 0.1693 (0.2105) loss: 0.9048 (0.9107) time: 0.1710 data: 0.0836 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:16:47 lr: 0.000125 grad: 0.1753 (0.2084) loss: 0.9108 (0.9102) time: 0.1724 data: 0.0826 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:16:18 lr: 0.000125 grad: 0.1615 (0.2060) loss: 0.9075 (0.9101) time: 0.1520 data: 0.0669 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:15:50 lr: 0.000125 grad: 0.2071 (0.2037) loss: 0.9121 (0.9105) time: 0.1501 data: 0.0587 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:15:22 lr: 0.000125 grad: 0.1742 (0.2016) loss: 0.9119 (0.9106) time: 0.1637 data: 0.0658 max mem: 9377 +Train: [5] [1000/6250] eta: 0:14:59 lr: 0.000125 grad: 0.1671 (0.1994) loss: 0.9113 (0.9105) time: 0.1675 data: 0.0718 max mem: 9377 +Train: [5] [1100/6250] eta: 0:14:37 lr: 0.000125 grad: 0.1589 (0.1978) loss: 0.9093 (0.9105) time: 0.1630 data: 0.0815 max mem: 9377 +Train: [5] [1200/6250] eta: 0:14:08 lr: 0.000125 grad: 0.1712 (0.1970) loss: 0.9071 (0.9104) time: 0.1475 data: 0.0538 max mem: 9377 +Train: [5] [1300/6250] eta: 0:13:41 lr: 0.000125 grad: 0.1861 (0.1958) loss: 0.9035 (0.9101) time: 0.1357 data: 0.0475 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:15 lr: 0.000125 grad: 0.1589 (0.1946) loss: 0.9076 (0.9098) time: 0.1211 data: 0.0357 max mem: 9377 +Train: [5] [1500/6250] eta: 0:12:50 lr: 0.000125 grad: 0.1653 (0.1932) loss: 0.9109 (0.9096) time: 0.1507 data: 0.0665 max mem: 9377 +Train: [5] [1600/6250] eta: 0:12:31 lr: 0.000125 grad: 0.2048 (0.1932) loss: 0.9033 (0.9093) time: 0.1726 data: 0.0839 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:14 lr: 0.000125 grad: 0.1682 (0.1930) loss: 0.9053 (0.9090) time: 0.1497 data: 0.0662 max mem: 9377 +Train: [5] [1800/6250] eta: 0:11:57 lr: 0.000125 grad: 0.1757 (0.1930) loss: 0.8966 (0.9086) time: 0.1526 data: 0.0649 max mem: 9377 +Train: [5] [1900/6250] eta: 0:11:39 lr: 0.000125 grad: 0.1817 (0.1924) loss: 0.9026 (0.9082) time: 0.1383 data: 0.0506 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:21 lr: 0.000125 grad: 0.1815 (0.1922) loss: 0.8954 (0.9077) time: 0.1349 data: 0.0511 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:04 lr: 0.000125 grad: 0.1892 (0.1923) loss: 0.8986 (0.9072) time: 0.1837 data: 0.1096 max mem: 9377 +Train: [5] [2200/6250] eta: 0:10:50 lr: 0.000125 grad: 0.1990 (0.1921) loss: 0.8992 (0.9068) time: 0.1715 data: 0.0937 max mem: 9377 +Train: [5] [2300/6250] eta: 0:10:34 lr: 0.000125 grad: 0.1915 (0.1914) loss: 0.8974 (0.9065) time: 0.1560 data: 0.0739 max mem: 9377 +Train: [5] [2400/6250] eta: 0:10:16 lr: 0.000125 grad: 0.1858 (0.1915) loss: 0.8942 (0.9060) time: 0.1394 data: 0.0564 max mem: 9377 +Train: [5] [2500/6250] eta: 0:09:57 lr: 0.000125 grad: 0.2026 (0.1915) loss: 0.8943 (0.9055) time: 0.0969 data: 0.0097 max mem: 9377 +Train: [5] [2600/6250] eta: 0:09:39 lr: 0.000125 grad: 0.1713 (0.1911) loss: 0.8970 (0.9051) time: 0.1626 data: 0.0654 max mem: 9377 +Train: [5] [2700/6250] eta: 0:09:22 lr: 0.000125 grad: 0.2038 (0.1911) loss: 0.8928 (0.9047) time: 0.1488 data: 0.0628 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:07 lr: 0.000125 grad: 0.1703 (0.1910) loss: 0.8909 (0.9043) time: 0.1721 data: 0.0913 max mem: 9377 +Train: [5] [2900/6250] eta: 0:08:51 lr: 0.000125 grad: 0.1521 (0.1902) loss: 0.8935 (0.9039) time: 0.1620 data: 0.0852 max mem: 9377 +Train: [5] [3000/6250] eta: 0:08:35 lr: 0.000125 grad: 0.1581 (0.1899) loss: 0.8946 (0.9036) time: 0.1266 data: 0.0345 max mem: 9377 +Train: [5] [3100/6250] eta: 0:08:19 lr: 0.000125 grad: 0.1788 (0.1893) loss: 0.8905 (0.9031) time: 0.1512 data: 0.0718 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:02 lr: 0.000125 grad: 0.1758 (0.1894) loss: 0.8929 (0.9028) time: 0.1474 data: 0.0596 max mem: 9377 +Train: [5] [3300/6250] eta: 0:07:46 lr: 0.000125 grad: 0.1625 (0.1888) loss: 0.8854 (0.9024) time: 0.1642 data: 0.0856 max mem: 9377 +Train: [5] [3400/6250] eta: 0:07:29 lr: 0.000125 grad: 0.1761 (0.1886) loss: 0.8900 (0.9020) time: 0.1180 data: 0.0339 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:12 lr: 0.000125 grad: 0.1568 (0.1883) loss: 0.8856 (0.9015) time: 0.1314 data: 0.0513 max mem: 9377 +Train: [5] [3600/6250] eta: 0:06:56 lr: 0.000125 grad: 0.1528 (0.1885) loss: 0.8893 (0.9011) time: 0.1405 data: 0.0563 max mem: 9377 +Train: [5] [3700/6250] eta: 0:06:39 lr: 0.000125 grad: 0.1690 (0.1882) loss: 0.8869 (0.9007) time: 0.1448 data: 0.0625 max mem: 9377 +Train: [5] [3800/6250] eta: 0:06:23 lr: 0.000125 grad: 0.1609 (0.1880) loss: 0.8880 (0.9004) time: 0.1334 data: 0.0494 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:07 lr: 0.000125 grad: 0.1691 (0.1877) loss: 0.8850 (0.9000) time: 0.1433 data: 0.0531 max mem: 9377 +Train: [5] [4000/6250] eta: 0:05:50 lr: 0.000125 grad: 0.1790 (0.1875) loss: 0.8856 (0.8996) time: 0.1262 data: 0.0361 max mem: 9377 +Train: [5] [4100/6250] eta: 0:05:34 lr: 0.000125 grad: 0.1444 (0.1873) loss: 0.8849 (0.8992) time: 0.1470 data: 0.0609 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:18 lr: 0.000125 grad: 0.1522 (0.1868) loss: 0.8838 (0.8989) time: 0.1438 data: 0.0568 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:02 lr: 0.000125 grad: 0.1506 (0.1863) loss: 0.8811 (0.8985) time: 0.1468 data: 0.0627 max mem: 9377 +Train: [5] [4400/6250] eta: 0:04:46 lr: 0.000125 grad: 0.1585 (0.1860) loss: 0.8849 (0.8981) time: 0.1430 data: 0.0616 max mem: 9377 +Train: [5] [4500/6250] eta: 0:04:30 lr: 0.000125 grad: 0.1489 (0.1856) loss: 0.8839 (0.8978) time: 0.1492 data: 0.0698 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:14 lr: 0.000125 grad: 0.1512 (0.1851) loss: 0.8766 (0.8974) time: 0.1401 data: 0.0563 max mem: 9377 +Train: [5] [4700/6250] eta: 0:03:59 lr: 0.000125 grad: 0.1447 (0.1846) loss: 0.8812 (0.8970) time: 0.1684 data: 0.0778 max mem: 9377 +Train: [5] [4800/6250] eta: 0:03:44 lr: 0.000125 grad: 0.1501 (0.1842) loss: 0.8792 (0.8967) time: 0.1728 data: 0.0818 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:28 lr: 0.000125 grad: 0.1518 (0.1839) loss: 0.8845 (0.8964) time: 0.1443 data: 0.0586 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:12 lr: 0.000125 grad: 0.1356 (0.1834) loss: 0.8768 (0.8960) time: 0.1722 data: 0.0882 max mem: 9377 +Train: [5] [5100/6250] eta: 0:02:57 lr: 0.000125 grad: 0.1568 (0.1830) loss: 0.8777 (0.8957) time: 0.1663 data: 0.0879 max mem: 9377 +Train: [5] [5200/6250] eta: 0:02:41 lr: 0.000125 grad: 0.1368 (0.1824) loss: 0.8767 (0.8954) time: 0.1441 data: 0.0603 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:26 lr: 0.000125 grad: 0.1617 (0.1819) loss: 0.8810 (0.8950) time: 0.1265 data: 0.0406 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:10 lr: 0.000125 grad: 0.1567 (0.1815) loss: 0.8781 (0.8947) time: 0.1640 data: 0.0780 max mem: 9377 +Train: [5] [5500/6250] eta: 0:01:55 lr: 0.000125 grad: 0.1540 (0.1810) loss: 0.8796 (0.8944) time: 0.1584 data: 0.0647 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:40 lr: 0.000125 grad: 0.1488 (0.1805) loss: 0.8733 (0.8941) time: 0.1681 data: 0.0838 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:24 lr: 0.000125 grad: 0.1349 (0.1800) loss: 0.8765 (0.8938) time: 0.1502 data: 0.0630 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:09 lr: 0.000125 grad: 0.1466 (0.1796) loss: 0.8736 (0.8935) time: 0.1385 data: 0.0548 max mem: 9377 +Train: [5] [5900/6250] eta: 0:00:53 lr: 0.000125 grad: 0.1322 (0.1790) loss: 0.8745 (0.8932) time: 0.1477 data: 0.0619 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:38 lr: 0.000125 grad: 0.1426 (0.1787) loss: 0.8722 (0.8929) time: 0.1533 data: 0.0724 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:22 lr: 0.000125 grad: 0.1389 (0.1783) loss: 0.8754 (0.8927) time: 0.1464 data: 0.0639 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.1490 (0.1778) loss: 0.8784 (0.8924) time: 0.1379 data: 0.0609 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1354 (0.1775) loss: 0.8761 (0.8923) time: 0.1640 data: 0.0862 max mem: 9377 +Train: [5] Total time: 0:16:03 (0.1542 s / it) +Averaged stats: lr: 0.000125 grad: 0.1354 (0.1775) loss: 0.8761 (0.8923) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:04:14 loss: 0.8818 (0.8818) time: 4.1047 data: 4.0517 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8815 (0.8803) time: 0.1475 data: 0.1203 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:14 (0.2296 s / it) +Averaged stats (hcp-train-subset): loss: 0.8815 (0.8803) +Eval (hcp-val): [5] [ 0/62] eta: 0:03:57 loss: 0.8750 (0.8750) time: 3.8358 data: 3.7797 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8773 (0.8791) time: 0.1232 data: 0.0973 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:16 (0.2638 s / it) +Averaged stats (hcp-val): loss: 0.8773 (0.8791) +Eval (nsd-val): [5] [ 0/62] eta: 0:06:32 loss: 0.8327 (0.8327) time: 6.3359 data: 6.3063 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8430 (0.8425) time: 0.1212 data: 0.0941 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:14 (0.2408 s / it) +Averaged stats (nsd-val): loss: 0.8430 (0.8425) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 6:33:35 lr: 0.000125 grad: 0.1127 (0.1127) loss: 0.9126 (0.9126) time: 3.7785 data: 3.5746 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:20:36 lr: 0.000125 grad: 0.1150 (0.1427) loss: 0.8831 (0.8853) time: 0.1650 data: 0.0804 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:18:14 lr: 0.000125 grad: 0.1383 (0.1438) loss: 0.8790 (0.8822) time: 0.1645 data: 0.0690 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:17:34 lr: 0.000125 grad: 0.1324 (0.1409) loss: 0.8757 (0.8802) time: 0.1587 data: 0.0690 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:17:00 lr: 0.000125 grad: 0.1322 (0.1472) loss: 0.8742 (0.8783) time: 0.1822 data: 0.0955 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:16:27 lr: 0.000125 grad: 0.1305 (0.1459) loss: 0.8721 (0.8772) time: 0.1736 data: 0.0788 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:15:44 lr: 0.000125 grad: 0.1208 (0.1428) loss: 0.8745 (0.8765) time: 0.1290 data: 0.0347 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:15:19 lr: 0.000125 grad: 0.1422 (0.1425) loss: 0.8765 (0.8762) time: 0.1468 data: 0.0612 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:15:07 lr: 0.000125 grad: 0.1171 (0.1415) loss: 0.8705 (0.8757) time: 0.1381 data: 0.0527 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:14:52 lr: 0.000125 grad: 0.1315 (0.1414) loss: 0.8693 (0.8752) time: 0.1893 data: 0.0931 max mem: 9377 +Train: [6] [1000/6250] eta: 0:14:26 lr: 0.000125 grad: 0.1366 (0.1409) loss: 0.8732 (0.8747) time: 0.1685 data: 0.0888 max mem: 9377 +Train: [6] [1100/6250] eta: 0:13:55 lr: 0.000125 grad: 0.1193 (0.1398) loss: 0.8756 (0.8744) time: 0.1250 data: 0.0464 max mem: 9377 +Train: [6] [1200/6250] eta: 0:13:30 lr: 0.000125 grad: 0.1264 (0.1389) loss: 0.8715 (0.8741) time: 0.1410 data: 0.0581 max mem: 9377 +Train: [6] [1300/6250] eta: 0:13:11 lr: 0.000125 grad: 0.1139 (0.1388) loss: 0.8733 (0.8741) time: 0.1463 data: 0.0589 max mem: 9377 +Train: [6] [1400/6250] eta: 0:12:50 lr: 0.000125 grad: 0.1203 (0.1378) loss: 0.8697 (0.8740) time: 0.1543 data: 0.0653 max mem: 9377 +Train: [6] [1500/6250] eta: 0:12:27 lr: 0.000125 grad: 0.1312 (0.1372) loss: 0.8743 (0.8738) time: 0.1284 data: 0.0425 max mem: 9377 +Train: [6] [1600/6250] eta: 0:12:06 lr: 0.000125 grad: 0.1276 (0.1367) loss: 0.8719 (0.8736) time: 0.1272 data: 0.0444 max mem: 9377 +Train: [6] [1700/6250] eta: 0:11:45 lr: 0.000125 grad: 0.1204 (0.1363) loss: 0.8761 (0.8734) time: 0.1206 data: 0.0368 max mem: 9377 +Train: [6] [1800/6250] eta: 0:11:28 lr: 0.000125 grad: 0.1233 (0.1360) loss: 0.8735 (0.8733) time: 0.1504 data: 0.0550 max mem: 9377 +Train: [6] [1900/6250] eta: 0:11:14 lr: 0.000125 grad: 0.1298 (0.1357) loss: 0.8722 (0.8731) time: 0.1316 data: 0.0383 max mem: 9377 +Train: [6] [2000/6250] eta: 0:10:57 lr: 0.000125 grad: 0.1256 (0.1349) loss: 0.8685 (0.8730) time: 0.1432 data: 0.0579 max mem: 9377 +Train: [6] [2100/6250] eta: 0:10:41 lr: 0.000125 grad: 0.1105 (0.1345) loss: 0.8722 (0.8729) time: 0.1585 data: 0.0726 max mem: 9377 +Train: [6] [2200/6250] eta: 0:10:26 lr: 0.000125 grad: 0.1186 (0.1339) loss: 0.8690 (0.8728) time: 0.1708 data: 0.0819 max mem: 9377 +Train: [6] [2300/6250] eta: 0:10:10 lr: 0.000125 grad: 0.1241 (0.1338) loss: 0.8715 (0.8728) time: 0.1157 data: 0.0375 max mem: 9377 +Train: [6] [2400/6250] eta: 0:09:52 lr: 0.000125 grad: 0.1146 (0.1335) loss: 0.8700 (0.8727) time: 0.1463 data: 0.0614 max mem: 9377 +Train: [6] [2500/6250] eta: 0:09:36 lr: 0.000125 grad: 0.1099 (0.1331) loss: 0.8716 (0.8726) time: 0.1410 data: 0.0598 max mem: 9377 +Train: [6] [2600/6250] eta: 0:09:23 lr: 0.000125 grad: 0.1255 (0.1325) loss: 0.8665 (0.8725) time: 0.1858 data: 0.0988 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:08 lr: 0.000125 grad: 0.1154 (0.1321) loss: 0.8732 (0.8724) time: 0.1573 data: 0.0742 max mem: 9377 +Train: [6] [2800/6250] eta: 0:08:54 lr: 0.000125 grad: 0.1141 (0.1315) loss: 0.8709 (0.8724) time: 0.1740 data: 0.0944 max mem: 9377 +Train: [6] [2900/6250] eta: 0:08:38 lr: 0.000125 grad: 0.1132 (0.1309) loss: 0.8672 (0.8723) time: 0.1499 data: 0.0618 max mem: 9377 +Train: [6] [3000/6250] eta: 0:08:22 lr: 0.000125 grad: 0.1028 (0.1305) loss: 0.8743 (0.8723) time: 0.1583 data: 0.0819 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:07 lr: 0.000125 grad: 0.1224 (0.1304) loss: 0.8673 (0.8722) time: 0.1609 data: 0.0852 max mem: 9377 +Train: [6] [3200/6250] eta: 0:07:52 lr: 0.000125 grad: 0.1018 (0.1301) loss: 0.8705 (0.8721) time: 0.1505 data: 0.0728 max mem: 9377 +Train: [6] [3300/6250] eta: 0:07:35 lr: 0.000125 grad: 0.1029 (0.1299) loss: 0.8661 (0.8720) time: 0.1585 data: 0.0720 max mem: 9377 +Train: [6] [3400/6250] eta: 0:07:18 lr: 0.000125 grad: 0.1132 (0.1298) loss: 0.8674 (0.8719) time: 0.1231 data: 0.0457 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:03 lr: 0.000125 grad: 0.1074 (0.1293) loss: 0.8688 (0.8717) time: 0.1725 data: 0.0795 max mem: 9377 +Train: [6] [3600/6250] eta: 0:06:47 lr: 0.000125 grad: 0.1140 (0.1290) loss: 0.8660 (0.8716) time: 0.1503 data: 0.0737 max mem: 9377 +Train: [6] [3700/6250] eta: 0:06:32 lr: 0.000125 grad: 0.1237 (0.1291) loss: 0.8648 (0.8715) time: 0.1546 data: 0.0757 max mem: 9377 +Train: [6] [3800/6250] eta: 0:06:16 lr: 0.000125 grad: 0.1020 (0.1289) loss: 0.8684 (0.8713) time: 0.1334 data: 0.0524 max mem: 9377 +Train: [6] [3900/6250] eta: 0:06:00 lr: 0.000125 grad: 0.1126 (0.1286) loss: 0.8695 (0.8712) time: 0.1360 data: 0.0526 max mem: 9377 +Train: [6] [4000/6250] eta: 0:05:44 lr: 0.000125 grad: 0.1115 (0.1282) loss: 0.8677 (0.8710) time: 0.1329 data: 0.0501 max mem: 9377 +Train: [6] [4100/6250] eta: 0:05:28 lr: 0.000125 grad: 0.0987 (0.1279) loss: 0.8634 (0.8709) time: 0.1545 data: 0.0676 max mem: 9377 +Train: [6] [4200/6250] eta: 0:05:13 lr: 0.000125 grad: 0.1095 (0.1277) loss: 0.8656 (0.8707) time: 0.1319 data: 0.0338 max mem: 9377 +Train: [6] [4300/6250] eta: 0:04:57 lr: 0.000125 grad: 0.1237 (0.1274) loss: 0.8600 (0.8705) time: 0.1304 data: 0.0494 max mem: 9377 +Train: [6] [4400/6250] eta: 0:04:41 lr: 0.000125 grad: 0.1097 (0.1272) loss: 0.8650 (0.8703) time: 0.1527 data: 0.0656 max mem: 9377 +Train: [6] [4500/6250] eta: 0:04:27 lr: 0.000125 grad: 0.1101 (0.1270) loss: 0.8667 (0.8702) time: 0.1940 data: 0.1139 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:12 lr: 0.000125 grad: 0.1157 (0.1266) loss: 0.8588 (0.8700) time: 0.1503 data: 0.0656 max mem: 9377 +Train: [6] [4700/6250] eta: 0:03:57 lr: 0.000125 grad: 0.1102 (0.1263) loss: 0.8599 (0.8699) time: 0.1639 data: 0.0823 max mem: 9377 +Train: [6] [4800/6250] eta: 0:03:41 lr: 0.000125 grad: 0.1015 (0.1259) loss: 0.8658 (0.8697) time: 0.1665 data: 0.0867 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:26 lr: 0.000125 grad: 0.1018 (0.1258) loss: 0.8589 (0.8696) time: 0.1600 data: 0.0697 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:11 lr: 0.000125 grad: 0.1012 (0.1254) loss: 0.8643 (0.8694) time: 0.1772 data: 0.0940 max mem: 9377 +Train: [6] [5100/6250] eta: 0:02:55 lr: 0.000125 grad: 0.1067 (0.1252) loss: 0.8603 (0.8693) time: 0.1648 data: 0.0836 max mem: 9377 +Train: [6] [5200/6250] eta: 0:02:40 lr: 0.000125 grad: 0.1075 (0.1249) loss: 0.8555 (0.8691) time: 0.1560 data: 0.0692 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:24 lr: 0.000125 grad: 0.1169 (0.1247) loss: 0.8577 (0.8690) time: 0.1230 data: 0.0422 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:09 lr: 0.000125 grad: 0.1096 (0.1244) loss: 0.8641 (0.8688) time: 0.1378 data: 0.0535 max mem: 9377 +Train: [6] [5500/6250] eta: 0:01:54 lr: 0.000125 grad: 0.1176 (0.1241) loss: 0.8646 (0.8687) time: 0.1485 data: 0.0638 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:38 lr: 0.000125 grad: 0.1088 (0.1238) loss: 0.8585 (0.8686) time: 0.1610 data: 0.0658 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:23 lr: 0.000125 grad: 0.1087 (0.1236) loss: 0.8569 (0.8684) time: 0.1424 data: 0.0423 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:08 lr: 0.000125 grad: 0.1034 (0.1233) loss: 0.8602 (0.8683) time: 0.1478 data: 0.0689 max mem: 9377 +Train: [6] [5900/6250] eta: 0:00:53 lr: 0.000125 grad: 0.1009 (0.1231) loss: 0.8551 (0.8681) time: 0.1723 data: 0.0886 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:38 lr: 0.000125 grad: 0.1013 (0.1230) loss: 0.8594 (0.8680) time: 0.1560 data: 0.0741 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:22 lr: 0.000125 grad: 0.1039 (0.1228) loss: 0.8627 (0.8679) time: 0.1567 data: 0.0691 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.0982 (0.1225) loss: 0.8611 (0.8678) time: 0.1536 data: 0.0660 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1046 (0.1224) loss: 0.8588 (0.8677) time: 0.1434 data: 0.0493 max mem: 9377 +Train: [6] Total time: 0:15:56 (0.1531 s / it) +Averaged stats: lr: 0.000125 grad: 0.1046 (0.1224) loss: 0.8588 (0.8677) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:05:29 loss: 0.8664 (0.8664) time: 5.3121 data: 5.2821 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8670 (0.8673) time: 0.1505 data: 0.1246 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:14 (0.2321 s / it) +Averaged stats (hcp-train-subset): loss: 0.8670 (0.8673) +Eval (hcp-val): [6] [ 0/62] eta: 0:06:05 loss: 0.8636 (0.8636) time: 5.8937 data: 5.8631 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8650 (0.8655) time: 0.1924 data: 0.1670 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:17 (0.2774 s / it) +Averaged stats (hcp-val): loss: 0.8650 (0.8655) +Eval (nsd-val): [6] [ 0/62] eta: 0:05:16 loss: 0.8251 (0.8251) time: 5.1056 data: 5.0746 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8318 (0.8303) time: 0.1273 data: 0.1020 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:13 (0.2221 s / it) +Averaged stats (nsd-val): loss: 0.8318 (0.8303) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 6:34:57 lr: 0.000125 grad: 0.0962 (0.0962) loss: 0.8767 (0.8767) time: 3.7915 data: 3.5039 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:22:19 lr: 0.000125 grad: 0.1202 (0.1177) loss: 0.8658 (0.8674) time: 0.1386 data: 0.0428 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:19:35 lr: 0.000125 grad: 0.1261 (0.1224) loss: 0.8526 (0.8613) time: 0.2028 data: 0.1112 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:18:13 lr: 0.000125 grad: 0.1122 (0.1237) loss: 0.8519 (0.8582) time: 0.1620 data: 0.0767 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:17:27 lr: 0.000125 grad: 0.1115 (0.1202) loss: 0.8537 (0.8569) time: 0.1857 data: 0.1075 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:17:01 lr: 0.000125 grad: 0.1098 (0.1176) loss: 0.8594 (0.8567) time: 0.1921 data: 0.0983 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:16:36 lr: 0.000125 grad: 0.0966 (0.1156) loss: 0.8591 (0.8570) time: 0.1601 data: 0.0617 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:16:12 lr: 0.000125 grad: 0.1042 (0.1148) loss: 0.8630 (0.8573) time: 0.1633 data: 0.0720 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:15:39 lr: 0.000125 grad: 0.1053 (0.1132) loss: 0.8516 (0.8573) time: 0.1708 data: 0.0758 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:15:16 lr: 0.000125 grad: 0.1021 (0.1122) loss: 0.8607 (0.8576) time: 0.1878 data: 0.0986 max mem: 9377 +Train: [7] [1000/6250] eta: 0:14:46 lr: 0.000125 grad: 0.0993 (0.1113) loss: 0.8582 (0.8579) time: 0.1503 data: 0.0676 max mem: 9377 +Train: [7] [1100/6250] eta: 0:14:30 lr: 0.000125 grad: 0.0937 (0.1109) loss: 0.8609 (0.8580) time: 0.1399 data: 0.0470 max mem: 9377 +Train: [7] [1200/6250] eta: 0:14:09 lr: 0.000125 grad: 0.0993 (0.1102) loss: 0.8559 (0.8579) time: 0.1657 data: 0.0756 max mem: 9377 +Train: [7] [1300/6250] eta: 0:13:44 lr: 0.000125 grad: 0.0998 (0.1096) loss: 0.8579 (0.8579) time: 0.1357 data: 0.0566 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:21 lr: 0.000125 grad: 0.0954 (0.1092) loss: 0.8551 (0.8578) time: 0.1339 data: 0.0476 max mem: 9377 +Train: [7] [1500/6250] eta: 0:13:04 lr: 0.000125 grad: 0.1008 (0.1089) loss: 0.8571 (0.8577) time: 0.1654 data: 0.0793 max mem: 9377 +Train: [7] [1600/6250] eta: 0:12:46 lr: 0.000125 grad: 0.0943 (0.1091) loss: 0.8539 (0.8577) time: 0.1481 data: 0.0524 max mem: 9377 +Train: [7] [1700/6250] eta: 0:12:25 lr: 0.000125 grad: 0.0982 (0.1088) loss: 0.8570 (0.8577) time: 0.1363 data: 0.0493 max mem: 9377 +Train: [7] [1800/6250] eta: 0:12:06 lr: 0.000125 grad: 0.0911 (0.1089) loss: 0.8575 (0.8575) time: 0.1381 data: 0.0472 max mem: 9377 +Train: [7] [1900/6250] eta: 0:11:47 lr: 0.000125 grad: 0.1044 (0.1086) loss: 0.8568 (0.8575) time: 0.1314 data: 0.0450 max mem: 9377 +Train: [7] [2000/6250] eta: 0:11:27 lr: 0.000125 grad: 0.0979 (0.1083) loss: 0.8546 (0.8574) time: 0.1396 data: 0.0534 max mem: 9377 +Train: [7] [2100/6250] eta: 0:11:07 lr: 0.000125 grad: 0.0972 (0.1083) loss: 0.8554 (0.8572) time: 0.1606 data: 0.0737 max mem: 9377 +Train: [7] [2200/6250] eta: 0:10:48 lr: 0.000125 grad: 0.0978 (0.1082) loss: 0.8535 (0.8570) time: 0.1275 data: 0.0476 max mem: 9377 +Train: [7] [2300/6250] eta: 0:10:29 lr: 0.000125 grad: 0.0978 (0.1080) loss: 0.8540 (0.8568) time: 0.1380 data: 0.0565 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:10 lr: 0.000125 grad: 0.0954 (0.1079) loss: 0.8473 (0.8566) time: 0.1508 data: 0.0717 max mem: 9377 +Train: [7] [2500/6250] eta: 0:09:53 lr: 0.000125 grad: 0.1048 (0.1081) loss: 0.8532 (0.8563) time: 0.1670 data: 0.0814 max mem: 9377 +Train: [7] [2600/6250] eta: 0:09:37 lr: 0.000125 grad: 0.0987 (0.1079) loss: 0.8578 (0.8561) time: 0.1468 data: 0.0599 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:20 lr: 0.000125 grad: 0.0997 (0.1078) loss: 0.8513 (0.8560) time: 0.1569 data: 0.0747 max mem: 9377 +Train: [7] [2800/6250] eta: 0:09:02 lr: 0.000125 grad: 0.0960 (0.1077) loss: 0.8534 (0.8559) time: 0.1578 data: 0.0741 max mem: 9377 +Train: [7] [2900/6250] eta: 0:08:45 lr: 0.000125 grad: 0.0979 (0.1077) loss: 0.8461 (0.8557) time: 0.1475 data: 0.0702 max mem: 9377 +Train: [7] [3000/6250] eta: 0:08:28 lr: 0.000125 grad: 0.1016 (0.1075) loss: 0.8493 (0.8557) time: 0.1271 data: 0.0513 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:11 lr: 0.000125 grad: 0.1026 (0.1074) loss: 0.8484 (0.8556) time: 0.1599 data: 0.0701 max mem: 9377 +Train: [7] [3200/6250] eta: 0:07:55 lr: 0.000125 grad: 0.0923 (0.1071) loss: 0.8513 (0.8556) time: 0.1456 data: 0.0562 max mem: 9377 +Train: [7] [3300/6250] eta: 0:07:39 lr: 0.000125 grad: 0.1059 (0.1070) loss: 0.8564 (0.8555) time: 0.1666 data: 0.0805 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:23 lr: 0.000125 grad: 0.1105 (0.1070) loss: 0.8498 (0.8554) time: 0.1618 data: 0.0783 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:07 lr: 0.000125 grad: 0.1011 (0.1070) loss: 0.8530 (0.8553) time: 0.1610 data: 0.0733 max mem: 9377 +Train: [7] [3600/6250] eta: 0:06:52 lr: 0.000125 grad: 0.1042 (0.1071) loss: 0.8543 (0.8553) time: 0.1471 data: 0.0624 max mem: 9377 +Train: [7] [3700/6250] eta: 0:06:36 lr: 0.000125 grad: 0.1057 (0.1069) loss: 0.8534 (0.8553) time: 0.1500 data: 0.0648 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:20 lr: 0.000125 grad: 0.0915 (0.1068) loss: 0.8507 (0.8553) time: 0.1503 data: 0.0656 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:04 lr: 0.000125 grad: 0.0939 (0.1068) loss: 0.8532 (0.8552) time: 0.1429 data: 0.0585 max mem: 9377 +Train: [7] [4000/6250] eta: 0:05:48 lr: 0.000125 grad: 0.0962 (0.1066) loss: 0.8494 (0.8551) time: 0.1337 data: 0.0498 max mem: 9377 +Train: [7] [4100/6250] eta: 0:05:32 lr: 0.000125 grad: 0.1033 (0.1064) loss: 0.8464 (0.8550) time: 0.1422 data: 0.0604 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:16 lr: 0.000125 grad: 0.0943 (0.1063) loss: 0.8501 (0.8549) time: 0.1462 data: 0.0614 max mem: 9377 +Train: [7] [4300/6250] eta: 0:05:00 lr: 0.000125 grad: 0.1005 (0.1061) loss: 0.8480 (0.8548) time: 0.1515 data: 0.0675 max mem: 9377 +Train: [7] [4400/6250] eta: 0:04:44 lr: 0.000125 grad: 0.0900 (0.1058) loss: 0.8548 (0.8547) time: 0.1441 data: 0.0576 max mem: 9377 +Train: [7] [4500/6250] eta: 0:04:28 lr: 0.000125 grad: 0.0934 (0.1056) loss: 0.8432 (0.8547) time: 0.1201 data: 0.0390 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:12 lr: 0.000125 grad: 0.0918 (0.1055) loss: 0.8526 (0.8546) time: 0.1411 data: 0.0629 max mem: 9377 +Train: [7] [4700/6250] eta: 0:03:57 lr: 0.000125 grad: 0.0974 (0.1054) loss: 0.8526 (0.8545) time: 0.1611 data: 0.0827 max mem: 9377 +Train: [7] [4800/6250] eta: 0:03:41 lr: 0.000125 grad: 0.0887 (0.1052) loss: 0.8505 (0.8545) time: 0.1342 data: 0.0491 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:25 lr: 0.000125 grad: 0.0969 (0.1050) loss: 0.8549 (0.8544) time: 0.1386 data: 0.0596 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:10 lr: 0.000125 grad: 0.0922 (0.1049) loss: 0.8515 (0.8544) time: 0.1679 data: 0.0769 max mem: 9377 +Train: [7] [5100/6250] eta: 0:02:55 lr: 0.000125 grad: 0.0908 (0.1047) loss: 0.8504 (0.8543) time: 0.1623 data: 0.0739 max mem: 9377 +Train: [7] [5200/6250] eta: 0:02:40 lr: 0.000125 grad: 0.1010 (0.1047) loss: 0.8495 (0.8542) time: 0.1573 data: 0.0664 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:25 lr: 0.000125 grad: 0.0925 (0.1046) loss: 0.8505 (0.8542) time: 0.1547 data: 0.0736 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:10 lr: 0.000125 grad: 0.0921 (0.1044) loss: 0.8531 (0.8542) time: 0.1368 data: 0.0521 max mem: 9377 +Train: [7] [5500/6250] eta: 0:01:54 lr: 0.000125 grad: 0.0984 (0.1044) loss: 0.8506 (0.8541) time: 0.1319 data: 0.0420 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:39 lr: 0.000125 grad: 0.0904 (0.1042) loss: 0.8544 (0.8541) time: 0.1207 data: 0.0326 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:24 lr: 0.000125 grad: 0.0934 (0.1041) loss: 0.8549 (0.8541) time: 0.1608 data: 0.0749 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:08 lr: 0.000125 grad: 0.0948 (0.1040) loss: 0.8480 (0.8540) time: 0.1544 data: 0.0703 max mem: 9377 +Train: [7] [5900/6250] eta: 0:00:53 lr: 0.000125 grad: 0.0978 (0.1041) loss: 0.8497 (0.8540) time: 0.1351 data: 0.0502 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:38 lr: 0.000125 grad: 0.0947 (0.1040) loss: 0.8542 (0.8540) time: 0.1541 data: 0.0690 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:22 lr: 0.000125 grad: 0.1036 (0.1040) loss: 0.8529 (0.8539) time: 0.1590 data: 0.0774 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:07 lr: 0.000125 grad: 0.0943 (0.1039) loss: 0.8529 (0.8538) time: 0.1394 data: 0.0562 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0942 (0.1038) loss: 0.8516 (0.8538) time: 0.1439 data: 0.0590 max mem: 9377 +Train: [7] Total time: 0:15:58 (0.1533 s / it) +Averaged stats: lr: 0.000125 grad: 0.0942 (0.1038) loss: 0.8516 (0.8538) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:04:52 loss: 0.8639 (0.8639) time: 4.7129 data: 4.6814 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8622 (0.8628) time: 0.1399 data: 0.1145 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (hcp-train-subset): loss: 0.8622 (0.8628) +Eval (hcp-val): [7] [ 0/62] eta: 0:03:14 loss: 0.8566 (0.8566) time: 3.1357 data: 3.0824 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8598 (0.8607) time: 0.1151 data: 0.0895 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:15 (0.2568 s / it) +Averaged stats (hcp-val): loss: 0.8598 (0.8607) +Eval (nsd-val): [7] [ 0/62] eta: 0:05:50 loss: 0.8163 (0.8163) time: 5.6588 data: 5.6263 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8288 (0.8270) time: 0.1489 data: 0.1222 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:13 (0.2230 s / it) +Averaged stats (nsd-val): loss: 0.8288 (0.8270) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 7:12:12 lr: 0.000125 grad: 0.0696 (0.0696) loss: 0.8953 (0.8953) time: 4.1492 data: 3.8756 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:21:27 lr: 0.000125 grad: 0.0883 (0.0911) loss: 0.8499 (0.8603) time: 0.1581 data: 0.0684 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:19:37 lr: 0.000125 grad: 0.0999 (0.0937) loss: 0.8496 (0.8563) time: 0.2041 data: 0.1236 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:18:01 lr: 0.000125 grad: 0.0872 (0.0950) loss: 0.8553 (0.8536) time: 0.1458 data: 0.0526 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:17:30 lr: 0.000125 grad: 0.0857 (0.0945) loss: 0.8519 (0.8524) time: 0.1648 data: 0.0731 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:16:56 lr: 0.000125 grad: 0.0962 (0.0948) loss: 0.8538 (0.8527) time: 0.1823 data: 0.0820 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:16:35 lr: 0.000125 grad: 0.0988 (0.0949) loss: 0.8559 (0.8526) time: 0.1811 data: 0.0794 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:15:59 lr: 0.000125 grad: 0.0917 (0.0948) loss: 0.8463 (0.8520) time: 0.1500 data: 0.0512 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:15:32 lr: 0.000125 grad: 0.1063 (0.0969) loss: 0.8449 (0.8518) time: 0.1785 data: 0.0882 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:15:07 lr: 0.000125 grad: 0.0973 (0.0974) loss: 0.8484 (0.8515) time: 0.1706 data: 0.0898 max mem: 9377 +Train: [8] [1000/6250] eta: 0:14:40 lr: 0.000125 grad: 0.0949 (0.0977) loss: 0.8507 (0.8511) time: 0.1511 data: 0.0688 max mem: 9377 +Train: [8] [1100/6250] eta: 0:14:14 lr: 0.000125 grad: 0.0912 (0.0978) loss: 0.8475 (0.8510) time: 0.1169 data: 0.0339 max mem: 9377 +Train: [8] [1200/6250] eta: 0:13:48 lr: 0.000125 grad: 0.0968 (0.0980) loss: 0.8492 (0.8507) time: 0.1269 data: 0.0440 max mem: 9377 +Train: [8] [1300/6250] eta: 0:13:31 lr: 0.000125 grad: 0.0983 (0.0981) loss: 0.8480 (0.8506) time: 0.2439 data: 0.1644 max mem: 9377 +Train: [8] [1400/6250] eta: 0:13:12 lr: 0.000125 grad: 0.0957 (0.0981) loss: 0.8472 (0.8506) time: 0.1728 data: 0.0851 max mem: 9377 +Train: [8] [1500/6250] eta: 0:12:53 lr: 0.000125 grad: 0.1016 (0.0980) loss: 0.8430 (0.8503) time: 0.1381 data: 0.0586 max mem: 9377 +Train: [8] [1600/6250] eta: 0:12:32 lr: 0.000125 grad: 0.0908 (0.0979) loss: 0.8467 (0.8501) time: 0.1203 data: 0.0435 max mem: 9377 +Train: [8] [1700/6250] eta: 0:12:19 lr: 0.000125 grad: 0.0853 (0.0980) loss: 0.8456 (0.8498) time: 0.1834 data: 0.0958 max mem: 9377 +Train: [8] [1800/6250] eta: 0:12:04 lr: 0.000125 grad: 0.0883 (0.0987) loss: 0.8426 (0.8496) time: 0.1456 data: 0.0622 max mem: 9377 +Train: [8] [1900/6250] eta: 0:11:45 lr: 0.000125 grad: 0.0875 (0.0988) loss: 0.8497 (0.8494) time: 0.1590 data: 0.0799 max mem: 9377 +Train: [8] [2000/6250] eta: 0:11:29 lr: 0.000125 grad: 0.1003 (0.0990) loss: 0.8363 (0.8492) time: 0.1577 data: 0.0639 max mem: 9377 +Train: [8] [2100/6250] eta: 0:11:12 lr: 0.000125 grad: 0.1002 (0.0992) loss: 0.8438 (0.8490) time: 0.1641 data: 0.0725 max mem: 9377 +Train: [8] [2200/6250] eta: 0:10:53 lr: 0.000125 grad: 0.0929 (0.0992) loss: 0.8491 (0.8489) time: 0.1437 data: 0.0495 max mem: 9377 +Train: [8] [2300/6250] eta: 0:10:34 lr: 0.000125 grad: 0.0942 (0.0991) loss: 0.8455 (0.8489) time: 0.1533 data: 0.0685 max mem: 9377 +Train: [8] [2400/6250] eta: 0:10:16 lr: 0.000125 grad: 0.0966 (0.0991) loss: 0.8449 (0.8487) time: 0.1287 data: 0.0422 max mem: 9377 +Train: [8] [2500/6250] eta: 0:09:58 lr: 0.000125 grad: 0.1089 (0.0994) loss: 0.8454 (0.8486) time: 0.1590 data: 0.0727 max mem: 9377 +Train: [8] [2600/6250] eta: 0:09:41 lr: 0.000125 grad: 0.0944 (0.0993) loss: 0.8443 (0.8486) time: 0.1671 data: 0.0815 max mem: 9377 +Train: [8] [2700/6250] eta: 0:09:22 lr: 0.000125 grad: 0.0964 (0.0994) loss: 0.8499 (0.8485) time: 0.1392 data: 0.0527 max mem: 9377 +Train: [8] [2800/6250] eta: 0:09:05 lr: 0.000125 grad: 0.0958 (0.0995) loss: 0.8478 (0.8484) time: 0.1725 data: 0.0877 max mem: 9377 +Train: [8] [2900/6250] eta: 0:08:48 lr: 0.000125 grad: 0.0993 (0.0994) loss: 0.8395 (0.8483) time: 0.1554 data: 0.0696 max mem: 9377 +Train: [8] [3000/6250] eta: 0:08:30 lr: 0.000125 grad: 0.0916 (0.0993) loss: 0.8513 (0.8483) time: 0.1359 data: 0.0462 max mem: 9377 +Train: [8] [3100/6250] eta: 0:08:13 lr: 0.000125 grad: 0.0974 (0.0994) loss: 0.8512 (0.8484) time: 0.1471 data: 0.0645 max mem: 9377 +Train: [8] [3200/6250] eta: 0:07:56 lr: 0.000125 grad: 0.0982 (0.0995) loss: 0.8413 (0.8483) time: 0.1423 data: 0.0518 max mem: 9377 +Train: [8] [3300/6250] eta: 0:07:40 lr: 0.000125 grad: 0.0923 (0.0993) loss: 0.8470 (0.8482) time: 0.1455 data: 0.0604 max mem: 9377 +Train: [8] [3400/6250] eta: 0:07:23 lr: 0.000125 grad: 0.0958 (0.0995) loss: 0.8432 (0.8481) time: 0.1542 data: 0.0746 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:07 lr: 0.000125 grad: 0.0925 (0.0995) loss: 0.8496 (0.8481) time: 0.1324 data: 0.0506 max mem: 9377 +Train: [8] [3600/6250] eta: 0:06:50 lr: 0.000125 grad: 0.0986 (0.0996) loss: 0.8343 (0.8479) time: 0.1060 data: 0.0130 max mem: 9377 +Train: [8] [3700/6250] eta: 0:06:34 lr: 0.000125 grad: 0.0940 (0.0999) loss: 0.8434 (0.8478) time: 0.1164 data: 0.0201 max mem: 9377 +Train: [8] [3800/6250] eta: 0:06:18 lr: 0.000125 grad: 0.0905 (0.1000) loss: 0.8488 (0.8478) time: 0.1423 data: 0.0498 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:02 lr: 0.000125 grad: 0.0938 (0.1000) loss: 0.8476 (0.8477) time: 0.1380 data: 0.0581 max mem: 9377 +Train: [8] [4000/6250] eta: 0:05:47 lr: 0.000125 grad: 0.0873 (0.0998) loss: 0.8454 (0.8477) time: 0.1492 data: 0.0675 max mem: 9377 +Train: [8] [4100/6250] eta: 0:05:32 lr: 0.000125 grad: 0.0876 (0.0998) loss: 0.8497 (0.8476) time: 0.1795 data: 0.1028 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:17 lr: 0.000125 grad: 0.0927 (0.0997) loss: 0.8422 (0.8476) time: 0.1381 data: 0.0626 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:02 lr: 0.000125 grad: 0.0962 (0.0996) loss: 0.8477 (0.8475) time: 0.1749 data: 0.0968 max mem: 9377 +Train: [8] [4400/6250] eta: 0:04:47 lr: 0.000125 grad: 0.0915 (0.0996) loss: 0.8448 (0.8475) time: 0.1990 data: 0.1198 max mem: 9377 +Train: [8] [4500/6250] eta: 0:04:31 lr: 0.000125 grad: 0.0930 (0.0996) loss: 0.8458 (0.8475) time: 0.1837 data: 0.1038 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:16 lr: 0.000125 grad: 0.1013 (0.0995) loss: 0.8471 (0.8475) time: 0.1472 data: 0.0641 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:00 lr: 0.000125 grad: 0.0965 (0.0995) loss: 0.8408 (0.8474) time: 0.1075 data: 0.0250 max mem: 9377 +Train: [8] [4800/6250] eta: 0:03:44 lr: 0.000125 grad: 0.0952 (0.0996) loss: 0.8468 (0.8473) time: 0.1432 data: 0.0619 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:28 lr: 0.000125 grad: 0.0911 (0.0995) loss: 0.8471 (0.8472) time: 0.1513 data: 0.0688 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:13 lr: 0.000125 grad: 0.0904 (0.0995) loss: 0.8396 (0.8472) time: 0.1389 data: 0.0499 max mem: 9377 +Train: [8] [5100/6250] eta: 0:02:57 lr: 0.000125 grad: 0.0964 (0.0994) loss: 0.8440 (0.8471) time: 0.1332 data: 0.0532 max mem: 9377 +Train: [8] [5200/6250] eta: 0:02:41 lr: 0.000124 grad: 0.0920 (0.0993) loss: 0.8488 (0.8471) time: 0.1391 data: 0.0539 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:26 lr: 0.000124 grad: 0.0847 (0.0992) loss: 0.8458 (0.8471) time: 0.1468 data: 0.0565 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:11 lr: 0.000124 grad: 0.1011 (0.0992) loss: 0.8455 (0.8470) time: 0.1856 data: 0.0983 max mem: 9377 +Train: [8] [5500/6250] eta: 0:01:55 lr: 0.000124 grad: 0.0934 (0.0991) loss: 0.8528 (0.8470) time: 0.1348 data: 0.0472 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:40 lr: 0.000124 grad: 0.0920 (0.0991) loss: 0.8464 (0.8470) time: 0.1424 data: 0.0645 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:24 lr: 0.000124 grad: 0.0937 (0.0990) loss: 0.8499 (0.8469) time: 0.1552 data: 0.0637 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:09 lr: 0.000124 grad: 0.0910 (0.0990) loss: 0.8463 (0.8469) time: 0.1680 data: 0.0836 max mem: 9377 +Train: [8] [5900/6250] eta: 0:00:54 lr: 0.000124 grad: 0.0892 (0.0990) loss: 0.8495 (0.8469) time: 0.1401 data: 0.0651 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:38 lr: 0.000124 grad: 0.0916 (0.0989) loss: 0.8470 (0.8469) time: 0.1329 data: 0.0464 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:23 lr: 0.000124 grad: 0.0911 (0.0989) loss: 0.8465 (0.8469) time: 0.1390 data: 0.0590 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:07 lr: 0.000124 grad: 0.0918 (0.0989) loss: 0.8497 (0.8468) time: 0.0949 data: 0.0056 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0907 (0.0988) loss: 0.8434 (0.8468) time: 0.1595 data: 0.0810 max mem: 9377 +Train: [8] Total time: 0:16:10 (0.1552 s / it) +Averaged stats: lr: 0.000124 grad: 0.0907 (0.0988) loss: 0.8434 (0.8468) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:05:08 loss: 0.8576 (0.8576) time: 4.9789 data: 4.9485 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8628 (0.8602) time: 0.1408 data: 0.1148 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (hcp-train-subset): loss: 0.8628 (0.8602) +Eval (hcp-val): [8] [ 0/62] eta: 0:04:53 loss: 0.8531 (0.8531) time: 4.7359 data: 4.7047 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8578 (0.8584) time: 0.1684 data: 0.1419 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-val): loss: 0.8578 (0.8584) +Eval (nsd-val): [8] [ 0/62] eta: 0:06:16 loss: 0.8132 (0.8132) time: 6.0773 data: 6.0445 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8251 (0.8249) time: 0.1459 data: 0.1184 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (nsd-val): loss: 0.8251 (0.8249) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 10:30:23 lr: 0.000124 grad: 0.0934 (0.0934) loss: 0.8872 (0.8872) time: 6.0517 data: 5.9589 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:21:16 lr: 0.000124 grad: 0.0870 (0.0983) loss: 0.8603 (0.8553) time: 0.1734 data: 0.0874 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:18:22 lr: 0.000124 grad: 0.0930 (0.0997) loss: 0.8533 (0.8525) time: 0.1490 data: 0.0546 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:17:45 lr: 0.000124 grad: 0.1030 (0.1019) loss: 0.8478 (0.8495) time: 0.1673 data: 0.0831 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:16:57 lr: 0.000124 grad: 0.0924 (0.1044) loss: 0.8452 (0.8472) time: 0.1621 data: 0.0655 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:16:38 lr: 0.000124 grad: 0.0987 (0.1036) loss: 0.8378 (0.8456) time: 0.1627 data: 0.0788 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:16:07 lr: 0.000124 grad: 0.0961 (0.1040) loss: 0.8365 (0.8442) time: 0.1737 data: 0.0897 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:15:52 lr: 0.000124 grad: 0.1043 (0.1043) loss: 0.8352 (0.8430) time: 0.2034 data: 0.1068 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:15:33 lr: 0.000124 grad: 0.1039 (0.1044) loss: 0.8347 (0.8422) time: 0.1547 data: 0.0685 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:15:13 lr: 0.000124 grad: 0.1012 (0.1042) loss: 0.8399 (0.8416) time: 0.1587 data: 0.0672 max mem: 9377 +Train: [9] [1000/6250] eta: 0:14:47 lr: 0.000124 grad: 0.0944 (0.1038) loss: 0.8465 (0.8413) time: 0.1409 data: 0.0556 max mem: 9377 +Train: [9] [1100/6250] eta: 0:14:21 lr: 0.000124 grad: 0.0944 (0.1035) loss: 0.8363 (0.8412) time: 0.1556 data: 0.0678 max mem: 9377 +Train: [9] [1200/6250] eta: 0:13:58 lr: 0.000124 grad: 0.1049 (0.1032) loss: 0.8392 (0.8411) time: 0.1545 data: 0.0684 max mem: 9377 +Train: [9] [1300/6250] eta: 0:13:31 lr: 0.000124 grad: 0.0908 (0.1029) loss: 0.8417 (0.8410) time: 0.1408 data: 0.0542 max mem: 9377 +Train: [9] [1400/6250] eta: 0:13:07 lr: 0.000124 grad: 0.0922 (0.1028) loss: 0.8407 (0.8409) time: 0.1415 data: 0.0610 max mem: 9377 +Train: [9] [1500/6250] eta: 0:12:45 lr: 0.000124 grad: 0.0906 (0.1026) loss: 0.8478 (0.8410) time: 0.1516 data: 0.0602 max mem: 9377 +Train: [9] [1600/6250] eta: 0:12:34 lr: 0.000124 grad: 0.0881 (0.1020) loss: 0.8382 (0.8411) time: 0.1670 data: 0.0797 max mem: 9377 +Train: [9] [1700/6250] eta: 0:12:20 lr: 0.000124 grad: 0.0901 (0.1016) loss: 0.8417 (0.8411) time: 0.1847 data: 0.1062 max mem: 9377 +Train: [9] [1800/6250] eta: 0:12:02 lr: 0.000124 grad: 0.0924 (0.1013) loss: 0.8420 (0.8411) time: 0.1506 data: 0.0708 max mem: 9377 +Train: [9] [1900/6250] eta: 0:11:44 lr: 0.000124 grad: 0.0911 (0.1010) loss: 0.8478 (0.8412) time: 0.1313 data: 0.0382 max mem: 9377 +Train: [9] [2000/6250] eta: 0:11:27 lr: 0.000124 grad: 0.0967 (0.1009) loss: 0.8402 (0.8412) time: 0.1664 data: 0.0801 max mem: 9377 +Train: [9] [2100/6250] eta: 0:11:11 lr: 0.000124 grad: 0.0920 (0.1007) loss: 0.8319 (0.8410) time: 0.1603 data: 0.0785 max mem: 9377 +Train: [9] [2200/6250] eta: 0:10:52 lr: 0.000124 grad: 0.0987 (0.1006) loss: 0.8364 (0.8409) time: 0.1460 data: 0.0611 max mem: 9377 +Train: [9] [2300/6250] eta: 0:10:35 lr: 0.000124 grad: 0.0915 (0.1005) loss: 0.8370 (0.8408) time: 0.1814 data: 0.0895 max mem: 9377 +Train: [9] [2400/6250] eta: 0:10:20 lr: 0.000124 grad: 0.0965 (0.1003) loss: 0.8385 (0.8407) time: 0.1653 data: 0.0699 max mem: 9377 +Train: [9] [2500/6250] eta: 0:10:03 lr: 0.000124 grad: 0.0953 (0.1003) loss: 0.8401 (0.8407) time: 0.1575 data: 0.0696 max mem: 9377 +Train: [9] [2600/6250] eta: 0:09:45 lr: 0.000124 grad: 0.0931 (0.1003) loss: 0.8394 (0.8406) time: 0.1708 data: 0.0832 max mem: 9377 +Train: [9] [2700/6250] eta: 0:09:28 lr: 0.000124 grad: 0.1018 (0.1004) loss: 0.8420 (0.8405) time: 0.1199 data: 0.0319 max mem: 9377 +Train: [9] [2800/6250] eta: 0:09:12 lr: 0.000124 grad: 0.0977 (0.1003) loss: 0.8414 (0.8405) time: 0.1552 data: 0.0661 max mem: 9377 +Train: [9] [2900/6250] eta: 0:08:54 lr: 0.000124 grad: 0.0980 (0.1002) loss: 0.8312 (0.8404) time: 0.1486 data: 0.0669 max mem: 9377 +Train: [9] [3000/6250] eta: 0:08:37 lr: 0.000124 grad: 0.0994 (0.1003) loss: 0.8291 (0.8403) time: 0.1508 data: 0.0658 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:20 lr: 0.000124 grad: 0.1020 (0.1006) loss: 0.8375 (0.8402) time: 0.1407 data: 0.0499 max mem: 9377 +Train: [9] [3200/6250] eta: 0:08:03 lr: 0.000124 grad: 0.1065 (0.1008) loss: 0.8391 (0.8400) time: 0.1501 data: 0.0630 max mem: 9377 +Train: [9] [3300/6250] eta: 0:07:46 lr: 0.000124 grad: 0.1007 (0.1008) loss: 0.8345 (0.8398) time: 0.1385 data: 0.0523 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:29 lr: 0.000124 grad: 0.0959 (0.1009) loss: 0.8346 (0.8397) time: 0.1300 data: 0.0426 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:12 lr: 0.000124 grad: 0.0975 (0.1010) loss: 0.8382 (0.8396) time: 0.1363 data: 0.0535 max mem: 9377 +Train: [9] [3600/6250] eta: 0:06:56 lr: 0.000124 grad: 0.0996 (0.1010) loss: 0.8388 (0.8395) time: 0.1211 data: 0.0383 max mem: 9377 +Train: [9] [3700/6250] eta: 0:06:39 lr: 0.000124 grad: 0.0978 (0.1011) loss: 0.8402 (0.8394) time: 0.1625 data: 0.0808 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:24 lr: 0.000124 grad: 0.0931 (0.1011) loss: 0.8397 (0.8393) time: 0.1440 data: 0.0582 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:08 lr: 0.000124 grad: 0.0959 (0.1012) loss: 0.8391 (0.8393) time: 0.1764 data: 0.0897 max mem: 9377 +Train: [9] [4000/6250] eta: 0:05:52 lr: 0.000124 grad: 0.0915 (0.1012) loss: 0.8430 (0.8393) time: 0.1574 data: 0.0729 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:36 lr: 0.000124 grad: 0.0960 (0.1014) loss: 0.8343 (0.8392) time: 0.1343 data: 0.0448 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:19 lr: 0.000124 grad: 0.1000 (0.1013) loss: 0.8363 (0.8391) time: 0.1171 data: 0.0367 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:03 lr: 0.000124 grad: 0.0925 (0.1014) loss: 0.8379 (0.8391) time: 0.1540 data: 0.0674 max mem: 9377 +Train: [9] [4400/6250] eta: 0:04:47 lr: 0.000124 grad: 0.0942 (0.1013) loss: 0.8301 (0.8390) time: 0.1445 data: 0.0631 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:32 lr: 0.000124 grad: 0.1012 (0.1013) loss: 0.8328 (0.8389) time: 0.1605 data: 0.0763 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:16 lr: 0.000124 grad: 0.0996 (0.1014) loss: 0.8334 (0.8389) time: 0.1770 data: 0.0963 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:01 lr: 0.000124 grad: 0.1021 (0.1014) loss: 0.8413 (0.8389) time: 0.1272 data: 0.0406 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:45 lr: 0.000124 grad: 0.0960 (0.1013) loss: 0.8372 (0.8388) time: 0.1482 data: 0.0603 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:29 lr: 0.000124 grad: 0.0912 (0.1013) loss: 0.8383 (0.8388) time: 0.1343 data: 0.0581 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:14 lr: 0.000124 grad: 0.0885 (0.1013) loss: 0.8440 (0.8387) time: 0.1605 data: 0.0743 max mem: 9377 +Train: [9] [5100/6250] eta: 0:02:58 lr: 0.000124 grad: 0.0976 (0.1013) loss: 0.8390 (0.8387) time: 0.1369 data: 0.0556 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:43 lr: 0.000124 grad: 0.0953 (0.1014) loss: 0.8357 (0.8387) time: 0.1417 data: 0.0576 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:27 lr: 0.000124 grad: 0.0965 (0.1014) loss: 0.8404 (0.8387) time: 0.1591 data: 0.0734 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:11 lr: 0.000124 grad: 0.0939 (0.1014) loss: 0.8421 (0.8386) time: 0.1494 data: 0.0531 max mem: 9377 +Train: [9] [5500/6250] eta: 0:01:56 lr: 0.000124 grad: 0.0911 (0.1014) loss: 0.8432 (0.8386) time: 0.1501 data: 0.0658 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:40 lr: 0.000124 grad: 0.0919 (0.1014) loss: 0.8409 (0.8386) time: 0.1429 data: 0.0502 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:24 lr: 0.000124 grad: 0.0932 (0.1014) loss: 0.8394 (0.8385) time: 0.1562 data: 0.0744 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:09 lr: 0.000124 grad: 0.0927 (0.1014) loss: 0.8362 (0.8385) time: 0.1423 data: 0.0544 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:54 lr: 0.000124 grad: 0.0915 (0.1013) loss: 0.8408 (0.8385) time: 0.1430 data: 0.0565 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:38 lr: 0.000124 grad: 0.1046 (0.1013) loss: 0.8340 (0.8384) time: 0.1736 data: 0.0876 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:23 lr: 0.000124 grad: 0.1047 (0.1014) loss: 0.8380 (0.8384) time: 0.1559 data: 0.0741 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:07 lr: 0.000124 grad: 0.0886 (0.1014) loss: 0.8402 (0.8384) time: 0.1568 data: 0.0697 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0918 (0.1014) loss: 0.8428 (0.8384) time: 0.1573 data: 0.0635 max mem: 9377 +Train: [9] Total time: 0:16:17 (0.1564 s / it) +Averaged stats: lr: 0.000124 grad: 0.0918 (0.1014) loss: 0.8428 (0.8384) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:05:55 loss: 0.8547 (0.8547) time: 5.7285 data: 5.6952 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8562 (0.8583) time: 0.1586 data: 0.1317 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:15 (0.2535 s / it) +Averaged stats (hcp-train-subset): loss: 0.8562 (0.8583) +Making plots (hcp-train-subset): example=43 +Eval (hcp-val): [9] [ 0/62] eta: 0:08:42 loss: 0.8516 (0.8516) time: 8.4308 data: 8.3990 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8563 (0.8569) time: 0.1853 data: 0.1606 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:17 (0.2865 s / it) +Averaged stats (hcp-val): loss: 0.8563 (0.8569) +Making plots (hcp-val): example=26 +Eval (nsd-val): [9] [ 0/62] eta: 0:06:47 loss: 0.8128 (0.8128) time: 6.5661 data: 6.5337 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8223 (0.8244) time: 0.1099 data: 0.0849 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:15 (0.2565 s / it) +Averaged stats (nsd-val): loss: 0.8223 (0.8244) +Making plots (nsd-val): example=36 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 11:56:47 lr: 0.000124 grad: 0.2833 (0.2833) loss: 0.8046 (0.8046) time: 6.8812 data: 6.7839 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:22:09 lr: 0.000124 grad: 0.0967 (0.1339) loss: 0.8419 (0.8455) time: 0.1622 data: 0.0721 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:19:51 lr: 0.000124 grad: 0.1073 (0.1199) loss: 0.8426 (0.8421) time: 0.1800 data: 0.0892 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:19:05 lr: 0.000124 grad: 0.0946 (0.1169) loss: 0.8454 (0.8393) time: 0.2012 data: 0.1157 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:18:47 lr: 0.000124 grad: 0.0954 (0.1122) loss: 0.8351 (0.8382) time: 0.1985 data: 0.1058 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:18:04 lr: 0.000124 grad: 0.0969 (0.1097) loss: 0.8375 (0.8379) time: 0.1738 data: 0.0721 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:17:40 lr: 0.000124 grad: 0.0918 (0.1069) loss: 0.8396 (0.8375) time: 0.1706 data: 0.0748 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:17:15 lr: 0.000124 grad: 0.0910 (0.1060) loss: 0.8384 (0.8373) time: 0.1708 data: 0.0828 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:16:31 lr: 0.000124 grad: 0.1002 (0.1051) loss: 0.8366 (0.8369) time: 0.1500 data: 0.0603 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:15:52 lr: 0.000124 grad: 0.0944 (0.1043) loss: 0.8379 (0.8366) time: 0.1390 data: 0.0528 max mem: 9377 +Train: [10] [1000/6250] eta: 0:15:28 lr: 0.000124 grad: 0.0881 (0.1036) loss: 0.8363 (0.8366) time: 0.1817 data: 0.0966 max mem: 9377 +Train: [10] [1100/6250] eta: 0:14:59 lr: 0.000124 grad: 0.0938 (0.1035) loss: 0.8460 (0.8365) time: 0.1279 data: 0.0422 max mem: 9377 +Train: [10] [1200/6250] eta: 0:14:28 lr: 0.000124 grad: 0.0936 (0.1029) loss: 0.8348 (0.8364) time: 0.1376 data: 0.0521 max mem: 9377 +Train: [10] [1300/6250] eta: 0:14:06 lr: 0.000124 grad: 0.0942 (0.1027) loss: 0.8284 (0.8362) time: 0.1613 data: 0.0797 max mem: 9377 +Train: [10] [1400/6250] eta: 0:13:42 lr: 0.000124 grad: 0.0988 (0.1024) loss: 0.8398 (0.8361) time: 0.1499 data: 0.0755 max mem: 9377 +Train: [10] [1500/6250] eta: 0:13:22 lr: 0.000124 grad: 0.1001 (0.1023) loss: 0.8350 (0.8360) time: 0.2140 data: 0.1421 max mem: 9377 +Train: [10] [1600/6250] eta: 0:13:06 lr: 0.000124 grad: 0.1046 (0.1020) loss: 0.8285 (0.8359) time: 0.1532 data: 0.0674 max mem: 9377 +Train: [10] [1700/6250] eta: 0:12:46 lr: 0.000124 grad: 0.0994 (0.1019) loss: 0.8316 (0.8357) time: 0.1498 data: 0.0650 max mem: 9377 +Train: [10] [1800/6250] eta: 0:12:25 lr: 0.000124 grad: 0.0986 (0.1018) loss: 0.8349 (0.8357) time: 0.1386 data: 0.0611 max mem: 9377 +Train: [10] [1900/6250] eta: 0:12:04 lr: 0.000124 grad: 0.1030 (0.1018) loss: 0.8302 (0.8355) time: 0.1506 data: 0.0688 max mem: 9377 +Train: [10] [2000/6250] eta: 0:11:44 lr: 0.000124 grad: 0.0961 (0.1016) loss: 0.8334 (0.8354) time: 0.1529 data: 0.0756 max mem: 9377 +Train: [10] [2100/6250] eta: 0:11:23 lr: 0.000124 grad: 0.0948 (0.1014) loss: 0.8325 (0.8352) time: 0.1299 data: 0.0475 max mem: 9377 +Train: [10] [2200/6250] eta: 0:11:03 lr: 0.000124 grad: 0.0834 (0.1014) loss: 0.8393 (0.8351) time: 0.1570 data: 0.0778 max mem: 9377 +Train: [10] [2300/6250] eta: 0:10:44 lr: 0.000124 grad: 0.0997 (0.1013) loss: 0.8360 (0.8350) time: 0.1430 data: 0.0582 max mem: 9377 +Train: [10] [2400/6250] eta: 0:10:26 lr: 0.000124 grad: 0.0937 (0.1012) loss: 0.8358 (0.8349) time: 0.1612 data: 0.0728 max mem: 9377 +Train: [10] [2500/6250] eta: 0:10:07 lr: 0.000124 grad: 0.0898 (0.1011) loss: 0.8392 (0.8349) time: 0.1472 data: 0.0637 max mem: 9377 +Train: [10] [2600/6250] eta: 0:09:48 lr: 0.000124 grad: 0.0949 (0.1012) loss: 0.8338 (0.8347) time: 0.1290 data: 0.0387 max mem: 9377 +Train: [10] [2700/6250] eta: 0:09:29 lr: 0.000124 grad: 0.0984 (0.1012) loss: 0.8280 (0.8345) time: 0.1469 data: 0.0657 max mem: 9377 +Train: [10] [2800/6250] eta: 0:09:10 lr: 0.000124 grad: 0.0986 (0.1013) loss: 0.8286 (0.8344) time: 0.1440 data: 0.0660 max mem: 9377 +Train: [10] [2900/6250] eta: 0:08:52 lr: 0.000124 grad: 0.0897 (0.1012) loss: 0.8341 (0.8343) time: 0.1463 data: 0.0632 max mem: 9377 +Train: [10] [3000/6250] eta: 0:08:33 lr: 0.000124 grad: 0.0983 (0.1013) loss: 0.8297 (0.8342) time: 0.1476 data: 0.0675 max mem: 9377 +Train: [10] [3100/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0958 (0.1013) loss: 0.8265 (0.8341) time: 0.1493 data: 0.0700 max mem: 9377 +Train: [10] [3200/6250] eta: 0:07:57 lr: 0.000124 grad: 0.0960 (0.1012) loss: 0.8316 (0.8340) time: 0.1342 data: 0.0541 max mem: 9377 +Train: [10] [3300/6250] eta: 0:07:40 lr: 0.000124 grad: 0.0966 (0.1012) loss: 0.8337 (0.8340) time: 0.1351 data: 0.0549 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:22 lr: 0.000124 grad: 0.0880 (0.1012) loss: 0.8352 (0.8340) time: 0.1385 data: 0.0568 max mem: 9377 +Train: [10] [3500/6250] eta: 0:07:05 lr: 0.000124 grad: 0.0967 (0.1011) loss: 0.8291 (0.8339) time: 0.1438 data: 0.0626 max mem: 9377 +Train: [10] [3600/6250] eta: 0:06:48 lr: 0.000124 grad: 0.0936 (0.1011) loss: 0.8346 (0.8339) time: 0.1459 data: 0.0698 max mem: 9377 +Train: [10] [3700/6250] eta: 0:06:32 lr: 0.000124 grad: 0.0962 (0.1012) loss: 0.8349 (0.8338) time: 0.1261 data: 0.0446 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:15 lr: 0.000124 grad: 0.0945 (0.1011) loss: 0.8330 (0.8338) time: 0.1394 data: 0.0623 max mem: 9377 +Train: [10] [3900/6250] eta: 0:05:59 lr: 0.000124 grad: 0.0980 (0.1010) loss: 0.8339 (0.8338) time: 0.1348 data: 0.0536 max mem: 9377 +Train: [10] [4000/6250] eta: 0:05:43 lr: 0.000124 grad: 0.0894 (0.1010) loss: 0.8381 (0.8337) time: 0.1491 data: 0.0701 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:27 lr: 0.000124 grad: 0.1033 (0.1010) loss: 0.8283 (0.8336) time: 0.1428 data: 0.0633 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:11 lr: 0.000124 grad: 0.1020 (0.1011) loss: 0.8340 (0.8335) time: 0.1376 data: 0.0602 max mem: 9377 +Train: [10] [4300/6250] eta: 0:04:55 lr: 0.000124 grad: 0.0974 (0.1011) loss: 0.8313 (0.8335) time: 0.1416 data: 0.0671 max mem: 9377 +Train: [10] [4400/6250] eta: 0:04:41 lr: 0.000124 grad: 0.1045 (0.1011) loss: 0.8265 (0.8334) time: 0.1856 data: 0.1094 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:25 lr: 0.000124 grad: 0.0885 (0.1011) loss: 0.8387 (0.8334) time: 0.1466 data: 0.0671 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:10 lr: 0.000124 grad: 0.0973 (0.1011) loss: 0.8354 (0.8334) time: 0.1605 data: 0.0843 max mem: 9377 +Train: [10] [4700/6250] eta: 0:03:55 lr: 0.000124 grad: 0.0990 (0.1010) loss: 0.8294 (0.8333) time: 0.1238 data: 0.0381 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:39 lr: 0.000124 grad: 0.0924 (0.1011) loss: 0.8338 (0.8332) time: 0.1466 data: 0.0732 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:24 lr: 0.000124 grad: 0.0957 (0.1011) loss: 0.8370 (0.8332) time: 0.1368 data: 0.0549 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:09 lr: 0.000124 grad: 0.0960 (0.1010) loss: 0.8334 (0.8332) time: 0.1519 data: 0.0770 max mem: 9377 +Train: [10] [5100/6250] eta: 0:02:54 lr: 0.000124 grad: 0.0991 (0.1010) loss: 0.8326 (0.8332) time: 0.1576 data: 0.0808 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:39 lr: 0.000124 grad: 0.0998 (0.1009) loss: 0.8361 (0.8332) time: 0.1728 data: 0.0928 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:24 lr: 0.000124 grad: 0.0998 (0.1009) loss: 0.8326 (0.8332) time: 0.1586 data: 0.0843 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:08 lr: 0.000124 grad: 0.1032 (0.1009) loss: 0.8197 (0.8332) time: 0.1565 data: 0.0773 max mem: 9377 +Train: [10] [5500/6250] eta: 0:01:53 lr: 0.000124 grad: 0.0972 (0.1010) loss: 0.8363 (0.8331) time: 0.1300 data: 0.0459 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:38 lr: 0.000124 grad: 0.0876 (0.1010) loss: 0.8345 (0.8331) time: 0.1469 data: 0.0699 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:23 lr: 0.000124 grad: 0.0964 (0.1010) loss: 0.8306 (0.8330) time: 0.1455 data: 0.0707 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:08 lr: 0.000124 grad: 0.0876 (0.1010) loss: 0.8353 (0.8330) time: 0.1291 data: 0.0474 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:52 lr: 0.000124 grad: 0.1045 (0.1011) loss: 0.8332 (0.8330) time: 0.1257 data: 0.0421 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:37 lr: 0.000124 grad: 0.0905 (0.1011) loss: 0.8372 (0.8330) time: 0.1285 data: 0.0469 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:22 lr: 0.000124 grad: 0.0905 (0.1012) loss: 0.8268 (0.8329) time: 0.1430 data: 0.0605 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:07 lr: 0.000124 grad: 0.1070 (0.1013) loss: 0.8275 (0.8329) time: 0.1355 data: 0.0567 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0952 (0.1013) loss: 0.8340 (0.8329) time: 0.1397 data: 0.0577 max mem: 9377 +Train: [10] Total time: 0:15:45 (0.1513 s / it) +Averaged stats: lr: 0.000124 grad: 0.0952 (0.1013) loss: 0.8340 (0.8329) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:05:59 loss: 0.8558 (0.8558) time: 5.7977 data: 5.7651 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8581 (0.8574) time: 0.1533 data: 0.1280 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (hcp-train-subset): loss: 0.8581 (0.8574) +Eval (hcp-val): [10] [ 0/62] eta: 0:06:32 loss: 0.8492 (0.8492) time: 6.3327 data: 6.3009 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8527 (0.8551) time: 0.1512 data: 0.1259 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:15 (0.2491 s / it) +Averaged stats (hcp-val): loss: 0.8527 (0.8551) +Eval (nsd-val): [10] [ 0/62] eta: 0:06:07 loss: 0.8155 (0.8155) time: 5.9230 data: 5.8876 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8235 (0.8253) time: 0.2406 data: 0.2154 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:17 (0.2867 s / it) +Averaged stats (nsd-val): loss: 0.8235 (0.8253) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 8:02:52 lr: 0.000124 grad: 0.0834 (0.0834) loss: 0.8977 (0.8977) time: 4.6356 data: 4.4415 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:23:11 lr: 0.000124 grad: 0.0909 (0.1105) loss: 0.8449 (0.8534) time: 0.1782 data: 0.0887 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:19:49 lr: 0.000124 grad: 0.0978 (0.1087) loss: 0.8285 (0.8418) time: 0.1462 data: 0.0662 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:18:48 lr: 0.000124 grad: 0.0864 (0.1042) loss: 0.8373 (0.8384) time: 0.1884 data: 0.1028 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:18:03 lr: 0.000124 grad: 0.0900 (0.1024) loss: 0.8298 (0.8365) time: 0.1682 data: 0.0784 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:17:31 lr: 0.000124 grad: 0.0970 (0.1025) loss: 0.8289 (0.8351) time: 0.1769 data: 0.0842 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:16:54 lr: 0.000124 grad: 0.1016 (0.1021) loss: 0.8260 (0.8341) time: 0.1598 data: 0.0702 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:16:15 lr: 0.000124 grad: 0.0955 (0.1014) loss: 0.8285 (0.8334) time: 0.1422 data: 0.0475 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:15:35 lr: 0.000124 grad: 0.0896 (0.1006) loss: 0.8261 (0.8328) time: 0.1282 data: 0.0340 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:15:03 lr: 0.000124 grad: 0.0853 (0.0999) loss: 0.8302 (0.8324) time: 0.1465 data: 0.0661 max mem: 9377 +Train: [11] [1000/6250] eta: 0:14:36 lr: 0.000124 grad: 0.0938 (0.0996) loss: 0.8329 (0.8324) time: 0.1314 data: 0.0398 max mem: 9377 +Train: [11] [1100/6250] eta: 0:14:07 lr: 0.000124 grad: 0.0920 (0.0991) loss: 0.8290 (0.8324) time: 0.1479 data: 0.0680 max mem: 9377 +Train: [11] [1200/6250] eta: 0:13:41 lr: 0.000124 grad: 0.0966 (0.0985) loss: 0.8321 (0.8325) time: 0.1599 data: 0.0803 max mem: 9377 +Train: [11] [1300/6250] eta: 0:13:16 lr: 0.000124 grad: 0.0903 (0.0984) loss: 0.8329 (0.8326) time: 0.1321 data: 0.0482 max mem: 9377 +Train: [11] [1400/6250] eta: 0:12:54 lr: 0.000124 grad: 0.0942 (0.0981) loss: 0.8317 (0.8326) time: 0.1290 data: 0.0504 max mem: 9377 +Train: [11] [1500/6250] eta: 0:12:32 lr: 0.000124 grad: 0.0966 (0.0978) loss: 0.8280 (0.8326) time: 0.1335 data: 0.0542 max mem: 9377 +Train: [11] [1600/6250] eta: 0:12:11 lr: 0.000124 grad: 0.0923 (0.0975) loss: 0.8304 (0.8326) time: 0.1301 data: 0.0416 max mem: 9377 +Train: [11] [1700/6250] eta: 0:11:50 lr: 0.000124 grad: 0.0926 (0.0974) loss: 0.8381 (0.8326) time: 0.1340 data: 0.0573 max mem: 9377 +Train: [11] [1800/6250] eta: 0:11:37 lr: 0.000124 grad: 0.0933 (0.0972) loss: 0.8357 (0.8327) time: 0.1316 data: 0.0533 max mem: 9377 +Train: [11] [1900/6250] eta: 0:11:20 lr: 0.000124 grad: 0.0829 (0.0971) loss: 0.8363 (0.8326) time: 0.1522 data: 0.0661 max mem: 9377 +Train: [11] [2000/6250] eta: 0:11:02 lr: 0.000124 grad: 0.0915 (0.0970) loss: 0.8361 (0.8327) time: 0.1409 data: 0.0617 max mem: 9377 +Train: [11] [2100/6250] eta: 0:10:45 lr: 0.000124 grad: 0.0938 (0.0967) loss: 0.8319 (0.8327) time: 0.1500 data: 0.0668 max mem: 9377 +Train: [11] [2200/6250] eta: 0:10:27 lr: 0.000124 grad: 0.0864 (0.0966) loss: 0.8342 (0.8327) time: 0.1345 data: 0.0514 max mem: 9377 +Train: [11] [2300/6250] eta: 0:10:10 lr: 0.000124 grad: 0.0902 (0.0966) loss: 0.8374 (0.8327) time: 0.1332 data: 0.0462 max mem: 9377 +Train: [11] [2400/6250] eta: 0:09:53 lr: 0.000124 grad: 0.0936 (0.0966) loss: 0.8360 (0.8327) time: 0.1411 data: 0.0579 max mem: 9377 +Train: [11] [2500/6250] eta: 0:09:37 lr: 0.000124 grad: 0.0925 (0.0966) loss: 0.8348 (0.8327) time: 0.1400 data: 0.0585 max mem: 9377 +Train: [11] [2600/6250] eta: 0:09:20 lr: 0.000124 grad: 0.0914 (0.0964) loss: 0.8410 (0.8328) time: 0.1527 data: 0.0628 max mem: 9377 +Train: [11] [2700/6250] eta: 0:09:02 lr: 0.000124 grad: 0.0964 (0.0966) loss: 0.8257 (0.8328) time: 0.1479 data: 0.0580 max mem: 9377 +Train: [11] [2800/6250] eta: 0:08:45 lr: 0.000124 grad: 0.1040 (0.0966) loss: 0.8265 (0.8327) time: 0.1403 data: 0.0573 max mem: 9377 +Train: [11] [2900/6250] eta: 0:08:28 lr: 0.000124 grad: 0.0989 (0.0967) loss: 0.8308 (0.8327) time: 0.1180 data: 0.0246 max mem: 9377 +Train: [11] [3000/6250] eta: 0:08:12 lr: 0.000124 grad: 0.0934 (0.0967) loss: 0.8292 (0.8326) time: 0.1485 data: 0.0707 max mem: 9377 +Train: [11] [3100/6250] eta: 0:07:56 lr: 0.000124 grad: 0.0881 (0.0968) loss: 0.8303 (0.8325) time: 0.1389 data: 0.0561 max mem: 9377 +Train: [11] [3200/6250] eta: 0:07:39 lr: 0.000124 grad: 0.1036 (0.0969) loss: 0.8279 (0.8323) time: 0.1425 data: 0.0619 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:23 lr: 0.000124 grad: 0.1018 (0.0971) loss: 0.8259 (0.8322) time: 0.1519 data: 0.0740 max mem: 9377 +Train: [11] [3400/6250] eta: 0:07:07 lr: 0.000124 grad: 0.0967 (0.0972) loss: 0.8295 (0.8321) time: 0.1449 data: 0.0685 max mem: 9377 +Train: [11] [3500/6250] eta: 0:06:51 lr: 0.000124 grad: 0.0912 (0.0973) loss: 0.8346 (0.8320) time: 0.1274 data: 0.0431 max mem: 9377 +Train: [11] [3600/6250] eta: 0:06:35 lr: 0.000124 grad: 0.0981 (0.0972) loss: 0.8224 (0.8319) time: 0.1211 data: 0.0444 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:19 lr: 0.000124 grad: 0.1062 (0.0975) loss: 0.8212 (0.8318) time: 0.1500 data: 0.0728 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:04 lr: 0.000124 grad: 0.0926 (0.0975) loss: 0.8335 (0.8316) time: 0.1496 data: 0.0730 max mem: 9377 +Train: [11] [3900/6250] eta: 0:05:48 lr: 0.000124 grad: 0.0982 (0.0975) loss: 0.8281 (0.8316) time: 0.1407 data: 0.0631 max mem: 9377 +Train: [11] [4000/6250] eta: 0:05:33 lr: 0.000123 grad: 0.0984 (0.0976) loss: 0.8278 (0.8315) time: 0.1426 data: 0.0677 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:17 lr: 0.000123 grad: 0.0989 (0.0976) loss: 0.8255 (0.8313) time: 0.1238 data: 0.0403 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:02 lr: 0.000123 grad: 0.0945 (0.0976) loss: 0.8326 (0.8312) time: 0.1242 data: 0.0414 max mem: 9377 +Train: [11] [4300/6250] eta: 0:04:47 lr: 0.000123 grad: 0.0936 (0.0977) loss: 0.8283 (0.8311) time: 0.1573 data: 0.0705 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:32 lr: 0.000123 grad: 0.0966 (0.0977) loss: 0.8247 (0.8310) time: 0.1231 data: 0.0421 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:17 lr: 0.000123 grad: 0.0993 (0.0977) loss: 0.8257 (0.8309) time: 0.2055 data: 0.1303 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:02 lr: 0.000123 grad: 0.0959 (0.0977) loss: 0.8292 (0.8308) time: 0.1344 data: 0.0483 max mem: 9377 +Train: [11] [4700/6250] eta: 0:03:47 lr: 0.000123 grad: 0.0929 (0.0977) loss: 0.8361 (0.8308) time: 0.1352 data: 0.0530 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:32 lr: 0.000123 grad: 0.0938 (0.0977) loss: 0.8329 (0.8307) time: 0.1375 data: 0.0577 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:17 lr: 0.000123 grad: 0.0967 (0.0977) loss: 0.8310 (0.8307) time: 0.1227 data: 0.0412 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:02 lr: 0.000123 grad: 0.0883 (0.0977) loss: 0.8334 (0.8307) time: 0.1606 data: 0.0780 max mem: 9377 +Train: [11] [5100/6250] eta: 0:02:47 lr: 0.000123 grad: 0.0886 (0.0976) loss: 0.8333 (0.8307) time: 0.1469 data: 0.0635 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:32 lr: 0.000123 grad: 0.0987 (0.0976) loss: 0.8209 (0.8306) time: 0.1608 data: 0.0818 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:18 lr: 0.000123 grad: 0.0996 (0.0976) loss: 0.8294 (0.8306) time: 0.1365 data: 0.0583 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:03 lr: 0.000123 grad: 0.0978 (0.0977) loss: 0.8321 (0.8306) time: 0.1389 data: 0.0560 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:49 lr: 0.000123 grad: 0.0994 (0.0977) loss: 0.8319 (0.8306) time: 0.1453 data: 0.0655 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:34 lr: 0.000123 grad: 0.0958 (0.0977) loss: 0.8272 (0.8305) time: 0.1450 data: 0.0680 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:20 lr: 0.000123 grad: 0.0875 (0.0977) loss: 0.8360 (0.8305) time: 0.1688 data: 0.0910 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:05 lr: 0.000123 grad: 0.0959 (0.0977) loss: 0.8265 (0.8304) time: 0.1453 data: 0.0672 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:50 lr: 0.000123 grad: 0.0882 (0.0978) loss: 0.8303 (0.8304) time: 0.1597 data: 0.0815 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:36 lr: 0.000123 grad: 0.0969 (0.0978) loss: 0.8321 (0.8303) time: 0.1444 data: 0.0617 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:21 lr: 0.000123 grad: 0.0943 (0.0978) loss: 0.8238 (0.8303) time: 0.1344 data: 0.0599 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0972 (0.0979) loss: 0.8240 (0.8302) time: 0.1177 data: 0.0380 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0927 (0.0979) loss: 0.8340 (0.8302) time: 0.1249 data: 0.0445 max mem: 9377 +Train: [11] Total time: 0:15:15 (0.1464 s / it) +Averaged stats: lr: 0.000123 grad: 0.0927 (0.0979) loss: 0.8340 (0.8302) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:05:06 loss: 0.8563 (0.8563) time: 4.9474 data: 4.9156 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8561 (0.8574) time: 0.1399 data: 0.1129 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:13 (0.2203 s / it) +Averaged stats (hcp-train-subset): loss: 0.8561 (0.8574) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:43 loss: 0.8547 (0.8547) time: 5.5481 data: 5.5167 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8547 (0.8553) time: 0.1263 data: 0.1008 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:13 (0.2128 s / it) +Averaged stats (hcp-val): loss: 0.8547 (0.8553) +Eval (nsd-val): [11] [ 0/62] eta: 0:04:21 loss: 0.8150 (0.8150) time: 4.2206 data: 4.1894 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8262 (0.8271) time: 0.1346 data: 0.1097 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:12 (0.2062 s / it) +Averaged stats (nsd-val): loss: 0.8262 (0.8271) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [12] [ 0/6250] eta: 8:52:29 lr: 0.000123 grad: 0.1390 (0.1390) loss: 0.8400 (0.8400) time: 5.1119 data: 4.9881 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:20:19 lr: 0.000123 grad: 0.0936 (0.1226) loss: 0.8377 (0.8458) time: 0.1808 data: 0.0910 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:18:18 lr: 0.000123 grad: 0.0897 (0.1123) loss: 0.8463 (0.8414) time: 0.1588 data: 0.0721 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:17:19 lr: 0.000123 grad: 0.0935 (0.1062) loss: 0.8447 (0.8412) time: 0.1612 data: 0.0697 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:17:12 lr: 0.000123 grad: 0.0868 (0.1025) loss: 0.8467 (0.8414) time: 0.1658 data: 0.0698 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:17:00 lr: 0.000123 grad: 0.0886 (0.1005) loss: 0.8410 (0.8407) time: 0.1799 data: 0.0735 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:16:22 lr: 0.000123 grad: 0.0889 (0.0993) loss: 0.8358 (0.8403) time: 0.1560 data: 0.0616 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:15:48 lr: 0.000123 grad: 0.0928 (0.0989) loss: 0.8423 (0.8397) time: 0.1602 data: 0.0731 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:15:17 lr: 0.000123 grad: 0.0891 (0.0982) loss: 0.8391 (0.8395) time: 0.1397 data: 0.0462 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:14:52 lr: 0.000123 grad: 0.0927 (0.0982) loss: 0.8347 (0.8387) time: 0.1562 data: 0.0693 max mem: 9377 +Train: [12] [1000/6250] eta: 0:14:25 lr: 0.000123 grad: 0.0896 (0.0980) loss: 0.8277 (0.8380) time: 0.1519 data: 0.0699 max mem: 9377 +Train: [12] [1100/6250] eta: 0:13:58 lr: 0.000123 grad: 0.0962 (0.0979) loss: 0.8375 (0.8371) time: 0.1457 data: 0.0649 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:33 lr: 0.000123 grad: 0.0927 (0.0977) loss: 0.8284 (0.8366) time: 0.1271 data: 0.0499 max mem: 9377 +Train: [12] [1300/6250] eta: 0:13:09 lr: 0.000123 grad: 0.0870 (0.0974) loss: 0.8385 (0.8361) time: 0.1366 data: 0.0526 max mem: 9377 +Train: [12] [1400/6250] eta: 0:12:46 lr: 0.000123 grad: 0.0940 (0.0972) loss: 0.8289 (0.8359) time: 0.1232 data: 0.0439 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:25 lr: 0.000123 grad: 0.0936 (0.0972) loss: 0.8257 (0.8355) time: 0.1386 data: 0.0536 max mem: 9377 +Train: [12] [1600/6250] eta: 0:12:04 lr: 0.000123 grad: 0.0938 (0.0973) loss: 0.8305 (0.8352) time: 0.1320 data: 0.0441 max mem: 9377 +Train: [12] [1700/6250] eta: 0:11:43 lr: 0.000123 grad: 0.1032 (0.0975) loss: 0.8298 (0.8349) time: 0.1287 data: 0.0489 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:23 lr: 0.000123 grad: 0.0990 (0.0976) loss: 0.8251 (0.8347) time: 0.1270 data: 0.0419 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:04 lr: 0.000123 grad: 0.1004 (0.0978) loss: 0.8267 (0.8344) time: 0.1370 data: 0.0555 max mem: 9377 +Train: [12] [2000/6250] eta: 0:10:51 lr: 0.000123 grad: 0.0967 (0.0978) loss: 0.8306 (0.8342) time: 0.1564 data: 0.0686 max mem: 9377 +Train: [12] [2100/6250] eta: 0:10:37 lr: 0.000123 grad: 0.0957 (0.0978) loss: 0.8283 (0.8339) time: 0.1589 data: 0.0740 max mem: 9377 +Train: [12] [2200/6250] eta: 0:10:22 lr: 0.000123 grad: 0.0921 (0.0979) loss: 0.8278 (0.8336) time: 0.1443 data: 0.0627 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:05 lr: 0.000123 grad: 0.0963 (0.0979) loss: 0.8255 (0.8335) time: 0.1276 data: 0.0418 max mem: 9377 +Train: [12] [2400/6250] eta: 0:09:51 lr: 0.000123 grad: 0.0924 (0.0978) loss: 0.8322 (0.8335) time: 0.1607 data: 0.0803 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:34 lr: 0.000123 grad: 0.0975 (0.0978) loss: 0.8320 (0.8333) time: 0.1364 data: 0.0523 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:18 lr: 0.000123 grad: 0.0967 (0.0980) loss: 0.8278 (0.8331) time: 0.1413 data: 0.0491 max mem: 9377 +Train: [12] [2700/6250] eta: 0:09:02 lr: 0.000123 grad: 0.1037 (0.0982) loss: 0.8234 (0.8328) time: 0.1491 data: 0.0634 max mem: 9377 +Train: [12] [2800/6250] eta: 0:08:46 lr: 0.000123 grad: 0.0944 (0.0982) loss: 0.8293 (0.8326) time: 0.1556 data: 0.0740 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:29 lr: 0.000123 grad: 0.0933 (0.0982) loss: 0.8259 (0.8325) time: 0.1301 data: 0.0446 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:12 lr: 0.000123 grad: 0.0968 (0.0983) loss: 0.8306 (0.8323) time: 0.1317 data: 0.0450 max mem: 9377 +Train: [12] [3100/6250] eta: 0:07:56 lr: 0.000123 grad: 0.0973 (0.0984) loss: 0.8243 (0.8322) time: 0.1278 data: 0.0391 max mem: 9377 +Train: [12] [3200/6250] eta: 0:07:39 lr: 0.000123 grad: 0.0995 (0.0985) loss: 0.8251 (0.8320) time: 0.1465 data: 0.0655 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:23 lr: 0.000123 grad: 0.0986 (0.0986) loss: 0.8160 (0.8318) time: 0.1365 data: 0.0534 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:07 lr: 0.000123 grad: 0.0976 (0.0987) loss: 0.8238 (0.8316) time: 0.1257 data: 0.0380 max mem: 9377 +Train: [12] [3500/6250] eta: 0:06:51 lr: 0.000123 grad: 0.0959 (0.0988) loss: 0.8205 (0.8313) time: 0.1208 data: 0.0346 max mem: 9377 +Train: [12] [3600/6250] eta: 0:06:35 lr: 0.000123 grad: 0.0957 (0.0989) loss: 0.8212 (0.8310) time: 0.1340 data: 0.0531 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:19 lr: 0.000123 grad: 0.1016 (0.0993) loss: 0.8250 (0.8307) time: 0.1364 data: 0.0530 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:04 lr: 0.000123 grad: 0.0975 (0.0994) loss: 0.8242 (0.8305) time: 0.1276 data: 0.0455 max mem: 9377 +Train: [12] [3900/6250] eta: 0:05:49 lr: 0.000123 grad: 0.0960 (0.0994) loss: 0.8221 (0.8303) time: 0.1394 data: 0.0596 max mem: 9377 +Train: [12] [4000/6250] eta: 0:05:33 lr: 0.000123 grad: 0.1065 (0.0997) loss: 0.8223 (0.8301) time: 0.1334 data: 0.0499 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:18 lr: 0.000123 grad: 0.1000 (0.0999) loss: 0.8212 (0.8299) time: 0.1401 data: 0.0599 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:03 lr: 0.000123 grad: 0.0971 (0.1000) loss: 0.8186 (0.8296) time: 0.1298 data: 0.0489 max mem: 9377 +Train: [12] [4300/6250] eta: 0:04:48 lr: 0.000123 grad: 0.0999 (0.1001) loss: 0.8165 (0.8293) time: 0.1332 data: 0.0472 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:33 lr: 0.000123 grad: 0.0966 (0.1003) loss: 0.8187 (0.8291) time: 0.1330 data: 0.0478 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:18 lr: 0.000123 grad: 0.0962 (0.1003) loss: 0.8259 (0.8288) time: 0.1453 data: 0.0574 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:03 lr: 0.000123 grad: 0.1038 (0.1005) loss: 0.8205 (0.8286) time: 0.1476 data: 0.0673 max mem: 9377 +Train: [12] [4700/6250] eta: 0:03:48 lr: 0.000123 grad: 0.1015 (0.1005) loss: 0.8246 (0.8285) time: 0.1452 data: 0.0625 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:32 lr: 0.000123 grad: 0.1014 (0.1006) loss: 0.8200 (0.8284) time: 0.1082 data: 0.0239 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:17 lr: 0.000123 grad: 0.0958 (0.1007) loss: 0.8243 (0.8282) time: 0.1433 data: 0.0587 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:03 lr: 0.000123 grad: 0.1002 (0.1007) loss: 0.8200 (0.8281) time: 0.1624 data: 0.0787 max mem: 9377 +Train: [12] [5100/6250] eta: 0:02:48 lr: 0.000123 grad: 0.0980 (0.1008) loss: 0.8205 (0.8280) time: 0.1155 data: 0.0368 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:33 lr: 0.000123 grad: 0.0983 (0.1009) loss: 0.8237 (0.8279) time: 0.1530 data: 0.0735 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:18 lr: 0.000123 grad: 0.1057 (0.1010) loss: 0.8218 (0.8277) time: 0.1411 data: 0.0561 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:03 lr: 0.000123 grad: 0.1098 (0.1011) loss: 0.8146 (0.8276) time: 0.1355 data: 0.0576 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:49 lr: 0.000123 grad: 0.1038 (0.1013) loss: 0.8252 (0.8275) time: 0.1302 data: 0.0480 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:34 lr: 0.000123 grad: 0.1123 (0.1014) loss: 0.8113 (0.8273) time: 0.1426 data: 0.0668 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:19 lr: 0.000123 grad: 0.1004 (0.1016) loss: 0.8343 (0.8272) time: 0.1276 data: 0.0425 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:05 lr: 0.000123 grad: 0.0974 (0.1017) loss: 0.8142 (0.8271) time: 0.1379 data: 0.0510 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:50 lr: 0.000123 grad: 0.1014 (0.1017) loss: 0.8158 (0.8269) time: 0.1383 data: 0.0617 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:36 lr: 0.000123 grad: 0.1077 (0.1018) loss: 0.8119 (0.8267) time: 0.1331 data: 0.0553 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:21 lr: 0.000123 grad: 0.1026 (0.1019) loss: 0.8173 (0.8266) time: 0.1245 data: 0.0447 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.1037 (0.1020) loss: 0.8137 (0.8264) time: 0.1235 data: 0.0390 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.1002 (0.1021) loss: 0.8183 (0.8263) time: 0.1491 data: 0.0678 max mem: 9377 +Train: [12] Total time: 0:15:08 (0.1454 s / it) +Averaged stats: lr: 0.000123 grad: 0.1002 (0.1021) loss: 0.8183 (0.8263) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:05:41 loss: 0.8548 (0.8548) time: 5.5090 data: 5.4791 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8566 (0.8557) time: 0.1174 data: 0.0923 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-train-subset): loss: 0.8566 (0.8557) +Eval (hcp-val): [12] [ 0/62] eta: 0:03:59 loss: 0.8521 (0.8521) time: 3.8634 data: 3.7913 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8537 (0.8536) time: 0.1329 data: 0.1068 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:13 (0.2185 s / it) +Averaged stats (hcp-val): loss: 0.8537 (0.8536) +Eval (nsd-val): [12] [ 0/62] eta: 0:04:42 loss: 0.8130 (0.8130) time: 4.5584 data: 4.5252 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8226 (0.8212) time: 0.1137 data: 0.0886 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:13 (0.2176 s / it) +Averaged stats (nsd-val): loss: 0.8226 (0.8212) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 10:29:33 lr: 0.000123 grad: 0.1004 (0.1004) loss: 0.8563 (0.8563) time: 6.0438 data: 5.8386 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:19:40 lr: 0.000123 grad: 0.1189 (0.1765) loss: 0.8083 (0.8326) time: 0.1463 data: 0.0700 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:17:00 lr: 0.000123 grad: 0.1080 (0.1502) loss: 0.8126 (0.8234) time: 0.1411 data: 0.0572 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:15:52 lr: 0.000123 grad: 0.1152 (0.1387) loss: 0.8032 (0.8196) time: 0.1349 data: 0.0523 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:15:09 lr: 0.000123 grad: 0.1063 (0.1310) loss: 0.8103 (0.8185) time: 0.1467 data: 0.0586 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:14:38 lr: 0.000123 grad: 0.0975 (0.1274) loss: 0.8117 (0.8173) time: 0.1356 data: 0.0555 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:14:11 lr: 0.000123 grad: 0.1043 (0.1239) loss: 0.8067 (0.8166) time: 0.1399 data: 0.0583 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:13:41 lr: 0.000123 grad: 0.1077 (0.1214) loss: 0.8053 (0.8163) time: 0.1373 data: 0.0541 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:13:15 lr: 0.000123 grad: 0.0986 (0.1192) loss: 0.8151 (0.8166) time: 0.1392 data: 0.0466 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:12:58 lr: 0.000123 grad: 0.0880 (0.1172) loss: 0.8311 (0.8175) time: 0.1290 data: 0.0338 max mem: 9377 +Train: [13] [1000/6250] eta: 0:12:38 lr: 0.000123 grad: 0.0986 (0.1157) loss: 0.8250 (0.8181) time: 0.1375 data: 0.0564 max mem: 9377 +Train: [13] [1100/6250] eta: 0:12:21 lr: 0.000123 grad: 0.0893 (0.1142) loss: 0.8208 (0.8189) time: 0.1266 data: 0.0456 max mem: 9377 +Train: [13] [1200/6250] eta: 0:12:06 lr: 0.000123 grad: 0.0935 (0.1130) loss: 0.8273 (0.8195) time: 0.1406 data: 0.0613 max mem: 9377 +Train: [13] [1300/6250] eta: 0:11:50 lr: 0.000123 grad: 0.0983 (0.1124) loss: 0.8259 (0.8198) time: 0.1431 data: 0.0594 max mem: 9377 +Train: [13] [1400/6250] eta: 0:11:34 lr: 0.000123 grad: 0.0979 (0.1116) loss: 0.8181 (0.8201) time: 0.1450 data: 0.0622 max mem: 9377 +Train: [13] [1500/6250] eta: 0:11:20 lr: 0.000123 grad: 0.1005 (0.1109) loss: 0.8233 (0.8205) time: 0.1484 data: 0.0665 max mem: 9377 +Train: [13] [1600/6250] eta: 0:11:02 lr: 0.000123 grad: 0.0939 (0.1103) loss: 0.8275 (0.8207) time: 0.1174 data: 0.0355 max mem: 9377 +Train: [13] [1700/6250] eta: 0:10:46 lr: 0.000123 grad: 0.0961 (0.1097) loss: 0.8252 (0.8210) time: 0.1139 data: 0.0269 max mem: 9377 +Train: [13] [1800/6250] eta: 0:10:30 lr: 0.000123 grad: 0.0869 (0.1092) loss: 0.8250 (0.8212) time: 0.1348 data: 0.0513 max mem: 9377 +Train: [13] [1900/6250] eta: 0:10:15 lr: 0.000123 grad: 0.1015 (0.1089) loss: 0.8262 (0.8213) time: 0.1402 data: 0.0626 max mem: 9377 +Train: [13] [2000/6250] eta: 0:10:01 lr: 0.000123 grad: 0.0993 (0.1086) loss: 0.8239 (0.8214) time: 0.1344 data: 0.0561 max mem: 9377 +Train: [13] [2100/6250] eta: 0:09:46 lr: 0.000123 grad: 0.0998 (0.1084) loss: 0.8250 (0.8216) time: 0.1479 data: 0.0695 max mem: 9377 +Train: [13] [2200/6250] eta: 0:09:34 lr: 0.000123 grad: 0.0992 (0.1081) loss: 0.8213 (0.8216) time: 0.1477 data: 0.0620 max mem: 9377 +Train: [13] [2300/6250] eta: 0:09:22 lr: 0.000123 grad: 0.1006 (0.1080) loss: 0.8313 (0.8216) time: 0.1542 data: 0.0639 max mem: 9377 +Train: [13] [2400/6250] eta: 0:09:09 lr: 0.000123 grad: 0.1088 (0.1079) loss: 0.8134 (0.8215) time: 0.1139 data: 0.0243 max mem: 9377 +Train: [13] [2500/6250] eta: 0:08:56 lr: 0.000123 grad: 0.1050 (0.1082) loss: 0.8188 (0.8212) time: 0.1456 data: 0.0732 max mem: 9377 +Train: [13] [2600/6250] eta: 0:08:44 lr: 0.000123 grad: 0.1044 (0.1083) loss: 0.8156 (0.8212) time: 0.1755 data: 0.0943 max mem: 9377 +Train: [13] [2700/6250] eta: 0:08:30 lr: 0.000123 grad: 0.0996 (0.1082) loss: 0.8190 (0.8211) time: 0.1509 data: 0.0697 max mem: 9377 +Train: [13] [2800/6250] eta: 0:08:15 lr: 0.000123 grad: 0.1094 (0.1081) loss: 0.8245 (0.8211) time: 0.1372 data: 0.0574 max mem: 9377 +Train: [13] [2900/6250] eta: 0:08:01 lr: 0.000123 grad: 0.1057 (0.1080) loss: 0.8239 (0.8210) time: 0.1536 data: 0.0680 max mem: 9377 +Train: [13] [3000/6250] eta: 0:07:48 lr: 0.000123 grad: 0.0958 (0.1079) loss: 0.8159 (0.8210) time: 0.1639 data: 0.0836 max mem: 9377 +Train: [13] [3100/6250] eta: 0:07:33 lr: 0.000123 grad: 0.1068 (0.1078) loss: 0.8129 (0.8209) time: 0.1421 data: 0.0621 max mem: 9377 +Train: [13] [3200/6250] eta: 0:07:18 lr: 0.000123 grad: 0.0982 (0.1076) loss: 0.8170 (0.8209) time: 0.1305 data: 0.0423 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:03 lr: 0.000123 grad: 0.0932 (0.1074) loss: 0.8217 (0.8209) time: 0.1545 data: 0.0726 max mem: 9377 +Train: [13] [3400/6250] eta: 0:06:48 lr: 0.000123 grad: 0.1017 (0.1073) loss: 0.8246 (0.8208) time: 0.1138 data: 0.0263 max mem: 9377 +Train: [13] [3500/6250] eta: 0:06:33 lr: 0.000123 grad: 0.0987 (0.1073) loss: 0.8196 (0.8208) time: 0.1371 data: 0.0544 max mem: 9377 +Train: [13] [3600/6250] eta: 0:06:19 lr: 0.000123 grad: 0.0977 (0.1073) loss: 0.8137 (0.8207) time: 0.1321 data: 0.0464 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:04 lr: 0.000122 grad: 0.1021 (0.1073) loss: 0.8196 (0.8206) time: 0.1536 data: 0.0726 max mem: 9377 +Train: [13] [3800/6250] eta: 0:05:49 lr: 0.000122 grad: 0.1030 (0.1073) loss: 0.8201 (0.8206) time: 0.1346 data: 0.0479 max mem: 9377 +Train: [13] [3900/6250] eta: 0:05:35 lr: 0.000122 grad: 0.1110 (0.1073) loss: 0.8082 (0.8205) time: 0.1168 data: 0.0365 max mem: 9377 +Train: [13] [4000/6250] eta: 0:05:20 lr: 0.000122 grad: 0.1116 (0.1074) loss: 0.8055 (0.8204) time: 0.1317 data: 0.0500 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:06 lr: 0.000122 grad: 0.1107 (0.1074) loss: 0.8096 (0.8203) time: 0.1476 data: 0.0651 max mem: 9377 +Train: [13] [4200/6250] eta: 0:04:51 lr: 0.000122 grad: 0.1037 (0.1074) loss: 0.8225 (0.8203) time: 0.1283 data: 0.0513 max mem: 9377 +Train: [13] [4300/6250] eta: 0:04:36 lr: 0.000122 grad: 0.1089 (0.1074) loss: 0.8095 (0.8201) time: 0.1173 data: 0.0358 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:22 lr: 0.000122 grad: 0.1064 (0.1074) loss: 0.8108 (0.8201) time: 0.1321 data: 0.0521 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:07 lr: 0.000122 grad: 0.1104 (0.1075) loss: 0.8136 (0.8200) time: 0.1101 data: 0.0267 max mem: 9377 +Train: [13] [4600/6250] eta: 0:03:53 lr: 0.000122 grad: 0.0999 (0.1076) loss: 0.8157 (0.8199) time: 0.1379 data: 0.0590 max mem: 9377 +Train: [13] [4700/6250] eta: 0:03:39 lr: 0.000122 grad: 0.1097 (0.1076) loss: 0.8169 (0.8198) time: 0.1478 data: 0.0685 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:25 lr: 0.000122 grad: 0.1077 (0.1076) loss: 0.8124 (0.8198) time: 0.1528 data: 0.0743 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:10 lr: 0.000122 grad: 0.1093 (0.1076) loss: 0.8197 (0.8197) time: 0.1640 data: 0.0797 max mem: 9377 +Train: [13] [5000/6250] eta: 0:02:56 lr: 0.000122 grad: 0.1090 (0.1076) loss: 0.8180 (0.8196) time: 0.1119 data: 0.0237 max mem: 9377 +Train: [13] [5100/6250] eta: 0:02:42 lr: 0.000122 grad: 0.0934 (0.1075) loss: 0.8254 (0.8196) time: 0.1556 data: 0.0770 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:28 lr: 0.000122 grad: 0.0990 (0.1075) loss: 0.8182 (0.8195) time: 0.1589 data: 0.0757 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:13 lr: 0.000122 grad: 0.1059 (0.1075) loss: 0.8123 (0.8194) time: 0.1437 data: 0.0576 max mem: 9377 +Train: [13] [5400/6250] eta: 0:01:59 lr: 0.000122 grad: 0.1027 (0.1075) loss: 0.8153 (0.8192) time: 0.1398 data: 0.0589 max mem: 9377 +Train: [13] [5500/6250] eta: 0:01:45 lr: 0.000122 grad: 0.1042 (0.1075) loss: 0.8154 (0.8191) time: 0.1408 data: 0.0560 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:31 lr: 0.000122 grad: 0.0988 (0.1074) loss: 0.8125 (0.8191) time: 0.1274 data: 0.0396 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:17 lr: 0.000122 grad: 0.0898 (0.1074) loss: 0.8207 (0.8190) time: 0.1359 data: 0.0499 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:03 lr: 0.000122 grad: 0.0977 (0.1073) loss: 0.8201 (0.8190) time: 0.1186 data: 0.0375 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:49 lr: 0.000122 grad: 0.0994 (0.1072) loss: 0.8221 (0.8189) time: 0.1215 data: 0.0388 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:35 lr: 0.000122 grad: 0.1096 (0.1072) loss: 0.8174 (0.8188) time: 0.1267 data: 0.0452 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:21 lr: 0.000122 grad: 0.1031 (0.1073) loss: 0.8112 (0.8186) time: 0.1375 data: 0.0569 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.1054 (0.1073) loss: 0.8187 (0.8185) time: 0.1385 data: 0.0590 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.1014 (0.1073) loss: 0.8105 (0.8184) time: 0.1216 data: 0.0387 max mem: 9377 +Train: [13] Total time: 0:14:43 (0.1414 s / it) +Averaged stats: lr: 0.000122 grad: 0.1014 (0.1073) loss: 0.8105 (0.8184) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:05:09 loss: 0.8558 (0.8558) time: 4.9904 data: 4.9607 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8570 (0.8572) time: 0.1391 data: 0.1117 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (hcp-train-subset): loss: 0.8570 (0.8572) +Eval (hcp-val): [13] [ 0/62] eta: 0:06:06 loss: 0.8515 (0.8515) time: 5.9191 data: 5.8889 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8541 (0.8547) time: 0.1287 data: 0.1034 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-val): loss: 0.8541 (0.8547) +Eval (nsd-val): [13] [ 0/62] eta: 0:04:38 loss: 0.8184 (0.8184) time: 4.4864 data: 4.4565 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8277 (0.8278) time: 0.1329 data: 0.1077 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (nsd-val): loss: 0.8277 (0.8278) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [14] [ 0/6250] eta: 10:28:24 lr: 0.000122 grad: 0.0648 (0.0648) loss: 0.8752 (0.8752) time: 6.0327 data: 5.9373 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:20:11 lr: 0.000122 grad: 0.1193 (0.1280) loss: 0.8154 (0.8283) time: 0.1605 data: 0.0671 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:17:19 lr: 0.000122 grad: 0.1172 (0.1252) loss: 0.8108 (0.8213) time: 0.1738 data: 0.0939 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:15:48 lr: 0.000122 grad: 0.1055 (0.1221) loss: 0.8037 (0.8179) time: 0.1361 data: 0.0617 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:15:10 lr: 0.000122 grad: 0.1038 (0.1187) loss: 0.8122 (0.8174) time: 0.1356 data: 0.0556 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:14:41 lr: 0.000122 grad: 0.1031 (0.1162) loss: 0.8184 (0.8179) time: 0.1434 data: 0.0624 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:14:14 lr: 0.000122 grad: 0.1053 (0.1141) loss: 0.8244 (0.8183) time: 0.1653 data: 0.0842 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:13:49 lr: 0.000122 grad: 0.1105 (0.1128) loss: 0.8183 (0.8189) time: 0.1434 data: 0.0521 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:13:21 lr: 0.000122 grad: 0.0981 (0.1116) loss: 0.8137 (0.8189) time: 0.1287 data: 0.0411 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:13:01 lr: 0.000122 grad: 0.1053 (0.1111) loss: 0.8134 (0.8190) time: 0.1211 data: 0.0300 max mem: 9377 +Train: [14] [1000/6250] eta: 0:12:38 lr: 0.000122 grad: 0.1048 (0.1108) loss: 0.8164 (0.8189) time: 0.1347 data: 0.0539 max mem: 9377 +Train: [14] [1100/6250] eta: 0:12:15 lr: 0.000122 grad: 0.1052 (0.1104) loss: 0.8079 (0.8186) time: 0.1176 data: 0.0355 max mem: 9377 +Train: [14] [1200/6250] eta: 0:11:55 lr: 0.000122 grad: 0.0997 (0.1103) loss: 0.8187 (0.8185) time: 0.1170 data: 0.0346 max mem: 9377 +Train: [14] [1300/6250] eta: 0:11:40 lr: 0.000122 grad: 0.1000 (0.1098) loss: 0.8147 (0.8182) time: 0.1453 data: 0.0631 max mem: 9377 +Train: [14] [1400/6250] eta: 0:11:24 lr: 0.000122 grad: 0.1037 (0.1098) loss: 0.8170 (0.8180) time: 0.1449 data: 0.0617 max mem: 9377 +Train: [14] [1500/6250] eta: 0:11:09 lr: 0.000122 grad: 0.1012 (0.1099) loss: 0.8217 (0.8177) time: 0.1267 data: 0.0438 max mem: 9377 +Train: [14] [1600/6250] eta: 0:10:52 lr: 0.000122 grad: 0.1097 (0.1098) loss: 0.8108 (0.8174) time: 0.1310 data: 0.0430 max mem: 9377 +Train: [14] [1700/6250] eta: 0:10:37 lr: 0.000122 grad: 0.1052 (0.1096) loss: 0.8084 (0.8172) time: 0.1375 data: 0.0592 max mem: 9377 +Train: [14] [1800/6250] eta: 0:10:23 lr: 0.000122 grad: 0.1017 (0.1095) loss: 0.8156 (0.8170) time: 0.1527 data: 0.0711 max mem: 9377 +Train: [14] [1900/6250] eta: 0:10:08 lr: 0.000122 grad: 0.1026 (0.1093) loss: 0.8131 (0.8168) time: 0.1249 data: 0.0420 max mem: 9377 +Train: [14] [2000/6250] eta: 0:09:55 lr: 0.000122 grad: 0.1031 (0.1093) loss: 0.8185 (0.8167) time: 0.1404 data: 0.0564 max mem: 9377 +Train: [14] [2100/6250] eta: 0:09:41 lr: 0.000122 grad: 0.1081 (0.1091) loss: 0.8125 (0.8166) time: 0.1335 data: 0.0474 max mem: 9377 +Train: [14] [2200/6250] eta: 0:09:28 lr: 0.000122 grad: 0.1053 (0.1089) loss: 0.8096 (0.8166) time: 0.1512 data: 0.0659 max mem: 9377 +Train: [14] [2300/6250] eta: 0:09:13 lr: 0.000122 grad: 0.1028 (0.1087) loss: 0.8190 (0.8166) time: 0.1395 data: 0.0574 max mem: 9377 +Train: [14] [2400/6250] eta: 0:08:59 lr: 0.000122 grad: 0.1032 (0.1085) loss: 0.8061 (0.8165) time: 0.1235 data: 0.0381 max mem: 9377 +Train: [14] [2500/6250] eta: 0:08:48 lr: 0.000122 grad: 0.1098 (0.1084) loss: 0.8179 (0.8165) time: 0.2339 data: 0.1604 max mem: 9377 +Train: [14] [2600/6250] eta: 0:08:35 lr: 0.000122 grad: 0.1021 (0.1084) loss: 0.8232 (0.8165) time: 0.1337 data: 0.0495 max mem: 9377 +Train: [14] [2700/6250] eta: 0:08:23 lr: 0.000122 grad: 0.1045 (0.1084) loss: 0.8204 (0.8164) time: 0.1546 data: 0.0710 max mem: 9377 +Train: [14] [2800/6250] eta: 0:08:11 lr: 0.000122 grad: 0.1044 (0.1084) loss: 0.8139 (0.8164) time: 0.1503 data: 0.0699 max mem: 9377 +Train: [14] [2900/6250] eta: 0:07:57 lr: 0.000122 grad: 0.1090 (0.1084) loss: 0.8156 (0.8162) time: 0.1303 data: 0.0434 max mem: 9377 +Train: [14] [3000/6250] eta: 0:07:44 lr: 0.000122 grad: 0.1031 (0.1085) loss: 0.8108 (0.8160) time: 0.1548 data: 0.0678 max mem: 9377 +Train: [14] [3100/6250] eta: 0:07:30 lr: 0.000122 grad: 0.1089 (0.1086) loss: 0.8141 (0.8157) time: 0.1725 data: 0.0904 max mem: 9377 +Train: [14] [3200/6250] eta: 0:07:15 lr: 0.000122 grad: 0.1057 (0.1087) loss: 0.8150 (0.8155) time: 0.1406 data: 0.0507 max mem: 9377 +Train: [14] [3300/6250] eta: 0:07:01 lr: 0.000122 grad: 0.1151 (0.1089) loss: 0.8002 (0.8151) time: 0.1444 data: 0.0501 max mem: 9377 +Train: [14] [3400/6250] eta: 0:06:47 lr: 0.000122 grad: 0.1135 (0.1090) loss: 0.7965 (0.8147) time: 0.1343 data: 0.0464 max mem: 9377 +Train: [14] [3500/6250] eta: 0:06:32 lr: 0.000122 grad: 0.1171 (0.1091) loss: 0.8074 (0.8144) time: 0.1494 data: 0.0657 max mem: 9377 +Train: [14] [3600/6250] eta: 0:06:17 lr: 0.000122 grad: 0.1132 (0.1091) loss: 0.8105 (0.8143) time: 0.1308 data: 0.0512 max mem: 9377 +Train: [14] [3700/6250] eta: 0:06:03 lr: 0.000122 grad: 0.1049 (0.1093) loss: 0.8077 (0.8140) time: 0.1446 data: 0.0650 max mem: 9377 +Train: [14] [3800/6250] eta: 0:05:48 lr: 0.000122 grad: 0.1087 (0.1094) loss: 0.7989 (0.8139) time: 0.1342 data: 0.0520 max mem: 9377 +Train: [14] [3900/6250] eta: 0:05:34 lr: 0.000122 grad: 0.1044 (0.1095) loss: 0.8145 (0.8137) time: 0.1368 data: 0.0520 max mem: 9377 +Train: [14] [4000/6250] eta: 0:05:20 lr: 0.000122 grad: 0.0918 (0.1094) loss: 0.8178 (0.8137) time: 0.1496 data: 0.0710 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:05 lr: 0.000122 grad: 0.1093 (0.1094) loss: 0.8189 (0.8138) time: 0.1330 data: 0.0494 max mem: 9377 +Train: [14] [4200/6250] eta: 0:04:51 lr: 0.000122 grad: 0.1037 (0.1094) loss: 0.8076 (0.8137) time: 0.1292 data: 0.0410 max mem: 9377 +Train: [14] [4300/6250] eta: 0:04:36 lr: 0.000122 grad: 0.1157 (0.1094) loss: 0.8206 (0.8137) time: 0.1412 data: 0.0600 max mem: 9377 +Train: [14] [4400/6250] eta: 0:04:22 lr: 0.000122 grad: 0.1059 (0.1094) loss: 0.8108 (0.8136) time: 0.1297 data: 0.0453 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:07 lr: 0.000122 grad: 0.1086 (0.1095) loss: 0.8053 (0.8136) time: 0.1435 data: 0.0622 max mem: 9377 +Train: [14] [4600/6250] eta: 0:03:53 lr: 0.000122 grad: 0.1068 (0.1096) loss: 0.8052 (0.8135) time: 0.1456 data: 0.0608 max mem: 9377 +Train: [14] [4700/6250] eta: 0:03:39 lr: 0.000122 grad: 0.1117 (0.1096) loss: 0.8002 (0.8134) time: 0.1258 data: 0.0434 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:24 lr: 0.000122 grad: 0.1153 (0.1098) loss: 0.8130 (0.8132) time: 0.1469 data: 0.0638 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:10 lr: 0.000122 grad: 0.1072 (0.1099) loss: 0.8154 (0.8131) time: 0.1252 data: 0.0432 max mem: 9377 +Train: [14] [5000/6250] eta: 0:02:56 lr: 0.000122 grad: 0.1145 (0.1101) loss: 0.7941 (0.8128) time: 0.1444 data: 0.0603 max mem: 9377 +Train: [14] [5100/6250] eta: 0:02:42 lr: 0.000122 grad: 0.1213 (0.1103) loss: 0.8084 (0.8127) time: 0.1264 data: 0.0412 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:28 lr: 0.000122 grad: 0.1089 (0.1104) loss: 0.8014 (0.8125) time: 0.1335 data: 0.0458 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:13 lr: 0.000122 grad: 0.1166 (0.1105) loss: 0.8017 (0.8123) time: 0.1562 data: 0.0781 max mem: 9377 +Train: [14] [5400/6250] eta: 0:01:59 lr: 0.000122 grad: 0.1089 (0.1106) loss: 0.8189 (0.8122) time: 0.1387 data: 0.0558 max mem: 9377 +Train: [14] [5500/6250] eta: 0:01:45 lr: 0.000122 grad: 0.1131 (0.1107) loss: 0.8130 (0.8120) time: 0.1263 data: 0.0423 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:31 lr: 0.000122 grad: 0.1109 (0.1107) loss: 0.8048 (0.8119) time: 0.1343 data: 0.0497 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:17 lr: 0.000122 grad: 0.1103 (0.1108) loss: 0.8104 (0.8119) time: 0.1526 data: 0.0709 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:03 lr: 0.000122 grad: 0.1044 (0.1108) loss: 0.8140 (0.8119) time: 0.1327 data: 0.0485 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:49 lr: 0.000122 grad: 0.1096 (0.1108) loss: 0.8111 (0.8119) time: 0.1338 data: 0.0505 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:35 lr: 0.000122 grad: 0.1021 (0.1108) loss: 0.8164 (0.8119) time: 0.1431 data: 0.0627 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:21 lr: 0.000122 grad: 0.1141 (0.1108) loss: 0.8062 (0.8118) time: 0.1334 data: 0.0559 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.1066 (0.1108) loss: 0.8127 (0.8118) time: 0.1409 data: 0.0648 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.1039 (0.1108) loss: 0.8144 (0.8118) time: 0.1285 data: 0.0407 max mem: 9377 +Train: [14] Total time: 0:14:42 (0.1412 s / it) +Averaged stats: lr: 0.000122 grad: 0.1039 (0.1108) loss: 0.8144 (0.8118) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:04:50 loss: 0.8520 (0.8520) time: 4.6776 data: 4.6453 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8557 (0.8563) time: 0.1277 data: 0.1022 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (hcp-train-subset): loss: 0.8557 (0.8563) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [14] [ 0/62] eta: 0:05:26 loss: 0.8522 (0.8522) time: 5.2665 data: 5.2334 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8537 (0.8550) time: 0.1351 data: 0.1080 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:13 (0.2254 s / it) +Averaged stats (hcp-val): loss: 0.8537 (0.8550) +Making plots (hcp-val): example=24 +Eval (nsd-val): [14] [ 0/62] eta: 0:05:29 loss: 0.8128 (0.8128) time: 5.3142 data: 5.2830 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8218 (0.8245) time: 0.1246 data: 0.0996 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (nsd-val): loss: 0.8218 (0.8245) +Making plots (nsd-val): example=6 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 10:45:33 lr: 0.000122 grad: 0.3211 (0.3211) loss: 0.7845 (0.7845) time: 6.1973 data: 6.0802 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:21:28 lr: 0.000122 grad: 0.1020 (0.1598) loss: 0.8187 (0.8139) time: 0.1556 data: 0.0654 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:18:16 lr: 0.000122 grad: 0.1031 (0.1402) loss: 0.8295 (0.8142) time: 0.1424 data: 0.0603 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:16:50 lr: 0.000122 grad: 0.1236 (0.1336) loss: 0.8121 (0.8126) time: 0.1418 data: 0.0588 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:15:43 lr: 0.000122 grad: 0.1226 (0.1292) loss: 0.8050 (0.8102) time: 0.1286 data: 0.0485 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:15:01 lr: 0.000122 grad: 0.0951 (0.1264) loss: 0.8045 (0.8081) time: 0.1502 data: 0.0697 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:14:30 lr: 0.000122 grad: 0.1133 (0.1245) loss: 0.8057 (0.8068) time: 0.1082 data: 0.0186 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:14:05 lr: 0.000122 grad: 0.1124 (0.1223) loss: 0.8059 (0.8064) time: 0.1594 data: 0.0718 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:13:35 lr: 0.000122 grad: 0.1137 (0.1215) loss: 0.8076 (0.8062) time: 0.1502 data: 0.0646 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:13:14 lr: 0.000122 grad: 0.1130 (0.1212) loss: 0.7956 (0.8055) time: 0.1031 data: 0.0002 max mem: 9377 +Train: [15] [1000/6250] eta: 0:12:48 lr: 0.000122 grad: 0.1051 (0.1202) loss: 0.8035 (0.8054) time: 0.1376 data: 0.0543 max mem: 9377 +Train: [15] [1100/6250] eta: 0:12:22 lr: 0.000121 grad: 0.1026 (0.1192) loss: 0.8080 (0.8054) time: 0.1312 data: 0.0456 max mem: 9377 +Train: [15] [1200/6250] eta: 0:11:59 lr: 0.000121 grad: 0.1105 (0.1185) loss: 0.8128 (0.8054) time: 0.1221 data: 0.0375 max mem: 9377 +Train: [15] [1300/6250] eta: 0:11:42 lr: 0.000121 grad: 0.1080 (0.1182) loss: 0.8026 (0.8050) time: 0.1249 data: 0.0370 max mem: 9377 +Train: [15] [1400/6250] eta: 0:11:32 lr: 0.000121 grad: 0.1129 (0.1177) loss: 0.8012 (0.8047) time: 0.1507 data: 0.0719 max mem: 9377 +Train: [15] [1500/6250] eta: 0:11:22 lr: 0.000121 grad: 0.1048 (0.1174) loss: 0.8127 (0.8044) time: 0.1390 data: 0.0587 max mem: 9377 +Train: [15] [1600/6250] eta: 0:11:10 lr: 0.000121 grad: 0.1102 (0.1170) loss: 0.8010 (0.8041) time: 0.1537 data: 0.0711 max mem: 9377 +Train: [15] [1700/6250] eta: 0:10:58 lr: 0.000121 grad: 0.1076 (0.1170) loss: 0.7988 (0.8036) time: 0.1537 data: 0.0784 max mem: 9377 +Train: [15] [1800/6250] eta: 0:10:48 lr: 0.000121 grad: 0.1106 (0.1166) loss: 0.8017 (0.8035) time: 0.1818 data: 0.1009 max mem: 9377 +Train: [15] [1900/6250] eta: 0:10:33 lr: 0.000121 grad: 0.1113 (0.1164) loss: 0.7892 (0.8034) time: 0.1509 data: 0.0743 max mem: 9377 +Train: [15] [2000/6250] eta: 0:10:20 lr: 0.000121 grad: 0.1118 (0.1162) loss: 0.8060 (0.8032) time: 0.1609 data: 0.0820 max mem: 9377 +Train: [15] [2100/6250] eta: 0:10:05 lr: 0.000121 grad: 0.1042 (0.1162) loss: 0.8017 (0.8030) time: 0.1519 data: 0.0767 max mem: 9377 +Train: [15] [2200/6250] eta: 0:09:50 lr: 0.000121 grad: 0.1127 (0.1157) loss: 0.7990 (0.8032) time: 0.1457 data: 0.0598 max mem: 9377 +Train: [15] [2300/6250] eta: 0:09:35 lr: 0.000121 grad: 0.1081 (0.1156) loss: 0.8064 (0.8032) time: 0.1274 data: 0.0392 max mem: 9377 +Train: [15] [2400/6250] eta: 0:09:19 lr: 0.000121 grad: 0.1074 (0.1154) loss: 0.8068 (0.8033) time: 0.1519 data: 0.0693 max mem: 9377 +Train: [15] [2500/6250] eta: 0:09:04 lr: 0.000121 grad: 0.1082 (0.1153) loss: 0.7985 (0.8032) time: 0.1508 data: 0.0699 max mem: 9377 +Train: [15] [2600/6250] eta: 0:08:51 lr: 0.000121 grad: 0.1118 (0.1152) loss: 0.7995 (0.8031) time: 0.2164 data: 0.1373 max mem: 9377 +Train: [15] [2700/6250] eta: 0:08:42 lr: 0.000121 grad: 0.1196 (0.1153) loss: 0.7985 (0.8030) time: 0.1932 data: 0.1153 max mem: 9377 +Train: [15] [2800/6250] eta: 0:08:32 lr: 0.000121 grad: 0.1059 (0.1153) loss: 0.7945 (0.8030) time: 0.1874 data: 0.1031 max mem: 9377 +Train: [15] [2900/6250] eta: 0:08:20 lr: 0.000121 grad: 0.1095 (0.1153) loss: 0.8025 (0.8030) time: 0.1430 data: 0.0480 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:06 lr: 0.000121 grad: 0.1058 (0.1154) loss: 0.8105 (0.8029) time: 0.1542 data: 0.0705 max mem: 9377 +Train: [15] [3100/6250] eta: 0:07:54 lr: 0.000121 grad: 0.1148 (0.1154) loss: 0.7967 (0.8028) time: 0.1942 data: 0.1139 max mem: 9377 +Train: [15] [3200/6250] eta: 0:07:41 lr: 0.000121 grad: 0.1113 (0.1155) loss: 0.7984 (0.8026) time: 0.1727 data: 0.0847 max mem: 9377 +Train: [15] [3300/6250] eta: 0:07:27 lr: 0.000121 grad: 0.1190 (0.1155) loss: 0.7975 (0.8025) time: 0.1292 data: 0.0509 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:11 lr: 0.000121 grad: 0.1110 (0.1155) loss: 0.8006 (0.8025) time: 0.1508 data: 0.0661 max mem: 9377 +Train: [15] [3500/6250] eta: 0:06:56 lr: 0.000121 grad: 0.1143 (0.1156) loss: 0.7970 (0.8024) time: 0.1267 data: 0.0366 max mem: 9377 +Train: [15] [3600/6250] eta: 0:06:40 lr: 0.000121 grad: 0.1157 (0.1156) loss: 0.8027 (0.8023) time: 0.1427 data: 0.0572 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:25 lr: 0.000121 grad: 0.1213 (0.1159) loss: 0.7975 (0.8022) time: 0.1480 data: 0.0680 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:10 lr: 0.000121 grad: 0.1102 (0.1160) loss: 0.7967 (0.8020) time: 0.1686 data: 0.0890 max mem: 9377 +Train: [15] [3900/6250] eta: 0:05:54 lr: 0.000121 grad: 0.1115 (0.1161) loss: 0.8061 (0.8020) time: 0.1509 data: 0.0734 max mem: 9377 +Train: [15] [4000/6250] eta: 0:05:38 lr: 0.000121 grad: 0.1165 (0.1160) loss: 0.8029 (0.8020) time: 0.1445 data: 0.0621 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:23 lr: 0.000121 grad: 0.1188 (0.1160) loss: 0.7946 (0.8019) time: 0.1353 data: 0.0565 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:08 lr: 0.000121 grad: 0.1091 (0.1160) loss: 0.8050 (0.8019) time: 0.1090 data: 0.0255 max mem: 9377 +Train: [15] [4300/6250] eta: 0:04:53 lr: 0.000121 grad: 0.1079 (0.1159) loss: 0.8098 (0.8020) time: 0.1462 data: 0.0644 max mem: 9377 +Train: [15] [4400/6250] eta: 0:04:37 lr: 0.000121 grad: 0.1134 (0.1160) loss: 0.8051 (0.8020) time: 0.1506 data: 0.0724 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:22 lr: 0.000121 grad: 0.1126 (0.1159) loss: 0.8030 (0.8020) time: 0.1341 data: 0.0560 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:06 lr: 0.000121 grad: 0.1146 (0.1159) loss: 0.8110 (0.8021) time: 0.1460 data: 0.0689 max mem: 9377 +Train: [15] [4700/6250] eta: 0:03:51 lr: 0.000121 grad: 0.1015 (0.1158) loss: 0.8134 (0.8023) time: 0.1465 data: 0.0635 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:36 lr: 0.000121 grad: 0.1083 (0.1156) loss: 0.8074 (0.8024) time: 0.1488 data: 0.0687 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:21 lr: 0.000121 grad: 0.1173 (0.1155) loss: 0.8031 (0.8025) time: 0.1288 data: 0.0530 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:06 lr: 0.000121 grad: 0.1021 (0.1154) loss: 0.8190 (0.8027) time: 0.1283 data: 0.0448 max mem: 9377 +Train: [15] [5100/6250] eta: 0:02:51 lr: 0.000121 grad: 0.1038 (0.1152) loss: 0.8180 (0.8030) time: 0.1533 data: 0.0756 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:35 lr: 0.000121 grad: 0.1079 (0.1151) loss: 0.8164 (0.8032) time: 0.1300 data: 0.0471 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:20 lr: 0.000121 grad: 0.1043 (0.1149) loss: 0.8086 (0.8034) time: 0.1278 data: 0.0446 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:05 lr: 0.000121 grad: 0.1080 (0.1148) loss: 0.8103 (0.8036) time: 0.1422 data: 0.0593 max mem: 9377 +Train: [15] [5500/6250] eta: 0:01:50 lr: 0.000121 grad: 0.1075 (0.1147) loss: 0.8072 (0.8038) time: 0.1283 data: 0.0449 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:36 lr: 0.000121 grad: 0.1028 (0.1145) loss: 0.8144 (0.8038) time: 0.1241 data: 0.0419 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:21 lr: 0.000121 grad: 0.1120 (0.1145) loss: 0.8107 (0.8039) time: 0.1589 data: 0.0785 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:06 lr: 0.000121 grad: 0.1107 (0.1144) loss: 0.8042 (0.8040) time: 0.1304 data: 0.0504 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:51 lr: 0.000121 grad: 0.1076 (0.1144) loss: 0.8140 (0.8041) time: 0.1411 data: 0.0556 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:36 lr: 0.000121 grad: 0.1117 (0.1143) loss: 0.8004 (0.8042) time: 0.1337 data: 0.0538 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:22 lr: 0.000121 grad: 0.1030 (0.1143) loss: 0.8235 (0.8042) time: 0.1464 data: 0.0698 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:07 lr: 0.000121 grad: 0.1052 (0.1142) loss: 0.8221 (0.8044) time: 0.1453 data: 0.0642 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.1042 (0.1141) loss: 0.8212 (0.8045) time: 0.1391 data: 0.0599 max mem: 9377 +Train: [15] Total time: 0:15:23 (0.1478 s / it) +Averaged stats: lr: 0.000121 grad: 0.1042 (0.1141) loss: 0.8212 (0.8045) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:03:37 loss: 0.8501 (0.8501) time: 3.5030 data: 3.4294 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8558 (0.8559) time: 0.1408 data: 0.1154 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (hcp-train-subset): loss: 0.8558 (0.8559) +Eval (hcp-val): [15] [ 0/62] eta: 0:04:38 loss: 0.8512 (0.8512) time: 4.4918 data: 4.4594 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8528 (0.8540) time: 0.1394 data: 0.1125 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-val): loss: 0.8528 (0.8540) +Eval (nsd-val): [15] [ 0/62] eta: 0:05:49 loss: 0.8135 (0.8135) time: 5.6450 data: 5.6142 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8242 (0.8245) time: 0.1395 data: 0.1139 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (nsd-val): loss: 0.8242 (0.8245) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [16] [ 0/6250] eta: 7:17:14 lr: 0.000121 grad: 0.0627 (0.0627) loss: 0.8925 (0.8925) time: 4.1975 data: 3.9224 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:20:36 lr: 0.000121 grad: 0.1141 (0.1443) loss: 0.8321 (0.8307) time: 0.1537 data: 0.0653 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:18:21 lr: 0.000121 grad: 0.1245 (0.1379) loss: 0.8203 (0.8212) time: 0.1711 data: 0.0864 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:17:17 lr: 0.000121 grad: 0.1206 (0.1354) loss: 0.8006 (0.8143) time: 0.1580 data: 0.0771 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:16:20 lr: 0.000121 grad: 0.1092 (0.1327) loss: 0.8099 (0.8114) time: 0.1397 data: 0.0570 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:15:39 lr: 0.000121 grad: 0.1026 (0.1280) loss: 0.8087 (0.8110) time: 0.1290 data: 0.0474 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:15:22 lr: 0.000121 grad: 0.1066 (0.1247) loss: 0.7924 (0.8101) time: 0.1810 data: 0.0971 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:14:58 lr: 0.000121 grad: 0.1112 (0.1223) loss: 0.8113 (0.8098) time: 0.1642 data: 0.0713 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:14:34 lr: 0.000121 grad: 0.1002 (0.1202) loss: 0.8104 (0.8098) time: 0.1666 data: 0.0667 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:14:04 lr: 0.000121 grad: 0.0993 (0.1186) loss: 0.8037 (0.8096) time: 0.1405 data: 0.0625 max mem: 9377 +Train: [16] [1000/6250] eta: 0:13:32 lr: 0.000121 grad: 0.1062 (0.1174) loss: 0.8070 (0.8095) time: 0.1224 data: 0.0351 max mem: 9377 +Train: [16] [1100/6250] eta: 0:13:07 lr: 0.000121 grad: 0.1016 (0.1164) loss: 0.8124 (0.8094) time: 0.1289 data: 0.0470 max mem: 9377 +Train: [16] [1200/6250] eta: 0:12:50 lr: 0.000121 grad: 0.1052 (0.1156) loss: 0.8073 (0.8092) time: 0.1636 data: 0.0868 max mem: 9377 +Train: [16] [1300/6250] eta: 0:12:36 lr: 0.000121 grad: 0.1074 (0.1150) loss: 0.8047 (0.8090) time: 0.1438 data: 0.0676 max mem: 9377 +Train: [16] [1400/6250] eta: 0:12:21 lr: 0.000121 grad: 0.1053 (0.1144) loss: 0.7970 (0.8089) time: 0.1626 data: 0.0836 max mem: 9377 +Train: [16] [1500/6250] eta: 0:12:09 lr: 0.000121 grad: 0.1099 (0.1140) loss: 0.8068 (0.8091) time: 0.1704 data: 0.0959 max mem: 9377 +Train: [16] [1600/6250] eta: 0:11:58 lr: 0.000121 grad: 0.1060 (0.1137) loss: 0.8086 (0.8089) time: 0.1578 data: 0.0798 max mem: 9377 +Train: [16] [1700/6250] eta: 0:11:43 lr: 0.000121 grad: 0.1053 (0.1135) loss: 0.8091 (0.8090) time: 0.1566 data: 0.0798 max mem: 9377 +Train: [16] [1800/6250] eta: 0:11:28 lr: 0.000121 grad: 0.1045 (0.1132) loss: 0.8152 (0.8091) time: 0.1531 data: 0.0779 max mem: 9377 +Train: [16] [1900/6250] eta: 0:11:13 lr: 0.000121 grad: 0.1017 (0.1129) loss: 0.8145 (0.8092) time: 0.1309 data: 0.0461 max mem: 9377 +Train: [16] [2000/6250] eta: 0:10:54 lr: 0.000121 grad: 0.1078 (0.1129) loss: 0.8080 (0.8091) time: 0.1386 data: 0.0628 max mem: 9377 +Train: [16] [2100/6250] eta: 0:10:35 lr: 0.000121 grad: 0.1050 (0.1127) loss: 0.8088 (0.8090) time: 0.1477 data: 0.0701 max mem: 9377 +Train: [16] [2200/6250] eta: 0:10:16 lr: 0.000121 grad: 0.1051 (0.1126) loss: 0.8067 (0.8088) time: 0.1166 data: 0.0357 max mem: 9377 +Train: [16] [2300/6250] eta: 0:09:58 lr: 0.000121 grad: 0.1113 (0.1124) loss: 0.8092 (0.8087) time: 0.1401 data: 0.0582 max mem: 9377 +Train: [16] [2400/6250] eta: 0:09:42 lr: 0.000121 grad: 0.1069 (0.1123) loss: 0.7994 (0.8085) time: 0.1259 data: 0.0432 max mem: 9377 +Train: [16] [2500/6250] eta: 0:09:24 lr: 0.000121 grad: 0.1095 (0.1122) loss: 0.8124 (0.8083) time: 0.1272 data: 0.0495 max mem: 9377 +Train: [16] [2600/6250] eta: 0:09:07 lr: 0.000121 grad: 0.1047 (0.1122) loss: 0.8088 (0.8081) time: 0.1429 data: 0.0623 max mem: 9377 +Train: [16] [2700/6250] eta: 0:08:51 lr: 0.000121 grad: 0.1036 (0.1121) loss: 0.8141 (0.8080) time: 0.1430 data: 0.0644 max mem: 9377 +Train: [16] [2800/6250] eta: 0:08:35 lr: 0.000121 grad: 0.0998 (0.1119) loss: 0.7969 (0.8079) time: 0.1365 data: 0.0550 max mem: 9377 +Train: [16] [2900/6250] eta: 0:08:19 lr: 0.000121 grad: 0.1092 (0.1120) loss: 0.8146 (0.8079) time: 0.1605 data: 0.0769 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:04 lr: 0.000121 grad: 0.1073 (0.1120) loss: 0.7990 (0.8077) time: 0.1560 data: 0.0748 max mem: 9377 +Train: [16] [3100/6250] eta: 0:07:50 lr: 0.000121 grad: 0.1158 (0.1121) loss: 0.7991 (0.8075) time: 0.1460 data: 0.0659 max mem: 9377 +Train: [16] [3200/6250] eta: 0:07:35 lr: 0.000121 grad: 0.1074 (0.1123) loss: 0.8045 (0.8072) time: 0.1565 data: 0.0801 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:19 lr: 0.000121 grad: 0.1205 (0.1124) loss: 0.7875 (0.8070) time: 0.1558 data: 0.0769 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:05 lr: 0.000121 grad: 0.1113 (0.1124) loss: 0.7964 (0.8068) time: 0.1518 data: 0.0657 max mem: 9377 +Train: [16] [3500/6250] eta: 0:06:52 lr: 0.000120 grad: 0.1108 (0.1124) loss: 0.8008 (0.8067) time: 0.1496 data: 0.0573 max mem: 9377 +Train: [16] [3600/6250] eta: 0:06:37 lr: 0.000120 grad: 0.1129 (0.1125) loss: 0.7947 (0.8065) time: 0.1487 data: 0.0660 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:22 lr: 0.000120 grad: 0.1148 (0.1126) loss: 0.7946 (0.8063) time: 0.1373 data: 0.0435 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:06 lr: 0.000120 grad: 0.1152 (0.1127) loss: 0.7905 (0.8061) time: 0.1481 data: 0.0660 max mem: 9377 +Train: [16] [3900/6250] eta: 0:05:50 lr: 0.000120 grad: 0.1153 (0.1128) loss: 0.8016 (0.8061) time: 0.1409 data: 0.0582 max mem: 9377 +Train: [16] [4000/6250] eta: 0:05:35 lr: 0.000120 grad: 0.1109 (0.1129) loss: 0.7937 (0.8060) time: 0.1364 data: 0.0521 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:20 lr: 0.000120 grad: 0.1158 (0.1129) loss: 0.8025 (0.8059) time: 0.1239 data: 0.0381 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:04 lr: 0.000120 grad: 0.1107 (0.1131) loss: 0.8078 (0.8058) time: 0.1326 data: 0.0480 max mem: 9377 +Train: [16] [4300/6250] eta: 0:04:49 lr: 0.000120 grad: 0.1203 (0.1132) loss: 0.8036 (0.8057) time: 0.1464 data: 0.0657 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:34 lr: 0.000120 grad: 0.1113 (0.1133) loss: 0.7990 (0.8056) time: 0.1295 data: 0.0464 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:19 lr: 0.000120 grad: 0.1200 (0.1134) loss: 0.8005 (0.8055) time: 0.1445 data: 0.0615 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:03 lr: 0.000120 grad: 0.1181 (0.1135) loss: 0.7963 (0.8053) time: 0.1400 data: 0.0613 max mem: 9377 +Train: [16] [4700/6250] eta: 0:03:48 lr: 0.000120 grad: 0.1163 (0.1136) loss: 0.7973 (0.8051) time: 0.1242 data: 0.0403 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:33 lr: 0.000120 grad: 0.1102 (0.1137) loss: 0.8009 (0.8049) time: 0.1236 data: 0.0481 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:18 lr: 0.000120 grad: 0.1103 (0.1138) loss: 0.7996 (0.8047) time: 0.1411 data: 0.0594 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:03 lr: 0.000120 grad: 0.1103 (0.1139) loss: 0.7970 (0.8046) time: 0.1279 data: 0.0541 max mem: 9377 +Train: [16] [5100/6250] eta: 0:02:48 lr: 0.000120 grad: 0.1022 (0.1139) loss: 0.7959 (0.8043) time: 0.1294 data: 0.0501 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:33 lr: 0.000120 grad: 0.1227 (0.1141) loss: 0.7901 (0.8042) time: 0.1383 data: 0.0532 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:19 lr: 0.000120 grad: 0.1169 (0.1141) loss: 0.7985 (0.8041) time: 0.1390 data: 0.0552 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:04 lr: 0.000120 grad: 0.1166 (0.1143) loss: 0.8125 (0.8040) time: 0.1556 data: 0.0714 max mem: 9377 +Train: [16] [5500/6250] eta: 0:01:49 lr: 0.000120 grad: 0.1039 (0.1144) loss: 0.8040 (0.8039) time: 0.1333 data: 0.0476 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:34 lr: 0.000120 grad: 0.1140 (0.1146) loss: 0.7965 (0.8039) time: 0.1310 data: 0.0536 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:20 lr: 0.000120 grad: 0.1205 (0.1147) loss: 0.7860 (0.8037) time: 0.1325 data: 0.0517 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:05 lr: 0.000120 grad: 0.1154 (0.1147) loss: 0.8062 (0.8036) time: 0.1293 data: 0.0431 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:50 lr: 0.000120 grad: 0.1229 (0.1148) loss: 0.7905 (0.8035) time: 0.1402 data: 0.0561 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:36 lr: 0.000120 grad: 0.1123 (0.1148) loss: 0.8031 (0.8033) time: 0.1218 data: 0.0435 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:21 lr: 0.000120 grad: 0.1246 (0.1149) loss: 0.7943 (0.8032) time: 0.1353 data: 0.0540 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:07 lr: 0.000120 grad: 0.1140 (0.1149) loss: 0.8057 (0.8032) time: 0.1377 data: 0.0605 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.1101 (0.1149) loss: 0.8016 (0.8031) time: 0.1222 data: 0.0380 max mem: 9377 +Train: [16] Total time: 0:15:10 (0.1456 s / it) +Averaged stats: lr: 0.000120 grad: 0.1101 (0.1149) loss: 0.8016 (0.8031) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:04:54 loss: 0.8552 (0.8552) time: 4.7499 data: 4.7192 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8557 (0.8552) time: 0.1284 data: 0.1016 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:13 (0.2135 s / it) +Averaged stats (hcp-train-subset): loss: 0.8557 (0.8552) +Eval (hcp-val): [16] [ 0/62] eta: 0:04:16 loss: 0.8498 (0.8498) time: 4.1325 data: 4.0520 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8515 (0.8533) time: 0.1374 data: 0.1121 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:13 (0.2135 s / it) +Averaged stats (hcp-val): loss: 0.8515 (0.8533) +Eval (nsd-val): [16] [ 0/62] eta: 0:03:53 loss: 0.8159 (0.8159) time: 3.7617 data: 3.6579 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8268 (0.8275) time: 0.1249 data: 0.0991 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:12 (0.2081 s / it) +Averaged stats (nsd-val): loss: 0.8268 (0.8275) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +Train: [17] [ 0/6250] eta: 8:21:42 lr: 0.000120 grad: 0.1094 (0.1094) loss: 0.8480 (0.8480) time: 4.8164 data: 4.5790 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:19:39 lr: 0.000120 grad: 0.1100 (0.1454) loss: 0.8366 (0.8354) time: 0.1329 data: 0.0461 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:17:02 lr: 0.000120 grad: 0.1153 (0.1388) loss: 0.8233 (0.8240) time: 0.1383 data: 0.0521 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:16:10 lr: 0.000120 grad: 0.1180 (0.1332) loss: 0.8022 (0.8184) time: 0.1790 data: 0.0905 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:15:33 lr: 0.000120 grad: 0.1165 (0.1307) loss: 0.7912 (0.8142) time: 0.1369 data: 0.0559 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:14:51 lr: 0.000120 grad: 0.1068 (0.1273) loss: 0.7967 (0.8123) time: 0.1082 data: 0.0207 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:14:19 lr: 0.000120 grad: 0.1061 (0.1246) loss: 0.8106 (0.8112) time: 0.1351 data: 0.0541 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:13:53 lr: 0.000120 grad: 0.1060 (0.1231) loss: 0.8005 (0.8094) time: 0.1470 data: 0.0559 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:13:38 lr: 0.000120 grad: 0.1239 (0.1223) loss: 0.7906 (0.8082) time: 0.1354 data: 0.0500 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:13:19 lr: 0.000120 grad: 0.1112 (0.1212) loss: 0.8053 (0.8073) time: 0.1430 data: 0.0501 max mem: 9377 +Train: [17] [1000/6250] eta: 0:12:57 lr: 0.000120 grad: 0.1048 (0.1205) loss: 0.7945 (0.8062) time: 0.1166 data: 0.0310 max mem: 9377 +Train: [17] [1100/6250] eta: 0:12:36 lr: 0.000120 grad: 0.1137 (0.1199) loss: 0.7973 (0.8051) time: 0.1392 data: 0.0518 max mem: 9377 +Train: [17] [1200/6250] eta: 0:12:16 lr: 0.000120 grad: 0.1119 (0.1195) loss: 0.7978 (0.8040) time: 0.1163 data: 0.0325 max mem: 9377 +Train: [17] [1300/6250] eta: 0:11:57 lr: 0.000120 grad: 0.1196 (0.1195) loss: 0.7851 (0.8033) time: 0.1335 data: 0.0517 max mem: 9377 +Train: [17] [1400/6250] eta: 0:11:38 lr: 0.000120 grad: 0.1118 (0.1194) loss: 0.7933 (0.8022) time: 0.1421 data: 0.0672 max mem: 9377 +Train: [17] [1500/6250] eta: 0:11:21 lr: 0.000120 grad: 0.1111 (0.1190) loss: 0.7926 (0.8014) time: 0.1411 data: 0.0592 max mem: 9377 +Train: [17] [1600/6250] eta: 0:11:06 lr: 0.000120 grad: 0.1072 (0.1188) loss: 0.7977 (0.8008) time: 0.1561 data: 0.0788 max mem: 9377 +Train: [17] [1700/6250] eta: 0:10:51 lr: 0.000120 grad: 0.1189 (0.1186) loss: 0.7969 (0.8002) time: 0.1411 data: 0.0583 max mem: 9377 +Train: [17] [1800/6250] eta: 0:10:35 lr: 0.000120 grad: 0.1149 (0.1184) loss: 0.7925 (0.7998) time: 0.1193 data: 0.0392 max mem: 9377 +Train: [17] [1900/6250] eta: 0:10:22 lr: 0.000120 grad: 0.1119 (0.1184) loss: 0.7937 (0.7994) time: 0.1336 data: 0.0540 max mem: 9377 +Train: [17] [2000/6250] eta: 0:10:09 lr: 0.000120 grad: 0.1084 (0.1183) loss: 0.7944 (0.7990) time: 0.1671 data: 0.0851 max mem: 9377 +Train: [17] [2100/6250] eta: 0:09:55 lr: 0.000120 grad: 0.1088 (0.1183) loss: 0.8017 (0.7989) time: 0.1432 data: 0.0625 max mem: 9377 +Train: [17] [2200/6250] eta: 0:09:41 lr: 0.000120 grad: 0.1175 (0.1186) loss: 0.7961 (0.7987) time: 0.1231 data: 0.0448 max mem: 9377 +Train: [17] [2300/6250] eta: 0:09:27 lr: 0.000120 grad: 0.1217 (0.1184) loss: 0.7848 (0.7986) time: 0.1311 data: 0.0522 max mem: 9377 +Train: [17] [2400/6250] eta: 0:09:13 lr: 0.000120 grad: 0.1230 (0.1185) loss: 0.7835 (0.7984) time: 0.1429 data: 0.0646 max mem: 9377 +Train: [17] [2500/6250] eta: 0:09:01 lr: 0.000120 grad: 0.1141 (0.1185) loss: 0.7973 (0.7982) time: 0.1514 data: 0.0719 max mem: 9377 +Train: [17] [2600/6250] eta: 0:08:48 lr: 0.000120 grad: 0.1176 (0.1185) loss: 0.7969 (0.7982) time: 0.0896 data: 0.0031 max mem: 9377 +Train: [17] [2700/6250] eta: 0:08:34 lr: 0.000120 grad: 0.1011 (0.1183) loss: 0.7981 (0.7979) time: 0.1432 data: 0.0614 max mem: 9377 +Train: [17] [2800/6250] eta: 0:08:21 lr: 0.000120 grad: 0.1160 (0.1183) loss: 0.7955 (0.7978) time: 0.1710 data: 0.0883 max mem: 9377 +Train: [17] [2900/6250] eta: 0:08:06 lr: 0.000120 grad: 0.1103 (0.1182) loss: 0.7908 (0.7977) time: 0.1435 data: 0.0587 max mem: 9377 +Train: [17] [3000/6250] eta: 0:07:52 lr: 0.000120 grad: 0.1112 (0.1182) loss: 0.7979 (0.7976) time: 0.1555 data: 0.0754 max mem: 9377 +Train: [17] [3100/6250] eta: 0:07:39 lr: 0.000120 grad: 0.1207 (0.1183) loss: 0.7906 (0.7974) time: 0.1462 data: 0.0616 max mem: 9377 +Train: [17] [3200/6250] eta: 0:07:24 lr: 0.000120 grad: 0.1151 (0.1183) loss: 0.7703 (0.7971) time: 0.1609 data: 0.0698 max mem: 9377 +Train: [17] [3300/6250] eta: 0:07:10 lr: 0.000120 grad: 0.1216 (0.1182) loss: 0.8021 (0.7970) time: 0.1320 data: 0.0486 max mem: 9377 +Train: [17] [3400/6250] eta: 0:06:55 lr: 0.000120 grad: 0.1188 (0.1181) loss: 0.7849 (0.7968) time: 0.1342 data: 0.0533 max mem: 9377 +Train: [17] [3500/6250] eta: 0:06:41 lr: 0.000120 grad: 0.1102 (0.1180) loss: 0.7958 (0.7966) time: 0.1473 data: 0.0728 max mem: 9377 +Train: [17] [3600/6250] eta: 0:06:26 lr: 0.000120 grad: 0.1237 (0.1181) loss: 0.7849 (0.7963) time: 0.1178 data: 0.0277 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:11 lr: 0.000120 grad: 0.1138 (0.1181) loss: 0.7715 (0.7960) time: 0.1050 data: 0.0163 max mem: 9377 +Train: [17] [3800/6250] eta: 0:05:56 lr: 0.000120 grad: 0.1132 (0.1181) loss: 0.7900 (0.7958) time: 0.1335 data: 0.0469 max mem: 9377 +Train: [17] [3900/6250] eta: 0:05:40 lr: 0.000120 grad: 0.1168 (0.1181) loss: 0.7945 (0.7956) time: 0.1361 data: 0.0496 max mem: 9377 +Train: [17] [4000/6250] eta: 0:05:25 lr: 0.000120 grad: 0.1051 (0.1179) loss: 0.7821 (0.7955) time: 0.1325 data: 0.0444 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:10 lr: 0.000120 grad: 0.1112 (0.1179) loss: 0.7946 (0.7953) time: 0.1379 data: 0.0482 max mem: 9377 +Train: [17] [4200/6250] eta: 0:04:55 lr: 0.000120 grad: 0.1105 (0.1178) loss: 0.8036 (0.7952) time: 0.1518 data: 0.0652 max mem: 9377 +Train: [17] [4300/6250] eta: 0:04:40 lr: 0.000120 grad: 0.1162 (0.1178) loss: 0.8026 (0.7951) time: 0.1300 data: 0.0459 max mem: 9377 +Train: [17] [4400/6250] eta: 0:04:26 lr: 0.000120 grad: 0.1095 (0.1178) loss: 0.7873 (0.7950) time: 0.1634 data: 0.0717 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:11 lr: 0.000120 grad: 0.1121 (0.1177) loss: 0.7960 (0.7948) time: 0.1451 data: 0.0608 max mem: 9377 +Train: [17] [4600/6250] eta: 0:03:57 lr: 0.000120 grad: 0.1083 (0.1177) loss: 0.7864 (0.7948) time: 0.1384 data: 0.0562 max mem: 9377 +Train: [17] [4700/6250] eta: 0:03:42 lr: 0.000120 grad: 0.1176 (0.1177) loss: 0.7955 (0.7947) time: 0.1356 data: 0.0564 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:28 lr: 0.000120 grad: 0.1100 (0.1177) loss: 0.7928 (0.7946) time: 0.1277 data: 0.0418 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:13 lr: 0.000119 grad: 0.1187 (0.1177) loss: 0.7836 (0.7945) time: 0.1337 data: 0.0508 max mem: 9377 +Train: [17] [5000/6250] eta: 0:02:59 lr: 0.000119 grad: 0.1133 (0.1177) loss: 0.8060 (0.7946) time: 0.1492 data: 0.0695 max mem: 9377 +Train: [17] [5100/6250] eta: 0:02:44 lr: 0.000119 grad: 0.1114 (0.1177) loss: 0.7896 (0.7946) time: 0.1255 data: 0.0397 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:30 lr: 0.000119 grad: 0.1136 (0.1178) loss: 0.7914 (0.7945) time: 0.1258 data: 0.0388 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:15 lr: 0.000119 grad: 0.1124 (0.1177) loss: 0.7942 (0.7945) time: 0.1535 data: 0.0728 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:01 lr: 0.000119 grad: 0.1092 (0.1177) loss: 0.7917 (0.7945) time: 0.1465 data: 0.0582 max mem: 9377 +Train: [17] [5500/6250] eta: 0:01:47 lr: 0.000119 grad: 0.1133 (0.1177) loss: 0.8008 (0.7945) time: 0.1508 data: 0.0668 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:32 lr: 0.000119 grad: 0.1094 (0.1177) loss: 0.8071 (0.7946) time: 0.1221 data: 0.0449 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:18 lr: 0.000119 grad: 0.1188 (0.1177) loss: 0.8001 (0.7946) time: 0.1416 data: 0.0622 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:04 lr: 0.000119 grad: 0.1155 (0.1177) loss: 0.7931 (0.7946) time: 0.1279 data: 0.0436 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:49 lr: 0.000119 grad: 0.1183 (0.1176) loss: 0.8011 (0.7947) time: 0.1381 data: 0.0582 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:35 lr: 0.000119 grad: 0.1119 (0.1176) loss: 0.7978 (0.7947) time: 0.1531 data: 0.0703 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:21 lr: 0.000119 grad: 0.1176 (0.1176) loss: 0.7940 (0.7948) time: 0.1397 data: 0.0530 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:07 lr: 0.000119 grad: 0.1126 (0.1176) loss: 0.8061 (0.7948) time: 0.1322 data: 0.0487 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.1072 (0.1175) loss: 0.8026 (0.7949) time: 0.1452 data: 0.0663 max mem: 9377 +Train: [17] Total time: 0:14:54 (0.1432 s / it) +Averaged stats: lr: 0.000119 grad: 0.1072 (0.1175) loss: 0.8026 (0.7949) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:03:58 loss: 0.8538 (0.8538) time: 3.8427 data: 3.7651 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8586 (0.8574) time: 0.1272 data: 0.1020 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (hcp-train-subset): loss: 0.8586 (0.8574) +Eval (hcp-val): [17] [ 0/62] eta: 0:04:17 loss: 0.8551 (0.8551) time: 4.1497 data: 4.0668 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8543 (0.8549) time: 0.1208 data: 0.0958 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:13 (0.2221 s / it) +Averaged stats (hcp-val): loss: 0.8543 (0.8549) +Eval (nsd-val): [17] [ 0/62] eta: 0:03:45 loss: 0.8272 (0.8272) time: 3.6367 data: 3.5716 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8370 (0.8387) time: 0.1391 data: 0.1140 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:12 (0.2073 s / it) +Averaged stats (nsd-val): loss: 0.8370 (0.8387) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [18] [ 0/6250] eta: 9:56:53 lr: 0.000119 grad: 0.2364 (0.2364) loss: 0.8269 (0.8269) time: 5.7302 data: 5.6289 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:20:19 lr: 0.000119 grad: 0.1294 (0.1565) loss: 0.8149 (0.8262) time: 0.1547 data: 0.0640 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:17:36 lr: 0.000119 grad: 0.1362 (0.1480) loss: 0.8071 (0.8137) time: 0.1577 data: 0.0726 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:16:30 lr: 0.000119 grad: 0.1257 (0.1412) loss: 0.7903 (0.8078) time: 0.1395 data: 0.0574 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:15:45 lr: 0.000119 grad: 0.1191 (0.1367) loss: 0.7998 (0.8039) time: 0.1593 data: 0.0776 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:15:17 lr: 0.000119 grad: 0.1084 (0.1331) loss: 0.7891 (0.8021) time: 0.1388 data: 0.0557 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:14:49 lr: 0.000119 grad: 0.1177 (0.1293) loss: 0.7961 (0.8015) time: 0.1411 data: 0.0575 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:14:31 lr: 0.000119 grad: 0.1023 (0.1266) loss: 0.7996 (0.8015) time: 0.1269 data: 0.0433 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:14:14 lr: 0.000119 grad: 0.1153 (0.1250) loss: 0.7952 (0.8008) time: 0.1710 data: 0.0877 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:13:55 lr: 0.000119 grad: 0.1071 (0.1235) loss: 0.7922 (0.8008) time: 0.1660 data: 0.0715 max mem: 9377 +Train: [18] [1000/6250] eta: 0:13:33 lr: 0.000119 grad: 0.1052 (0.1223) loss: 0.7847 (0.8004) time: 0.1490 data: 0.0590 max mem: 9377 +Train: [18] [1100/6250] eta: 0:13:12 lr: 0.000119 grad: 0.1139 (0.1215) loss: 0.7895 (0.7999) time: 0.1265 data: 0.0409 max mem: 9377 +Train: [18] [1200/6250] eta: 0:12:50 lr: 0.000119 grad: 0.1118 (0.1208) loss: 0.7944 (0.7997) time: 0.1378 data: 0.0485 max mem: 9377 +Train: [18] [1300/6250] eta: 0:12:28 lr: 0.000119 grad: 0.1056 (0.1205) loss: 0.7910 (0.7993) time: 0.1541 data: 0.0718 max mem: 9377 +Train: [18] [1400/6250] eta: 0:12:07 lr: 0.000119 grad: 0.1128 (0.1200) loss: 0.7990 (0.7991) time: 0.1358 data: 0.0556 max mem: 9377 +Train: [18] [1500/6250] eta: 0:11:48 lr: 0.000119 grad: 0.1136 (0.1197) loss: 0.7851 (0.7989) time: 0.1387 data: 0.0623 max mem: 9377 +Train: [18] [1600/6250] eta: 0:11:31 lr: 0.000119 grad: 0.1151 (0.1197) loss: 0.7918 (0.7986) time: 0.1499 data: 0.0726 max mem: 9377 +Train: [18] [1700/6250] eta: 0:11:15 lr: 0.000119 grad: 0.1020 (0.1195) loss: 0.7991 (0.7983) time: 0.1435 data: 0.0658 max mem: 9377 +Train: [18] [1800/6250] eta: 0:10:58 lr: 0.000119 grad: 0.1128 (0.1193) loss: 0.7936 (0.7981) time: 0.1360 data: 0.0560 max mem: 9377 +Train: [18] [1900/6250] eta: 0:10:43 lr: 0.000119 grad: 0.1167 (0.1192) loss: 0.7906 (0.7977) time: 0.1369 data: 0.0471 max mem: 9377 +Train: [18] [2000/6250] eta: 0:10:26 lr: 0.000119 grad: 0.1025 (0.1191) loss: 0.7962 (0.7974) time: 0.1294 data: 0.0411 max mem: 9377 +Train: [18] [2100/6250] eta: 0:10:10 lr: 0.000119 grad: 0.1142 (0.1191) loss: 0.7864 (0.7970) time: 0.1434 data: 0.0636 max mem: 9377 +Train: [18] [2200/6250] eta: 0:09:54 lr: 0.000119 grad: 0.1177 (0.1190) loss: 0.7876 (0.7968) time: 0.1177 data: 0.0314 max mem: 9377 +Train: [18] [2300/6250] eta: 0:09:38 lr: 0.000119 grad: 0.1181 (0.1188) loss: 0.7828 (0.7965) time: 0.1384 data: 0.0590 max mem: 9377 +Train: [18] [2400/6250] eta: 0:09:21 lr: 0.000119 grad: 0.1119 (0.1187) loss: 0.7907 (0.7962) time: 0.1304 data: 0.0444 max mem: 9377 +Train: [18] [2500/6250] eta: 0:09:05 lr: 0.000119 grad: 0.1057 (0.1185) loss: 0.7919 (0.7961) time: 0.1430 data: 0.0634 max mem: 9377 +Train: [18] [2600/6250] eta: 0:08:49 lr: 0.000119 grad: 0.1122 (0.1183) loss: 0.7939 (0.7961) time: 0.1373 data: 0.0599 max mem: 9377 +Train: [18] [2700/6250] eta: 0:08:35 lr: 0.000119 grad: 0.1097 (0.1181) loss: 0.7872 (0.7959) time: 0.1412 data: 0.0552 max mem: 9377 +Train: [18] [2800/6250] eta: 0:08:22 lr: 0.000119 grad: 0.1106 (0.1180) loss: 0.7935 (0.7959) time: 0.1548 data: 0.0714 max mem: 9377 +Train: [18] [2900/6250] eta: 0:08:07 lr: 0.000119 grad: 0.1137 (0.1180) loss: 0.7976 (0.7959) time: 0.1523 data: 0.0725 max mem: 9377 +Train: [18] [3000/6250] eta: 0:07:52 lr: 0.000119 grad: 0.1126 (0.1180) loss: 0.7985 (0.7959) time: 0.1351 data: 0.0582 max mem: 9377 +Train: [18] [3100/6250] eta: 0:07:38 lr: 0.000119 grad: 0.1151 (0.1179) loss: 0.8010 (0.7959) time: 0.1438 data: 0.0636 max mem: 9377 +Train: [18] [3200/6250] eta: 0:07:24 lr: 0.000119 grad: 0.1194 (0.1179) loss: 0.7924 (0.7958) time: 0.1543 data: 0.0686 max mem: 9377 +Train: [18] [3300/6250] eta: 0:07:11 lr: 0.000119 grad: 0.1197 (0.1180) loss: 0.7971 (0.7957) time: 0.1699 data: 0.0793 max mem: 9377 +Train: [18] [3400/6250] eta: 0:06:56 lr: 0.000119 grad: 0.1192 (0.1180) loss: 0.7869 (0.7956) time: 0.1387 data: 0.0598 max mem: 9377 +Train: [18] [3500/6250] eta: 0:06:41 lr: 0.000119 grad: 0.1174 (0.1181) loss: 0.7960 (0.7956) time: 0.1293 data: 0.0560 max mem: 9377 +Train: [18] [3600/6250] eta: 0:06:26 lr: 0.000119 grad: 0.1189 (0.1181) loss: 0.7891 (0.7954) time: 0.1227 data: 0.0412 max mem: 9377 +Train: [18] [3700/6250] eta: 0:06:11 lr: 0.000119 grad: 0.1079 (0.1180) loss: 0.7850 (0.7952) time: 0.1321 data: 0.0495 max mem: 9377 +Train: [18] [3800/6250] eta: 0:05:56 lr: 0.000119 grad: 0.1233 (0.1180) loss: 0.7887 (0.7952) time: 0.1305 data: 0.0436 max mem: 9377 +Train: [18] [3900/6250] eta: 0:05:41 lr: 0.000119 grad: 0.1171 (0.1181) loss: 0.7904 (0.7951) time: 0.1225 data: 0.0353 max mem: 9377 +Train: [18] [4000/6250] eta: 0:05:25 lr: 0.000119 grad: 0.1084 (0.1181) loss: 0.7937 (0.7950) time: 0.1350 data: 0.0486 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:10 lr: 0.000119 grad: 0.1188 (0.1180) loss: 0.7859 (0.7950) time: 0.1299 data: 0.0424 max mem: 9377 +Train: [18] [4200/6250] eta: 0:04:55 lr: 0.000119 grad: 0.1156 (0.1181) loss: 0.8004 (0.7949) time: 0.1681 data: 0.0892 max mem: 9377 +Train: [18] [4300/6250] eta: 0:04:41 lr: 0.000119 grad: 0.1194 (0.1181) loss: 0.7982 (0.7949) time: 0.1586 data: 0.0817 max mem: 9377 +Train: [18] [4400/6250] eta: 0:04:27 lr: 0.000119 grad: 0.1234 (0.1183) loss: 0.7823 (0.7948) time: 0.1459 data: 0.0652 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:13 lr: 0.000119 grad: 0.1152 (0.1182) loss: 0.7837 (0.7947) time: 0.1306 data: 0.0579 max mem: 9377 +Train: [18] [4600/6250] eta: 0:03:59 lr: 0.000119 grad: 0.1136 (0.1182) loss: 0.8001 (0.7947) time: 0.1743 data: 0.0972 max mem: 9377 +Train: [18] [4700/6250] eta: 0:03:44 lr: 0.000119 grad: 0.1100 (0.1181) loss: 0.7926 (0.7947) time: 0.1534 data: 0.0744 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:30 lr: 0.000119 grad: 0.1172 (0.1182) loss: 0.7994 (0.7947) time: 0.1290 data: 0.0431 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:15 lr: 0.000119 grad: 0.1100 (0.1181) loss: 0.7951 (0.7948) time: 0.1450 data: 0.0649 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:00 lr: 0.000119 grad: 0.1052 (0.1181) loss: 0.7892 (0.7948) time: 0.1475 data: 0.0718 max mem: 9377 +Train: [18] [5100/6250] eta: 0:02:46 lr: 0.000119 grad: 0.1225 (0.1181) loss: 0.7935 (0.7948) time: 0.1505 data: 0.0645 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:32 lr: 0.000119 grad: 0.1201 (0.1180) loss: 0.7847 (0.7948) time: 0.1964 data: 0.1166 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:17 lr: 0.000119 grad: 0.1154 (0.1180) loss: 0.7846 (0.7948) time: 0.1468 data: 0.0683 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:02 lr: 0.000119 grad: 0.1130 (0.1179) loss: 0.7792 (0.7947) time: 0.1385 data: 0.0548 max mem: 9377 +Train: [18] [5500/6250] eta: 0:01:48 lr: 0.000119 grad: 0.1168 (0.1179) loss: 0.7920 (0.7945) time: 0.1509 data: 0.0633 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:33 lr: 0.000119 grad: 0.1118 (0.1179) loss: 0.7875 (0.7945) time: 0.1193 data: 0.0339 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:19 lr: 0.000119 grad: 0.1176 (0.1178) loss: 0.7879 (0.7944) time: 0.1464 data: 0.0648 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:04 lr: 0.000118 grad: 0.1269 (0.1179) loss: 0.7788 (0.7943) time: 0.1465 data: 0.0715 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:50 lr: 0.000118 grad: 0.1187 (0.1179) loss: 0.7847 (0.7942) time: 0.1394 data: 0.0546 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:36 lr: 0.000118 grad: 0.1272 (0.1180) loss: 0.7712 (0.7940) time: 0.1355 data: 0.0538 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:21 lr: 0.000118 grad: 0.1140 (0.1180) loss: 0.7820 (0.7938) time: 0.1535 data: 0.0691 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:07 lr: 0.000118 grad: 0.1175 (0.1181) loss: 0.7790 (0.7936) time: 0.1233 data: 0.0297 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.1185 (0.1181) loss: 0.7895 (0.7936) time: 0.1318 data: 0.0479 max mem: 9377 +Train: [18] Total time: 0:15:04 (0.1447 s / it) +Averaged stats: lr: 0.000118 grad: 0.1185 (0.1181) loss: 0.7895 (0.7936) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:03:46 loss: 0.8505 (0.8505) time: 3.6612 data: 3.5752 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8561 (0.8547) time: 0.1245 data: 0.0993 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:13 (0.2178 s / it) +Averaged stats (hcp-train-subset): loss: 0.8561 (0.8547) +Eval (hcp-val): [18] [ 0/62] eta: 0:04:57 loss: 0.8520 (0.8520) time: 4.7931 data: 4.7631 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8515 (0.8540) time: 0.1401 data: 0.1128 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:13 (0.2234 s / it) +Averaged stats (hcp-val): loss: 0.8515 (0.8540) +Eval (nsd-val): [18] [ 0/62] eta: 0:04:07 loss: 0.8209 (0.8209) time: 3.9917 data: 3.9243 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8331 (0.8343) time: 0.1419 data: 0.1163 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (nsd-val): loss: 0.8331 (0.8343) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [19] [ 0/6250] eta: 7:34:19 lr: 0.000118 grad: 0.2648 (0.2648) loss: 0.8528 (0.8528) time: 4.3616 data: 4.1457 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:19:57 lr: 0.000118 grad: 0.1305 (0.1429) loss: 0.7972 (0.8275) time: 0.1415 data: 0.0455 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:17:24 lr: 0.000118 grad: 0.1290 (0.1404) loss: 0.8077 (0.8128) time: 0.1417 data: 0.0604 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:16:25 lr: 0.000118 grad: 0.1164 (0.1333) loss: 0.7889 (0.8076) time: 0.1653 data: 0.0759 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:15:37 lr: 0.000118 grad: 0.1311 (0.1341) loss: 0.7825 (0.8020) time: 0.1443 data: 0.0634 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:15:26 lr: 0.000118 grad: 0.1188 (0.1323) loss: 0.7866 (0.7982) time: 0.2221 data: 0.1466 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:15:01 lr: 0.000118 grad: 0.1165 (0.1304) loss: 0.7822 (0.7963) time: 0.1525 data: 0.0724 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:14:33 lr: 0.000118 grad: 0.1124 (0.1292) loss: 0.7919 (0.7952) time: 0.1501 data: 0.0692 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:14:05 lr: 0.000118 grad: 0.1102 (0.1280) loss: 0.7953 (0.7945) time: 0.1565 data: 0.0727 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:13:47 lr: 0.000118 grad: 0.1134 (0.1268) loss: 0.7838 (0.7940) time: 0.1689 data: 0.0906 max mem: 9377 +Train: [19] [1000/6250] eta: 0:13:28 lr: 0.000118 grad: 0.1217 (0.1259) loss: 0.7782 (0.7932) time: 0.1498 data: 0.0641 max mem: 9377 +Train: [19] [1100/6250] eta: 0:13:07 lr: 0.000118 grad: 0.1309 (0.1254) loss: 0.7903 (0.7926) time: 0.1400 data: 0.0479 max mem: 9377 +Train: [19] [1200/6250] eta: 0:12:45 lr: 0.000118 grad: 0.1237 (0.1253) loss: 0.7841 (0.7918) time: 0.1324 data: 0.0513 max mem: 9377 +Train: [19] [1300/6250] eta: 0:12:23 lr: 0.000118 grad: 0.1189 (0.1248) loss: 0.7821 (0.7913) time: 0.1280 data: 0.0429 max mem: 9377 +Train: [19] [1400/6250] eta: 0:12:04 lr: 0.000118 grad: 0.1118 (0.1243) loss: 0.7802 (0.7910) time: 0.1330 data: 0.0449 max mem: 9377 +Train: [19] [1500/6250] eta: 0:11:45 lr: 0.000118 grad: 0.1105 (0.1238) loss: 0.7809 (0.7907) time: 0.1306 data: 0.0468 max mem: 9377 +Train: [19] [1600/6250] eta: 0:11:27 lr: 0.000118 grad: 0.1202 (0.1234) loss: 0.7928 (0.7905) time: 0.1391 data: 0.0546 max mem: 9377 +Train: [19] [1700/6250] eta: 0:11:12 lr: 0.000118 grad: 0.1194 (0.1231) loss: 0.7751 (0.7904) time: 0.1482 data: 0.0652 max mem: 9377 +Train: [19] [1800/6250] eta: 0:10:54 lr: 0.000118 grad: 0.1108 (0.1226) loss: 0.7862 (0.7903) time: 0.1392 data: 0.0588 max mem: 9377 +Train: [19] [1900/6250] eta: 0:10:38 lr: 0.000118 grad: 0.1095 (0.1224) loss: 0.7964 (0.7904) time: 0.1383 data: 0.0524 max mem: 9377 +Train: [19] [2000/6250] eta: 0:10:23 lr: 0.000118 grad: 0.1161 (0.1223) loss: 0.7931 (0.7905) time: 0.1454 data: 0.0632 max mem: 9377 +Train: [19] [2100/6250] eta: 0:10:06 lr: 0.000118 grad: 0.1160 (0.1223) loss: 0.7882 (0.7906) time: 0.1376 data: 0.0518 max mem: 9377 +Train: [19] [2200/6250] eta: 0:09:50 lr: 0.000118 grad: 0.1214 (0.1222) loss: 0.7747 (0.7904) time: 0.1498 data: 0.0701 max mem: 9377 +Train: [19] [2300/6250] eta: 0:09:33 lr: 0.000118 grad: 0.1173 (0.1222) loss: 0.7939 (0.7903) time: 0.1448 data: 0.0653 max mem: 9377 +Train: [19] [2400/6250] eta: 0:09:18 lr: 0.000118 grad: 0.1149 (0.1221) loss: 0.7836 (0.7901) time: 0.1456 data: 0.0650 max mem: 9377 +Train: [19] [2500/6250] eta: 0:09:02 lr: 0.000118 grad: 0.1196 (0.1221) loss: 0.7861 (0.7899) time: 0.1435 data: 0.0608 max mem: 9377 +Train: [19] [2600/6250] eta: 0:08:46 lr: 0.000118 grad: 0.1216 (0.1220) loss: 0.7786 (0.7898) time: 0.1015 data: 0.0129 max mem: 9377 +Train: [19] [2700/6250] eta: 0:08:31 lr: 0.000118 grad: 0.1292 (0.1221) loss: 0.7669 (0.7895) time: 0.1392 data: 0.0594 max mem: 9377 +Train: [19] [2800/6250] eta: 0:08:15 lr: 0.000118 grad: 0.1151 (0.1222) loss: 0.7792 (0.7894) time: 0.1384 data: 0.0556 max mem: 9377 +Train: [19] [2900/6250] eta: 0:08:00 lr: 0.000118 grad: 0.1141 (0.1221) loss: 0.7905 (0.7893) time: 0.1474 data: 0.0671 max mem: 9377 +Train: [19] [3000/6250] eta: 0:07:45 lr: 0.000118 grad: 0.1177 (0.1221) loss: 0.7857 (0.7893) time: 0.1330 data: 0.0521 max mem: 9377 +Train: [19] [3100/6250] eta: 0:07:29 lr: 0.000118 grad: 0.1148 (0.1220) loss: 0.8000 (0.7894) time: 0.1075 data: 0.0244 max mem: 9377 +Train: [19] [3200/6250] eta: 0:07:15 lr: 0.000118 grad: 0.1250 (0.1221) loss: 0.7807 (0.7892) time: 0.1458 data: 0.0656 max mem: 9377 +Train: [19] [3300/6250] eta: 0:07:00 lr: 0.000118 grad: 0.1245 (0.1220) loss: 0.7813 (0.7892) time: 0.1230 data: 0.0431 max mem: 9377 +Train: [19] [3400/6250] eta: 0:06:46 lr: 0.000118 grad: 0.1264 (0.1219) loss: 0.7835 (0.7893) time: 0.1490 data: 0.0685 max mem: 9377 +Train: [19] [3500/6250] eta: 0:06:33 lr: 0.000118 grad: 0.1257 (0.1217) loss: 0.7824 (0.7893) time: 0.1585 data: 0.0730 max mem: 9377 +Train: [19] [3600/6250] eta: 0:06:20 lr: 0.000118 grad: 0.1109 (0.1216) loss: 0.7878 (0.7893) time: 0.1490 data: 0.0573 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:06 lr: 0.000118 grad: 0.1170 (0.1215) loss: 0.7887 (0.7894) time: 0.1519 data: 0.0703 max mem: 9377 +Train: [19] [3800/6250] eta: 0:05:52 lr: 0.000118 grad: 0.1177 (0.1213) loss: 0.7891 (0.7894) time: 0.1434 data: 0.0636 max mem: 9377 +Train: [19] [3900/6250] eta: 0:05:38 lr: 0.000118 grad: 0.1078 (0.1212) loss: 0.7890 (0.7894) time: 0.1418 data: 0.0609 max mem: 9377 +Train: [19] [4000/6250] eta: 0:05:23 lr: 0.000118 grad: 0.1162 (0.1211) loss: 0.7897 (0.7895) time: 0.1384 data: 0.0552 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:08 lr: 0.000118 grad: 0.1174 (0.1211) loss: 0.7917 (0.7895) time: 0.1397 data: 0.0570 max mem: 9377 +Train: [19] [4200/6250] eta: 0:04:53 lr: 0.000118 grad: 0.1198 (0.1212) loss: 0.7909 (0.7895) time: 0.1295 data: 0.0432 max mem: 9377 +Train: [19] [4300/6250] eta: 0:04:38 lr: 0.000118 grad: 0.1260 (0.1212) loss: 0.7967 (0.7895) time: 0.1561 data: 0.0773 max mem: 9377 +Train: [19] [4400/6250] eta: 0:04:23 lr: 0.000118 grad: 0.1163 (0.1211) loss: 0.7812 (0.7895) time: 0.1243 data: 0.0294 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:09 lr: 0.000118 grad: 0.1209 (0.1211) loss: 0.7873 (0.7896) time: 0.1464 data: 0.0667 max mem: 9377 +Train: [19] [4600/6250] eta: 0:03:55 lr: 0.000118 grad: 0.1214 (0.1211) loss: 0.7857 (0.7895) time: 0.1790 data: 0.1036 max mem: 9377 +Train: [19] [4700/6250] eta: 0:03:41 lr: 0.000118 grad: 0.1226 (0.1211) loss: 0.7897 (0.7895) time: 0.1555 data: 0.0707 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:27 lr: 0.000118 grad: 0.1176 (0.1212) loss: 0.7952 (0.7896) time: 0.1246 data: 0.0394 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:13 lr: 0.000118 grad: 0.1146 (0.1211) loss: 0.7903 (0.7896) time: 0.1454 data: 0.0609 max mem: 9377 +Train: [19] [5000/6250] eta: 0:02:58 lr: 0.000118 grad: 0.1152 (0.1211) loss: 0.7940 (0.7897) time: 0.1345 data: 0.0575 max mem: 9377 +Train: [19] [5100/6250] eta: 0:02:44 lr: 0.000118 grad: 0.1251 (0.1211) loss: 0.7834 (0.7897) time: 0.1359 data: 0.0482 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:30 lr: 0.000118 grad: 0.1185 (0.1211) loss: 0.7918 (0.7898) time: 0.1465 data: 0.0632 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:16 lr: 0.000118 grad: 0.1149 (0.1210) loss: 0.8067 (0.7898) time: 0.1468 data: 0.0633 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:01 lr: 0.000118 grad: 0.1173 (0.1210) loss: 0.8021 (0.7899) time: 0.1455 data: 0.0654 max mem: 9377 +Train: [19] [5500/6250] eta: 0:01:47 lr: 0.000118 grad: 0.1141 (0.1209) loss: 0.7890 (0.7899) time: 0.1287 data: 0.0464 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:33 lr: 0.000118 grad: 0.1140 (0.1208) loss: 0.7869 (0.7899) time: 0.1337 data: 0.0525 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:18 lr: 0.000118 grad: 0.1167 (0.1208) loss: 0.7910 (0.7900) time: 0.1462 data: 0.0672 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:04 lr: 0.000118 grad: 0.1304 (0.1208) loss: 0.7845 (0.7899) time: 0.1482 data: 0.0619 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:50 lr: 0.000118 grad: 0.1123 (0.1208) loss: 0.7862 (0.7899) time: 0.1363 data: 0.0525 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:35 lr: 0.000118 grad: 0.1158 (0.1207) loss: 0.8008 (0.7899) time: 0.1344 data: 0.0517 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:21 lr: 0.000117 grad: 0.1231 (0.1207) loss: 0.7802 (0.7898) time: 0.1506 data: 0.0697 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:07 lr: 0.000117 grad: 0.1161 (0.1207) loss: 0.7837 (0.7898) time: 0.1421 data: 0.0607 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.1178 (0.1207) loss: 0.7935 (0.7898) time: 0.1375 data: 0.0521 max mem: 9377 +Train: [19] Total time: 0:15:00 (0.1441 s / it) +Averaged stats: lr: 0.000117 grad: 0.1178 (0.1207) loss: 0.7935 (0.7898) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:05:02 loss: 0.8554 (0.8554) time: 4.8783 data: 4.8481 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8553 (0.8555) time: 0.1350 data: 0.1081 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:14 (0.2302 s / it) +Averaged stats (hcp-train-subset): loss: 0.8553 (0.8555) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [19] [ 0/62] eta: 0:05:14 loss: 0.8567 (0.8567) time: 5.0694 data: 5.0374 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8536 (0.8539) time: 0.1716 data: 0.1462 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:15 (0.2496 s / it) +Averaged stats (hcp-val): loss: 0.8536 (0.8539) +Making plots (hcp-val): example=6 +Eval (nsd-val): [19] [ 0/62] eta: 0:04:13 loss: 0.8171 (0.8171) time: 4.0962 data: 4.0290 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8255 (0.8277) time: 0.1323 data: 0.1072 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (nsd-val): loss: 0.8255 (0.8277) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 10:08:48 lr: 0.000117 grad: 0.0817 (0.0817) loss: 0.8517 (0.8517) time: 5.8446 data: 5.6788 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:22:01 lr: 0.000117 grad: 0.1115 (0.1470) loss: 0.8315 (0.8269) time: 0.1548 data: 0.0522 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:19:12 lr: 0.000117 grad: 0.1158 (0.1342) loss: 0.8084 (0.8222) time: 0.1832 data: 0.1019 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:17:44 lr: 0.000117 grad: 0.1205 (0.1306) loss: 0.7997 (0.8155) time: 0.1581 data: 0.0619 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:16:46 lr: 0.000117 grad: 0.1256 (0.1311) loss: 0.7776 (0.8079) time: 0.1460 data: 0.0593 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:16:02 lr: 0.000117 grad: 0.1214 (0.1294) loss: 0.7827 (0.8032) time: 0.1348 data: 0.0478 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:15:40 lr: 0.000117 grad: 0.1138 (0.1283) loss: 0.7952 (0.8006) time: 0.1535 data: 0.0672 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:15:19 lr: 0.000117 grad: 0.1127 (0.1269) loss: 0.7913 (0.7990) time: 0.1590 data: 0.0717 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:14:54 lr: 0.000117 grad: 0.1148 (0.1261) loss: 0.7893 (0.7980) time: 0.1568 data: 0.0730 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:14:21 lr: 0.000117 grad: 0.1153 (0.1251) loss: 0.7862 (0.7969) time: 0.1379 data: 0.0604 max mem: 9377 +Train: [20] [1000/6250] eta: 0:13:56 lr: 0.000117 grad: 0.1136 (0.1248) loss: 0.7886 (0.7964) time: 0.1385 data: 0.0520 max mem: 9377 +Train: [20] [1100/6250] eta: 0:13:36 lr: 0.000117 grad: 0.1136 (0.1243) loss: 0.7948 (0.7959) time: 0.1704 data: 0.0844 max mem: 9377 +Train: [20] [1200/6250] eta: 0:13:14 lr: 0.000117 grad: 0.1191 (0.1234) loss: 0.7875 (0.7956) time: 0.1537 data: 0.0655 max mem: 9377 +Train: [20] [1300/6250] eta: 0:12:51 lr: 0.000117 grad: 0.1196 (0.1229) loss: 0.7890 (0.7951) time: 0.1320 data: 0.0467 max mem: 9377 +Train: [20] [1400/6250] eta: 0:12:31 lr: 0.000117 grad: 0.1110 (0.1225) loss: 0.7920 (0.7946) time: 0.1347 data: 0.0381 max mem: 9377 +Train: [20] [1500/6250] eta: 0:12:09 lr: 0.000117 grad: 0.1146 (0.1221) loss: 0.7921 (0.7944) time: 0.1353 data: 0.0388 max mem: 9377 +Train: [20] [1600/6250] eta: 0:11:50 lr: 0.000117 grad: 0.1124 (0.1218) loss: 0.7865 (0.7942) time: 0.1530 data: 0.0669 max mem: 9377 +Train: [20] [1700/6250] eta: 0:11:34 lr: 0.000117 grad: 0.1133 (0.1215) loss: 0.7892 (0.7938) time: 0.1632 data: 0.0822 max mem: 9377 +Train: [20] [1800/6250] eta: 0:11:15 lr: 0.000117 grad: 0.1223 (0.1215) loss: 0.7887 (0.7933) time: 0.1375 data: 0.0526 max mem: 9377 +Train: [20] [1900/6250] eta: 0:10:59 lr: 0.000117 grad: 0.1198 (0.1215) loss: 0.7801 (0.7927) time: 0.1317 data: 0.0529 max mem: 9377 +Train: [20] [2000/6250] eta: 0:10:44 lr: 0.000117 grad: 0.1162 (0.1214) loss: 0.7704 (0.7923) time: 0.1500 data: 0.0716 max mem: 9377 +Train: [20] [2100/6250] eta: 0:10:26 lr: 0.000117 grad: 0.1238 (0.1214) loss: 0.7854 (0.7918) time: 0.1309 data: 0.0476 max mem: 9377 +Train: [20] [2200/6250] eta: 0:10:08 lr: 0.000117 grad: 0.1139 (0.1212) loss: 0.7842 (0.7916) time: 0.1331 data: 0.0479 max mem: 9377 +Train: [20] [2300/6250] eta: 0:09:51 lr: 0.000117 grad: 0.1066 (0.1209) loss: 0.7937 (0.7913) time: 0.1118 data: 0.0238 max mem: 9377 +Train: [20] [2400/6250] eta: 0:09:34 lr: 0.000117 grad: 0.1145 (0.1208) loss: 0.7871 (0.7911) time: 0.1306 data: 0.0496 max mem: 9377 +Train: [20] [2500/6250] eta: 0:09:18 lr: 0.000117 grad: 0.1154 (0.1207) loss: 0.7796 (0.7908) time: 0.1291 data: 0.0487 max mem: 9377 +Train: [20] [2600/6250] eta: 0:09:01 lr: 0.000117 grad: 0.1202 (0.1208) loss: 0.7765 (0.7905) time: 0.1456 data: 0.0634 max mem: 9377 +Train: [20] [2700/6250] eta: 0:08:44 lr: 0.000117 grad: 0.1085 (0.1207) loss: 0.7834 (0.7903) time: 0.1376 data: 0.0545 max mem: 9377 +Train: [20] [2800/6250] eta: 0:08:28 lr: 0.000117 grad: 0.1174 (0.1206) loss: 0.7859 (0.7900) time: 0.1142 data: 0.0307 max mem: 9377 +Train: [20] [2900/6250] eta: 0:08:13 lr: 0.000117 grad: 0.1225 (0.1204) loss: 0.7805 (0.7898) time: 0.1409 data: 0.0612 max mem: 9377 +Train: [20] [3000/6250] eta: 0:07:58 lr: 0.000117 grad: 0.1161 (0.1205) loss: 0.7793 (0.7895) time: 0.1410 data: 0.0672 max mem: 9377 +Train: [20] [3100/6250] eta: 0:07:42 lr: 0.000117 grad: 0.1237 (0.1204) loss: 0.7854 (0.7893) time: 0.1191 data: 0.0399 max mem: 9377 +Train: [20] [3200/6250] eta: 0:07:27 lr: 0.000117 grad: 0.1255 (0.1205) loss: 0.7771 (0.7892) time: 0.1198 data: 0.0379 max mem: 9377 +Train: [20] [3300/6250] eta: 0:07:11 lr: 0.000117 grad: 0.1182 (0.1205) loss: 0.7838 (0.7891) time: 0.1213 data: 0.0412 max mem: 9377 +Train: [20] [3400/6250] eta: 0:06:55 lr: 0.000117 grad: 0.1150 (0.1205) loss: 0.7829 (0.7890) time: 0.1304 data: 0.0482 max mem: 9377 +Train: [20] [3500/6250] eta: 0:06:40 lr: 0.000117 grad: 0.1155 (0.1204) loss: 0.7962 (0.7890) time: 0.1293 data: 0.0489 max mem: 9377 +Train: [20] [3600/6250] eta: 0:06:25 lr: 0.000117 grad: 0.1081 (0.1202) loss: 0.7984 (0.7891) time: 0.1334 data: 0.0523 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:12 lr: 0.000117 grad: 0.1156 (0.1201) loss: 0.7913 (0.7889) time: 0.1648 data: 0.0821 max mem: 9377 +Train: [20] [3800/6250] eta: 0:05:57 lr: 0.000117 grad: 0.1255 (0.1201) loss: 0.7809 (0.7889) time: 0.1454 data: 0.0593 max mem: 9377 +Train: [20] [3900/6250] eta: 0:05:43 lr: 0.000117 grad: 0.1244 (0.1201) loss: 0.7804 (0.7889) time: 0.1612 data: 0.0748 max mem: 9377 +Train: [20] [4000/6250] eta: 0:05:27 lr: 0.000117 grad: 0.1222 (0.1201) loss: 0.7885 (0.7890) time: 0.1326 data: 0.0557 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:13 lr: 0.000117 grad: 0.1103 (0.1201) loss: 0.7981 (0.7890) time: 0.1408 data: 0.0625 max mem: 9377 +Train: [20] [4200/6250] eta: 0:04:58 lr: 0.000117 grad: 0.1139 (0.1200) loss: 0.7951 (0.7890) time: 0.1392 data: 0.0554 max mem: 9377 +Train: [20] [4300/6250] eta: 0:04:43 lr: 0.000117 grad: 0.1134 (0.1200) loss: 0.7846 (0.7891) time: 0.1335 data: 0.0479 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:28 lr: 0.000117 grad: 0.1224 (0.1200) loss: 0.7866 (0.7890) time: 0.1490 data: 0.0650 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:13 lr: 0.000117 grad: 0.1114 (0.1200) loss: 0.7893 (0.7890) time: 0.1285 data: 0.0385 max mem: 9377 +Train: [20] [4600/6250] eta: 0:03:58 lr: 0.000117 grad: 0.1169 (0.1199) loss: 0.7905 (0.7890) time: 0.1176 data: 0.0347 max mem: 9377 +Train: [20] [4700/6250] eta: 0:03:43 lr: 0.000117 grad: 0.1063 (0.1197) loss: 0.7931 (0.7890) time: 0.1308 data: 0.0432 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:28 lr: 0.000117 grad: 0.1207 (0.1197) loss: 0.7838 (0.7890) time: 0.1476 data: 0.0630 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:13 lr: 0.000117 grad: 0.1237 (0.1197) loss: 0.7859 (0.7889) time: 0.1452 data: 0.0652 max mem: 9377 +Train: [20] [5000/6250] eta: 0:02:59 lr: 0.000117 grad: 0.1124 (0.1198) loss: 0.7965 (0.7889) time: 0.1049 data: 0.0174 max mem: 9377 +Train: [20] [5100/6250] eta: 0:02:45 lr: 0.000117 grad: 0.1103 (0.1197) loss: 0.7976 (0.7890) time: 0.1352 data: 0.0555 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:30 lr: 0.000117 grad: 0.1131 (0.1196) loss: 0.7789 (0.7889) time: 0.1340 data: 0.0541 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:16 lr: 0.000117 grad: 0.1127 (0.1195) loss: 0.7850 (0.7890) time: 0.1454 data: 0.0628 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:02 lr: 0.000117 grad: 0.1129 (0.1195) loss: 0.7799 (0.7889) time: 0.1403 data: 0.0639 max mem: 9377 +Train: [20] [5500/6250] eta: 0:01:47 lr: 0.000117 grad: 0.1180 (0.1195) loss: 0.7821 (0.7889) time: 0.0852 data: 0.0067 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:33 lr: 0.000117 grad: 0.1132 (0.1194) loss: 0.7983 (0.7889) time: 0.1348 data: 0.0577 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:18 lr: 0.000117 grad: 0.1074 (0.1193) loss: 0.7944 (0.7890) time: 0.1269 data: 0.0491 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:04 lr: 0.000117 grad: 0.1197 (0.1193) loss: 0.7969 (0.7891) time: 0.1340 data: 0.0597 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:50 lr: 0.000117 grad: 0.1076 (0.1192) loss: 0.7983 (0.7891) time: 0.1444 data: 0.0612 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:35 lr: 0.000116 grad: 0.1205 (0.1191) loss: 0.7976 (0.7893) time: 0.1324 data: 0.0477 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:21 lr: 0.000116 grad: 0.1093 (0.1190) loss: 0.7914 (0.7894) time: 0.1306 data: 0.0497 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:07 lr: 0.000116 grad: 0.1143 (0.1189) loss: 0.7858 (0.7894) time: 0.1393 data: 0.0594 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.1099 (0.1189) loss: 0.7954 (0.7895) time: 0.1361 data: 0.0566 max mem: 9377 +Train: [20] Total time: 0:15:01 (0.1442 s / it) +Averaged stats: lr: 0.000116 grad: 0.1099 (0.1189) loss: 0.7954 (0.7895) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:03:03 loss: 0.8539 (0.8539) time: 2.9652 data: 2.8827 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8576 (0.8571) time: 0.1327 data: 0.1073 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:13 (0.2130 s / it) +Averaged stats (hcp-train-subset): loss: 0.8576 (0.8571) +Eval (hcp-val): [20] [ 0/62] eta: 0:04:03 loss: 0.8518 (0.8518) time: 3.9208 data: 3.8484 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8548 (0.8542) time: 0.1181 data: 0.0912 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:13 (0.2152 s / it) +Averaged stats (hcp-val): loss: 0.8548 (0.8542) +Eval (nsd-val): [20] [ 0/62] eta: 0:03:19 loss: 0.8134 (0.8134) time: 3.2243 data: 3.1367 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8260 (0.8252) time: 0.1321 data: 0.1070 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:12 (0.2001 s / it) +Averaged stats (nsd-val): loss: 0.8260 (0.8252) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [21] [ 0/6250] eta: 9:13:09 lr: 0.000116 grad: 0.0901 (0.0901) loss: 0.8811 (0.8811) time: 5.3104 data: 5.2139 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:19:24 lr: 0.000116 grad: 0.1291 (0.1553) loss: 0.8282 (0.8306) time: 0.1595 data: 0.0702 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:16:37 lr: 0.000116 grad: 0.1364 (0.1471) loss: 0.8028 (0.8178) time: 0.1671 data: 0.0822 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:15:58 lr: 0.000116 grad: 0.1123 (0.1399) loss: 0.8058 (0.8124) time: 0.1482 data: 0.0599 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:15:38 lr: 0.000116 grad: 0.1184 (0.1359) loss: 0.8007 (0.8085) time: 0.1822 data: 0.1018 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:15:01 lr: 0.000116 grad: 0.1173 (0.1339) loss: 0.8008 (0.8052) time: 0.1331 data: 0.0438 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:14:36 lr: 0.000116 grad: 0.1204 (0.1325) loss: 0.7923 (0.8022) time: 0.1557 data: 0.0674 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:14:16 lr: 0.000116 grad: 0.1110 (0.1307) loss: 0.7928 (0.8003) time: 0.1499 data: 0.0639 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:14:02 lr: 0.000116 grad: 0.1168 (0.1288) loss: 0.7999 (0.7989) time: 0.1858 data: 0.1059 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:13:43 lr: 0.000116 grad: 0.1217 (0.1275) loss: 0.7894 (0.7978) time: 0.1576 data: 0.0685 max mem: 9377 +Train: [21] [1000/6250] eta: 0:13:19 lr: 0.000116 grad: 0.1164 (0.1267) loss: 0.7857 (0.7967) time: 0.1377 data: 0.0635 max mem: 9377 +Train: [21] [1100/6250] eta: 0:13:04 lr: 0.000116 grad: 0.1165 (0.1259) loss: 0.7853 (0.7957) time: 0.1600 data: 0.0711 max mem: 9377 +Train: [21] [1200/6250] eta: 0:12:50 lr: 0.000116 grad: 0.1192 (0.1257) loss: 0.7868 (0.7944) time: 0.1552 data: 0.0709 max mem: 9377 +Train: [21] [1300/6250] eta: 0:12:31 lr: 0.000116 grad: 0.1183 (0.1255) loss: 0.7809 (0.7933) time: 0.1402 data: 0.0524 max mem: 9377 +Train: [21] [1400/6250] eta: 0:12:11 lr: 0.000116 grad: 0.1210 (0.1250) loss: 0.7920 (0.7926) time: 0.1297 data: 0.0485 max mem: 9377 +Train: [21] [1500/6250] eta: 0:11:51 lr: 0.000116 grad: 0.1195 (0.1246) loss: 0.7779 (0.7919) time: 0.1352 data: 0.0557 max mem: 9377 +Train: [21] [1600/6250] eta: 0:11:33 lr: 0.000116 grad: 0.1255 (0.1243) loss: 0.7738 (0.7912) time: 0.1401 data: 0.0528 max mem: 9377 +Train: [21] [1700/6250] eta: 0:11:17 lr: 0.000116 grad: 0.1158 (0.1240) loss: 0.7818 (0.7906) time: 0.1475 data: 0.0693 max mem: 9377 +Train: [21] [1800/6250] eta: 0:11:00 lr: 0.000116 grad: 0.1098 (0.1238) loss: 0.7955 (0.7902) time: 0.1508 data: 0.0751 max mem: 9377 +Train: [21] [1900/6250] eta: 0:10:43 lr: 0.000116 grad: 0.1131 (0.1236) loss: 0.7890 (0.7899) time: 0.1346 data: 0.0532 max mem: 9377 +Train: [21] [2000/6250] eta: 0:10:28 lr: 0.000116 grad: 0.1158 (0.1232) loss: 0.7746 (0.7897) time: 0.1461 data: 0.0669 max mem: 9377 +Train: [21] [2100/6250] eta: 0:10:11 lr: 0.000116 grad: 0.1205 (0.1230) loss: 0.7790 (0.7896) time: 0.1195 data: 0.0302 max mem: 9377 +Train: [21] [2200/6250] eta: 0:09:54 lr: 0.000116 grad: 0.1244 (0.1229) loss: 0.7742 (0.7893) time: 0.1259 data: 0.0350 max mem: 9377 +Train: [21] [2300/6250] eta: 0:09:38 lr: 0.000116 grad: 0.1169 (0.1228) loss: 0.7777 (0.7890) time: 0.1340 data: 0.0495 max mem: 9377 +Train: [21] [2400/6250] eta: 0:09:22 lr: 0.000116 grad: 0.1262 (0.1229) loss: 0.7752 (0.7885) time: 0.1425 data: 0.0633 max mem: 9377 +Train: [21] [2500/6250] eta: 0:09:05 lr: 0.000116 grad: 0.1188 (0.1228) loss: 0.7668 (0.7883) time: 0.1360 data: 0.0561 max mem: 9377 +Train: [21] [2600/6250] eta: 0:08:49 lr: 0.000116 grad: 0.1163 (0.1228) loss: 0.7873 (0.7878) time: 0.1478 data: 0.0675 max mem: 9377 +Train: [21] [2700/6250] eta: 0:08:33 lr: 0.000116 grad: 0.1225 (0.1228) loss: 0.7687 (0.7873) time: 0.1339 data: 0.0550 max mem: 9377 +Train: [21] [2800/6250] eta: 0:08:17 lr: 0.000116 grad: 0.1139 (0.1228) loss: 0.7862 (0.7870) time: 0.1198 data: 0.0341 max mem: 9377 +Train: [21] [2900/6250] eta: 0:08:02 lr: 0.000116 grad: 0.1225 (0.1227) loss: 0.7813 (0.7866) time: 0.1360 data: 0.0537 max mem: 9377 +Train: [21] [3000/6250] eta: 0:07:48 lr: 0.000116 grad: 0.1149 (0.1227) loss: 0.7821 (0.7863) time: 0.1936 data: 0.1116 max mem: 9377 +Train: [21] [3100/6250] eta: 0:07:32 lr: 0.000116 grad: 0.1194 (0.1226) loss: 0.7720 (0.7860) time: 0.1384 data: 0.0539 max mem: 9377 +Train: [21] [3200/6250] eta: 0:07:17 lr: 0.000116 grad: 0.1229 (0.1226) loss: 0.7830 (0.7857) time: 0.1366 data: 0.0608 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:02 lr: 0.000116 grad: 0.1171 (0.1224) loss: 0.7748 (0.7856) time: 0.1168 data: 0.0330 max mem: 9377 +Train: [21] [3400/6250] eta: 0:06:47 lr: 0.000116 grad: 0.1227 (0.1226) loss: 0.7736 (0.7854) time: 0.1276 data: 0.0463 max mem: 9377 +Train: [21] [3500/6250] eta: 0:06:31 lr: 0.000116 grad: 0.1298 (0.1227) loss: 0.7805 (0.7852) time: 0.1345 data: 0.0501 max mem: 9377 +Train: [21] [3600/6250] eta: 0:06:17 lr: 0.000116 grad: 0.1207 (0.1227) loss: 0.7745 (0.7851) time: 0.1234 data: 0.0457 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:02 lr: 0.000116 grad: 0.1137 (0.1226) loss: 0.7848 (0.7849) time: 0.1345 data: 0.0570 max mem: 9377 +Train: [21] [3800/6250] eta: 0:05:47 lr: 0.000116 grad: 0.1225 (0.1226) loss: 0.7880 (0.7848) time: 0.1448 data: 0.0648 max mem: 9377 +Train: [21] [3900/6250] eta: 0:05:33 lr: 0.000116 grad: 0.1117 (0.1226) loss: 0.7963 (0.7847) time: 0.1558 data: 0.0722 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:18 lr: 0.000116 grad: 0.1210 (0.1225) loss: 0.7782 (0.7847) time: 0.1341 data: 0.0528 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:04 lr: 0.000116 grad: 0.1144 (0.1224) loss: 0.7680 (0.7846) time: 0.1220 data: 0.0357 max mem: 9377 +Train: [21] [4200/6250] eta: 0:04:50 lr: 0.000116 grad: 0.1278 (0.1224) loss: 0.7766 (0.7845) time: 0.1324 data: 0.0552 max mem: 9377 +Train: [21] [4300/6250] eta: 0:04:37 lr: 0.000116 grad: 0.1220 (0.1224) loss: 0.7813 (0.7845) time: 0.1644 data: 0.0838 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:23 lr: 0.000116 grad: 0.1262 (0.1225) loss: 0.7784 (0.7844) time: 0.1392 data: 0.0528 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:09 lr: 0.000116 grad: 0.1246 (0.1225) loss: 0.7782 (0.7843) time: 0.1362 data: 0.0499 max mem: 9377 +Train: [21] [4600/6250] eta: 0:03:54 lr: 0.000116 grad: 0.1167 (0.1224) loss: 0.7801 (0.7843) time: 0.1400 data: 0.0609 max mem: 9377 +Train: [21] [4700/6250] eta: 0:03:40 lr: 0.000116 grad: 0.1210 (0.1224) loss: 0.7882 (0.7843) time: 0.1520 data: 0.0733 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:26 lr: 0.000116 grad: 0.1217 (0.1224) loss: 0.7858 (0.7843) time: 0.1526 data: 0.0704 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:12 lr: 0.000116 grad: 0.1222 (0.1224) loss: 0.7810 (0.7842) time: 0.1375 data: 0.0568 max mem: 9377 +Train: [21] [5000/6250] eta: 0:02:57 lr: 0.000116 grad: 0.1098 (0.1224) loss: 0.7882 (0.7842) time: 0.1201 data: 0.0337 max mem: 9377 +Train: [21] [5100/6250] eta: 0:02:43 lr: 0.000116 grad: 0.1131 (0.1223) loss: 0.7862 (0.7843) time: 0.1301 data: 0.0458 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:28 lr: 0.000116 grad: 0.1140 (0.1222) loss: 0.7881 (0.7843) time: 0.1371 data: 0.0555 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:14 lr: 0.000116 grad: 0.1166 (0.1222) loss: 0.7937 (0.7844) time: 0.1752 data: 0.0936 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:00 lr: 0.000116 grad: 0.1203 (0.1222) loss: 0.7843 (0.7843) time: 0.1948 data: 0.1185 max mem: 9377 +Train: [21] [5500/6250] eta: 0:01:46 lr: 0.000116 grad: 0.1192 (0.1222) loss: 0.7873 (0.7842) time: 0.1566 data: 0.0770 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:32 lr: 0.000115 grad: 0.1169 (0.1222) loss: 0.7767 (0.7842) time: 0.1431 data: 0.0670 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:18 lr: 0.000115 grad: 0.1198 (0.1222) loss: 0.7684 (0.7841) time: 0.1741 data: 0.0964 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:04 lr: 0.000115 grad: 0.1262 (0.1223) loss: 0.7610 (0.7840) time: 0.1522 data: 0.0737 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:50 lr: 0.000115 grad: 0.1200 (0.1224) loss: 0.7877 (0.7838) time: 0.1188 data: 0.0390 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:35 lr: 0.000115 grad: 0.1190 (0.1225) loss: 0.7820 (0.7837) time: 0.1621 data: 0.0798 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:21 lr: 0.000115 grad: 0.1234 (0.1225) loss: 0.7757 (0.7836) time: 0.1565 data: 0.0788 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:07 lr: 0.000115 grad: 0.1187 (0.1225) loss: 0.7790 (0.7835) time: 0.1416 data: 0.0638 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.1178 (0.1225) loss: 0.7654 (0.7834) time: 0.1661 data: 0.0820 max mem: 9377 +Train: [21] Total time: 0:15:05 (0.1448 s / it) +Averaged stats: lr: 0.000115 grad: 0.1178 (0.1225) loss: 0.7654 (0.7834) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:04:20 loss: 0.8569 (0.8569) time: 4.1952 data: 4.1593 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8584 (0.8578) time: 0.1382 data: 0.1056 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:13 (0.2138 s / it) +Averaged stats (hcp-train-subset): loss: 0.8584 (0.8578) +Eval (hcp-val): [21] [ 0/62] eta: 0:03:04 loss: 0.8553 (0.8553) time: 2.9754 data: 2.8790 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8547 (0.8554) time: 0.1171 data: 0.0920 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:13 (0.2194 s / it) +Averaged stats (hcp-val): loss: 0.8547 (0.8554) +Eval (nsd-val): [21] [ 0/62] eta: 0:04:38 loss: 0.8200 (0.8200) time: 4.4845 data: 4.4546 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8296 (0.8295) time: 0.1306 data: 0.1051 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:12 (0.1987 s / it) +Averaged stats (nsd-val): loss: 0.8296 (0.8295) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [22] [ 0/6250] eta: 8:23:41 lr: 0.000115 grad: 0.1096 (0.1096) loss: 0.8115 (0.8115) time: 4.8355 data: 4.6724 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:19:13 lr: 0.000115 grad: 0.1341 (0.1636) loss: 0.8231 (0.8146) time: 0.1493 data: 0.0700 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:16:38 lr: 0.000115 grad: 0.1272 (0.1518) loss: 0.8126 (0.8048) time: 0.1299 data: 0.0522 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:15:33 lr: 0.000115 grad: 0.1336 (0.1483) loss: 0.7635 (0.7959) time: 0.1410 data: 0.0492 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:14:41 lr: 0.000115 grad: 0.1210 (0.1451) loss: 0.7828 (0.7930) time: 0.1231 data: 0.0414 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:14:08 lr: 0.000115 grad: 0.1257 (0.1423) loss: 0.7946 (0.7912) time: 0.1234 data: 0.0281 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:13:37 lr: 0.000115 grad: 0.1245 (0.1406) loss: 0.7848 (0.7893) time: 0.1460 data: 0.0586 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:13:12 lr: 0.000115 grad: 0.1151 (0.1379) loss: 0.7927 (0.7887) time: 0.1330 data: 0.0450 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:13:07 lr: 0.000115 grad: 0.1181 (0.1361) loss: 0.7860 (0.7883) time: 0.1505 data: 0.0686 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:12:56 lr: 0.000115 grad: 0.1189 (0.1346) loss: 0.7866 (0.7880) time: 0.1340 data: 0.0413 max mem: 9377 +Train: [22] [1000/6250] eta: 0:12:46 lr: 0.000115 grad: 0.1216 (0.1334) loss: 0.7925 (0.7878) time: 0.1303 data: 0.0523 max mem: 9377 +Train: [22] [1100/6250] eta: 0:12:29 lr: 0.000115 grad: 0.1180 (0.1324) loss: 0.7812 (0.7874) time: 0.1489 data: 0.0668 max mem: 9377 +Train: [22] [1200/6250] eta: 0:12:09 lr: 0.000115 grad: 0.1217 (0.1314) loss: 0.7791 (0.7870) time: 0.1207 data: 0.0346 max mem: 9377 +Train: [22] [1300/6250] eta: 0:11:50 lr: 0.000115 grad: 0.1199 (0.1308) loss: 0.7676 (0.7863) time: 0.0963 data: 0.0007 max mem: 9377 +Train: [22] [1400/6250] eta: 0:11:34 lr: 0.000115 grad: 0.1172 (0.1302) loss: 0.7880 (0.7857) time: 0.1417 data: 0.0626 max mem: 9377 +Train: [22] [1500/6250] eta: 0:11:17 lr: 0.000115 grad: 0.1139 (0.1296) loss: 0.7792 (0.7851) time: 0.1257 data: 0.0378 max mem: 9377 +Train: [22] [1600/6250] eta: 0:11:01 lr: 0.000115 grad: 0.1213 (0.1292) loss: 0.7840 (0.7845) time: 0.1367 data: 0.0534 max mem: 9377 +Train: [22] [1700/6250] eta: 0:10:46 lr: 0.000115 grad: 0.1140 (0.1285) loss: 0.7694 (0.7841) time: 0.1552 data: 0.0763 max mem: 9377 +Train: [22] [1800/6250] eta: 0:10:30 lr: 0.000115 grad: 0.1329 (0.1284) loss: 0.7709 (0.7835) time: 0.1247 data: 0.0427 max mem: 9377 +Train: [22] [1900/6250] eta: 0:10:19 lr: 0.000115 grad: 0.1254 (0.1283) loss: 0.7658 (0.7832) time: 0.1450 data: 0.0630 max mem: 9377 +Train: [22] [2000/6250] eta: 0:10:04 lr: 0.000115 grad: 0.1455 (0.1283) loss: 0.7720 (0.7826) time: 0.1475 data: 0.0641 max mem: 9377 +Train: [22] [2100/6250] eta: 0:09:48 lr: 0.000115 grad: 0.1197 (0.1282) loss: 0.7731 (0.7823) time: 0.1334 data: 0.0542 max mem: 9377 +Train: [22] [2200/6250] eta: 0:09:34 lr: 0.000115 grad: 0.1238 (0.1278) loss: 0.7827 (0.7821) time: 0.1416 data: 0.0622 max mem: 9377 +Train: [22] [2300/6250] eta: 0:09:19 lr: 0.000115 grad: 0.1219 (0.1277) loss: 0.7807 (0.7818) time: 0.1688 data: 0.0918 max mem: 9377 +Train: [22] [2400/6250] eta: 0:09:03 lr: 0.000115 grad: 0.1213 (0.1277) loss: 0.7681 (0.7815) time: 0.1251 data: 0.0356 max mem: 9377 +Train: [22] [2500/6250] eta: 0:08:48 lr: 0.000115 grad: 0.1208 (0.1276) loss: 0.7763 (0.7812) time: 0.1004 data: 0.0128 max mem: 9377 +Train: [22] [2600/6250] eta: 0:08:33 lr: 0.000115 grad: 0.1201 (0.1276) loss: 0.7799 (0.7809) time: 0.1369 data: 0.0562 max mem: 9377 +Train: [22] [2700/6250] eta: 0:08:19 lr: 0.000115 grad: 0.1142 (0.1276) loss: 0.7775 (0.7807) time: 0.1302 data: 0.0462 max mem: 9377 +Train: [22] [2800/6250] eta: 0:08:04 lr: 0.000115 grad: 0.1164 (0.1275) loss: 0.7661 (0.7802) time: 0.1332 data: 0.0533 max mem: 9377 +Train: [22] [2900/6250] eta: 0:07:50 lr: 0.000115 grad: 0.1162 (0.1274) loss: 0.7729 (0.7801) time: 0.1268 data: 0.0485 max mem: 9377 +Train: [22] [3000/6250] eta: 0:07:35 lr: 0.000115 grad: 0.1207 (0.1273) loss: 0.7791 (0.7800) time: 0.1400 data: 0.0573 max mem: 9377 +Train: [22] [3100/6250] eta: 0:07:21 lr: 0.000115 grad: 0.1251 (0.1272) loss: 0.7770 (0.7798) time: 0.1336 data: 0.0521 max mem: 9377 +Train: [22] [3200/6250] eta: 0:07:08 lr: 0.000115 grad: 0.1226 (0.1271) loss: 0.7660 (0.7796) time: 0.1396 data: 0.0583 max mem: 9377 +Train: [22] [3300/6250] eta: 0:06:53 lr: 0.000115 grad: 0.1138 (0.1269) loss: 0.7813 (0.7796) time: 0.1303 data: 0.0494 max mem: 9377 +Train: [22] [3400/6250] eta: 0:06:39 lr: 0.000115 grad: 0.1249 (0.1268) loss: 0.7817 (0.7794) time: 0.1410 data: 0.0649 max mem: 9377 +Train: [22] [3500/6250] eta: 0:06:25 lr: 0.000115 grad: 0.1148 (0.1268) loss: 0.7830 (0.7794) time: 0.1309 data: 0.0470 max mem: 9377 +Train: [22] [3600/6250] eta: 0:06:11 lr: 0.000115 grad: 0.1279 (0.1267) loss: 0.7794 (0.7793) time: 0.1326 data: 0.0537 max mem: 9377 +Train: [22] [3700/6250] eta: 0:05:57 lr: 0.000115 grad: 0.1297 (0.1267) loss: 0.7735 (0.7793) time: 0.1362 data: 0.0549 max mem: 9377 +Train: [22] [3800/6250] eta: 0:05:43 lr: 0.000115 grad: 0.1342 (0.1266) loss: 0.7828 (0.7794) time: 0.1336 data: 0.0501 max mem: 9377 +Train: [22] [3900/6250] eta: 0:05:28 lr: 0.000115 grad: 0.1190 (0.1266) loss: 0.7747 (0.7793) time: 0.1263 data: 0.0400 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:14 lr: 0.000115 grad: 0.1202 (0.1266) loss: 0.7850 (0.7793) time: 0.1282 data: 0.0479 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:00 lr: 0.000115 grad: 0.1143 (0.1265) loss: 0.7794 (0.7793) time: 0.1456 data: 0.0688 max mem: 9377 +Train: [22] [4200/6250] eta: 0:04:46 lr: 0.000115 grad: 0.1224 (0.1265) loss: 0.7800 (0.7793) time: 0.1367 data: 0.0544 max mem: 9377 +Train: [22] [4300/6250] eta: 0:04:32 lr: 0.000115 grad: 0.1290 (0.1263) loss: 0.7776 (0.7793) time: 0.1353 data: 0.0481 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:18 lr: 0.000115 grad: 0.1152 (0.1263) loss: 0.7749 (0.7793) time: 0.1236 data: 0.0374 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:05 lr: 0.000115 grad: 0.1215 (0.1262) loss: 0.7640 (0.7792) time: 0.1430 data: 0.0554 max mem: 9377 +Train: [22] [4600/6250] eta: 0:03:51 lr: 0.000115 grad: 0.1192 (0.1262) loss: 0.7717 (0.7791) time: 0.1640 data: 0.0704 max mem: 9377 +Train: [22] [4700/6250] eta: 0:03:38 lr: 0.000115 grad: 0.1200 (0.1261) loss: 0.7761 (0.7791) time: 0.1486 data: 0.0690 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:24 lr: 0.000115 grad: 0.1169 (0.1261) loss: 0.7732 (0.7790) time: 0.1411 data: 0.0594 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:10 lr: 0.000114 grad: 0.1236 (0.1261) loss: 0.7808 (0.7789) time: 0.1443 data: 0.0598 max mem: 9377 +Train: [22] [5000/6250] eta: 0:02:56 lr: 0.000114 grad: 0.1289 (0.1262) loss: 0.7671 (0.7788) time: 0.1418 data: 0.0547 max mem: 9377 +Train: [22] [5100/6250] eta: 0:02:42 lr: 0.000114 grad: 0.1238 (0.1262) loss: 0.7741 (0.7787) time: 0.1383 data: 0.0533 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:28 lr: 0.000114 grad: 0.1224 (0.1262) loss: 0.7799 (0.7786) time: 0.1364 data: 0.0477 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:13 lr: 0.000114 grad: 0.1245 (0.1262) loss: 0.7787 (0.7785) time: 0.1279 data: 0.0412 max mem: 9377 +Train: [22] [5400/6250] eta: 0:01:59 lr: 0.000114 grad: 0.1259 (0.1262) loss: 0.7701 (0.7784) time: 0.1418 data: 0.0598 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:45 lr: 0.000114 grad: 0.1176 (0.1261) loss: 0.7716 (0.7784) time: 0.1418 data: 0.0612 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:31 lr: 0.000114 grad: 0.1202 (0.1260) loss: 0.7805 (0.7784) time: 0.1438 data: 0.0623 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:17 lr: 0.000114 grad: 0.1175 (0.1260) loss: 0.7794 (0.7784) time: 0.1365 data: 0.0636 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:03 lr: 0.000114 grad: 0.1197 (0.1259) loss: 0.7691 (0.7783) time: 0.1358 data: 0.0549 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:49 lr: 0.000114 grad: 0.1176 (0.1259) loss: 0.7729 (0.7782) time: 0.1461 data: 0.0673 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:35 lr: 0.000114 grad: 0.1206 (0.1259) loss: 0.7689 (0.7781) time: 0.1302 data: 0.0563 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:21 lr: 0.000114 grad: 0.1359 (0.1260) loss: 0.7679 (0.7780) time: 0.1176 data: 0.0351 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.1185 (0.1260) loss: 0.7811 (0.7778) time: 0.1376 data: 0.0560 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.1270 (0.1260) loss: 0.7710 (0.7777) time: 0.1398 data: 0.0613 max mem: 9377 +Train: [22] Total time: 0:14:50 (0.1424 s / it) +Averaged stats: lr: 0.000114 grad: 0.1270 (0.1260) loss: 0.7710 (0.7777) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:03:49 loss: 0.8512 (0.8512) time: 3.6962 data: 3.6342 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8533 (0.8552) time: 0.1360 data: 0.1089 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:13 (0.2188 s / it) +Averaged stats (hcp-train-subset): loss: 0.8533 (0.8552) +Eval (hcp-val): [22] [ 0/62] eta: 0:04:53 loss: 0.8559 (0.8559) time: 4.7303 data: 4.6952 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8518 (0.8540) time: 0.1303 data: 0.1051 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:13 (0.2203 s / it) +Averaged stats (hcp-val): loss: 0.8518 (0.8540) +Eval (nsd-val): [22] [ 0/62] eta: 0:04:14 loss: 0.8188 (0.8188) time: 4.0998 data: 4.0637 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8284 (0.8303) time: 0.1304 data: 0.1049 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:12 (0.2031 s / it) +Averaged stats (nsd-val): loss: 0.8284 (0.8303) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [23] [ 0/6250] eta: 7:05:48 lr: 0.000114 grad: 0.4148 (0.4148) loss: 0.7670 (0.7670) time: 4.0877 data: 3.9006 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:19:21 lr: 0.000114 grad: 0.1589 (0.1882) loss: 0.7776 (0.8034) time: 0.1433 data: 0.0514 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:16:39 lr: 0.000114 grad: 0.1314 (0.1703) loss: 0.7642 (0.7931) time: 0.1635 data: 0.0783 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:15:45 lr: 0.000114 grad: 0.1402 (0.1648) loss: 0.7790 (0.7875) time: 0.1644 data: 0.0844 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:14:51 lr: 0.000114 grad: 0.1366 (0.1578) loss: 0.7601 (0.7835) time: 0.1145 data: 0.0299 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:14:15 lr: 0.000114 grad: 0.1221 (0.1515) loss: 0.7846 (0.7821) time: 0.1306 data: 0.0422 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:13:43 lr: 0.000114 grad: 0.1242 (0.1473) loss: 0.7857 (0.7819) time: 0.1358 data: 0.0520 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:13:26 lr: 0.000114 grad: 0.1204 (0.1437) loss: 0.7804 (0.7824) time: 0.1985 data: 0.1042 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:13:12 lr: 0.000114 grad: 0.1192 (0.1405) loss: 0.7881 (0.7827) time: 0.1635 data: 0.0727 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:13:03 lr: 0.000114 grad: 0.1125 (0.1384) loss: 0.7866 (0.7826) time: 0.1386 data: 0.0590 max mem: 9377 +Train: [23] [1000/6250] eta: 0:12:50 lr: 0.000114 grad: 0.1235 (0.1369) loss: 0.7842 (0.7824) time: 0.1318 data: 0.0504 max mem: 9377 +Train: [23] [1100/6250] eta: 0:12:29 lr: 0.000114 grad: 0.1172 (0.1356) loss: 0.7818 (0.7821) time: 0.1390 data: 0.0578 max mem: 9377 +Train: [23] [1200/6250] eta: 0:12:11 lr: 0.000114 grad: 0.1184 (0.1344) loss: 0.7791 (0.7819) time: 0.1469 data: 0.0649 max mem: 9377 +Train: [23] [1300/6250] eta: 0:11:53 lr: 0.000114 grad: 0.1130 (0.1332) loss: 0.7861 (0.7820) time: 0.1406 data: 0.0537 max mem: 9377 +Train: [23] [1400/6250] eta: 0:11:32 lr: 0.000114 grad: 0.1142 (0.1322) loss: 0.7810 (0.7817) time: 0.1359 data: 0.0523 max mem: 9377 +Train: [23] [1500/6250] eta: 0:11:15 lr: 0.000114 grad: 0.1169 (0.1312) loss: 0.7851 (0.7816) time: 0.1193 data: 0.0312 max mem: 9377 +Train: [23] [1600/6250] eta: 0:10:57 lr: 0.000114 grad: 0.1190 (0.1306) loss: 0.7853 (0.7815) time: 0.1283 data: 0.0375 max mem: 9377 +Train: [23] [1700/6250] eta: 0:10:41 lr: 0.000114 grad: 0.1156 (0.1300) loss: 0.7820 (0.7814) time: 0.1510 data: 0.0738 max mem: 9377 +Train: [23] [1800/6250] eta: 0:10:27 lr: 0.000114 grad: 0.1223 (0.1296) loss: 0.7704 (0.7811) time: 0.1355 data: 0.0519 max mem: 9377 +Train: [23] [1900/6250] eta: 0:10:12 lr: 0.000114 grad: 0.1146 (0.1291) loss: 0.7819 (0.7811) time: 0.1140 data: 0.0253 max mem: 9377 +Train: [23] [2000/6250] eta: 0:09:57 lr: 0.000114 grad: 0.1138 (0.1286) loss: 0.7880 (0.7811) time: 0.1448 data: 0.0621 max mem: 9377 +Train: [23] [2100/6250] eta: 0:09:43 lr: 0.000114 grad: 0.1196 (0.1282) loss: 0.7816 (0.7813) time: 0.1417 data: 0.0558 max mem: 9377 +Train: [23] [2200/6250] eta: 0:09:28 lr: 0.000114 grad: 0.1181 (0.1278) loss: 0.7819 (0.7813) time: 0.1302 data: 0.0468 max mem: 9377 +Train: [23] [2300/6250] eta: 0:09:15 lr: 0.000114 grad: 0.1159 (0.1276) loss: 0.7843 (0.7812) time: 0.1307 data: 0.0517 max mem: 9377 +Train: [23] [2400/6250] eta: 0:09:02 lr: 0.000114 grad: 0.1249 (0.1274) loss: 0.7729 (0.7812) time: 0.1603 data: 0.0776 max mem: 9377 +Train: [23] [2500/6250] eta: 0:08:48 lr: 0.000114 grad: 0.1189 (0.1272) loss: 0.7851 (0.7810) time: 0.1093 data: 0.0239 max mem: 9377 +Train: [23] [2600/6250] eta: 0:08:34 lr: 0.000114 grad: 0.1179 (0.1270) loss: 0.7833 (0.7811) time: 0.1249 data: 0.0435 max mem: 9377 +Train: [23] [2700/6250] eta: 0:08:20 lr: 0.000114 grad: 0.1257 (0.1267) loss: 0.7758 (0.7811) time: 0.1341 data: 0.0526 max mem: 9377 +Train: [23] [2800/6250] eta: 0:08:06 lr: 0.000114 grad: 0.1189 (0.1265) loss: 0.7758 (0.7810) time: 0.1564 data: 0.0744 max mem: 9377 +Train: [23] [2900/6250] eta: 0:07:52 lr: 0.000114 grad: 0.1213 (0.1265) loss: 0.7774 (0.7810) time: 0.1359 data: 0.0526 max mem: 9377 +Train: [23] [3000/6250] eta: 0:07:39 lr: 0.000114 grad: 0.1229 (0.1264) loss: 0.7722 (0.7809) time: 0.1458 data: 0.0636 max mem: 9377 +Train: [23] [3100/6250] eta: 0:07:25 lr: 0.000114 grad: 0.1209 (0.1262) loss: 0.7703 (0.7809) time: 0.1448 data: 0.0518 max mem: 9377 +Train: [23] [3200/6250] eta: 0:07:11 lr: 0.000114 grad: 0.1258 (0.1262) loss: 0.7833 (0.7808) time: 0.1355 data: 0.0557 max mem: 9377 +Train: [23] [3300/6250] eta: 0:06:57 lr: 0.000114 grad: 0.1246 (0.1263) loss: 0.7814 (0.7807) time: 0.1506 data: 0.0672 max mem: 9377 +Train: [23] [3400/6250] eta: 0:06:42 lr: 0.000114 grad: 0.1291 (0.1263) loss: 0.7675 (0.7805) time: 0.1380 data: 0.0528 max mem: 9377 +Train: [23] [3500/6250] eta: 0:06:28 lr: 0.000114 grad: 0.1209 (0.1265) loss: 0.7752 (0.7804) time: 0.1451 data: 0.0635 max mem: 9377 +Train: [23] [3600/6250] eta: 0:06:14 lr: 0.000114 grad: 0.1223 (0.1264) loss: 0.7648 (0.7802) time: 0.1323 data: 0.0545 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:00 lr: 0.000114 grad: 0.1212 (0.1264) loss: 0.7800 (0.7802) time: 0.1274 data: 0.0444 max mem: 9377 +Train: [23] [3800/6250] eta: 0:05:45 lr: 0.000114 grad: 0.1219 (0.1264) loss: 0.7789 (0.7801) time: 0.1325 data: 0.0472 max mem: 9377 +Train: [23] [3900/6250] eta: 0:05:31 lr: 0.000114 grad: 0.1186 (0.1263) loss: 0.7768 (0.7800) time: 0.1462 data: 0.0650 max mem: 9377 +Train: [23] [4000/6250] eta: 0:05:17 lr: 0.000113 grad: 0.1279 (0.1262) loss: 0.7707 (0.7800) time: 0.1247 data: 0.0451 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:02 lr: 0.000113 grad: 0.1192 (0.1262) loss: 0.7754 (0.7800) time: 0.1558 data: 0.0760 max mem: 9377 +Train: [23] [4200/6250] eta: 0:04:48 lr: 0.000113 grad: 0.1153 (0.1261) loss: 0.7834 (0.7801) time: 0.1513 data: 0.0695 max mem: 9377 +Train: [23] [4300/6250] eta: 0:04:34 lr: 0.000113 grad: 0.1161 (0.1260) loss: 0.7827 (0.7801) time: 0.1201 data: 0.0419 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:20 lr: 0.000113 grad: 0.1205 (0.1259) loss: 0.7671 (0.7800) time: 0.1315 data: 0.0486 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:06 lr: 0.000113 grad: 0.1270 (0.1259) loss: 0.7750 (0.7799) time: 0.1395 data: 0.0597 max mem: 9377 +Train: [23] [4600/6250] eta: 0:03:51 lr: 0.000113 grad: 0.1211 (0.1259) loss: 0.7712 (0.7798) time: 0.1358 data: 0.0548 max mem: 9377 +Train: [23] [4700/6250] eta: 0:03:38 lr: 0.000113 grad: 0.1361 (0.1260) loss: 0.7665 (0.7796) time: 0.1683 data: 0.0773 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:25 lr: 0.000113 grad: 0.1280 (0.1260) loss: 0.7720 (0.7794) time: 0.1540 data: 0.0695 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:11 lr: 0.000113 grad: 0.1215 (0.1260) loss: 0.7702 (0.7792) time: 0.1464 data: 0.0611 max mem: 9377 +Train: [23] [5000/6250] eta: 0:02:57 lr: 0.000113 grad: 0.1265 (0.1259) loss: 0.7747 (0.7791) time: 0.1566 data: 0.0722 max mem: 9377 +Train: [23] [5100/6250] eta: 0:02:43 lr: 0.000113 grad: 0.1207 (0.1259) loss: 0.7819 (0.7791) time: 0.1610 data: 0.0811 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:28 lr: 0.000113 grad: 0.1221 (0.1259) loss: 0.7741 (0.7789) time: 0.1247 data: 0.0352 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:14 lr: 0.000113 grad: 0.1204 (0.1259) loss: 0.7758 (0.7788) time: 0.1185 data: 0.0336 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:00 lr: 0.000113 grad: 0.1231 (0.1259) loss: 0.7631 (0.7786) time: 0.1452 data: 0.0571 max mem: 9377 +Train: [23] [5500/6250] eta: 0:01:46 lr: 0.000113 grad: 0.1260 (0.1259) loss: 0.7680 (0.7785) time: 0.1470 data: 0.0548 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:32 lr: 0.000113 grad: 0.1269 (0.1259) loss: 0.7743 (0.7784) time: 0.1652 data: 0.0774 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:18 lr: 0.000113 grad: 0.1214 (0.1259) loss: 0.7730 (0.7783) time: 0.1620 data: 0.0773 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:03 lr: 0.000113 grad: 0.1250 (0.1258) loss: 0.7920 (0.7782) time: 0.1412 data: 0.0592 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:49 lr: 0.000113 grad: 0.1304 (0.1259) loss: 0.7700 (0.7781) time: 0.1432 data: 0.0604 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:35 lr: 0.000113 grad: 0.1326 (0.1260) loss: 0.7632 (0.7780) time: 0.1474 data: 0.0655 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:21 lr: 0.000113 grad: 0.1276 (0.1260) loss: 0.7754 (0.7779) time: 0.1425 data: 0.0655 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:07 lr: 0.000113 grad: 0.1225 (0.1260) loss: 0.7779 (0.7778) time: 0.1443 data: 0.0617 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.1217 (0.1260) loss: 0.7749 (0.7778) time: 0.1641 data: 0.0848 max mem: 9377 +Train: [23] Total time: 0:14:49 (0.1424 s / it) +Averaged stats: lr: 0.000113 grad: 0.1217 (0.1260) loss: 0.7749 (0.7778) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:04:04 loss: 0.8544 (0.8544) time: 3.9377 data: 3.8294 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8574 (0.8569) time: 0.1146 data: 0.0890 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-train-subset): loss: 0.8574 (0.8569) +Eval (hcp-val): [23] [ 0/62] eta: 0:05:24 loss: 0.8588 (0.8588) time: 5.2360 data: 5.2008 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8545 (0.8549) time: 0.1255 data: 0.0996 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:13 (0.2230 s / it) +Averaged stats (hcp-val): loss: 0.8545 (0.8549) +Eval (nsd-val): [23] [ 0/62] eta: 0:04:28 loss: 0.8213 (0.8213) time: 4.3314 data: 4.3016 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8295 (0.8314) time: 0.1387 data: 0.1135 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (nsd-val): loss: 0.8295 (0.8314) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 8:45:35 lr: 0.000113 grad: 0.0964 (0.0964) loss: 0.8526 (0.8526) time: 5.0457 data: 4.8504 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:19:28 lr: 0.000113 grad: 0.1608 (0.2021) loss: 0.7923 (0.7934) time: 0.1454 data: 0.0538 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:16:43 lr: 0.000113 grad: 0.1375 (0.1749) loss: 0.7816 (0.7917) time: 0.1371 data: 0.0559 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:15:45 lr: 0.000113 grad: 0.1545 (0.1653) loss: 0.7881 (0.7898) time: 0.1454 data: 0.0613 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:15:00 lr: 0.000113 grad: 0.1406 (0.1592) loss: 0.7708 (0.7877) time: 0.1354 data: 0.0570 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:14:25 lr: 0.000113 grad: 0.1291 (0.1546) loss: 0.7878 (0.7857) time: 0.1451 data: 0.0645 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:13:56 lr: 0.000113 grad: 0.1278 (0.1500) loss: 0.7700 (0.7841) time: 0.1195 data: 0.0311 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:13:29 lr: 0.000113 grad: 0.1325 (0.1469) loss: 0.7723 (0.7836) time: 0.1249 data: 0.0377 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:13:19 lr: 0.000113 grad: 0.1125 (0.1443) loss: 0.7765 (0.7830) time: 0.1532 data: 0.0732 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:13:02 lr: 0.000113 grad: 0.1240 (0.1422) loss: 0.7704 (0.7818) time: 0.1472 data: 0.0599 max mem: 9377 +Train: [24] [1000/6250] eta: 0:12:41 lr: 0.000113 grad: 0.1174 (0.1405) loss: 0.7727 (0.7809) time: 0.1173 data: 0.0383 max mem: 9377 +Train: [24] [1100/6250] eta: 0:12:21 lr: 0.000113 grad: 0.1221 (0.1393) loss: 0.7653 (0.7797) time: 0.1348 data: 0.0495 max mem: 9377 +Train: [24] [1200/6250] eta: 0:12:04 lr: 0.000113 grad: 0.1347 (0.1386) loss: 0.7744 (0.7790) time: 0.1177 data: 0.0282 max mem: 9377 +Train: [24] [1300/6250] eta: 0:11:47 lr: 0.000113 grad: 0.1332 (0.1376) loss: 0.7682 (0.7783) time: 0.1322 data: 0.0483 max mem: 9377 +Train: [24] [1400/6250] eta: 0:11:30 lr: 0.000113 grad: 0.1253 (0.1371) loss: 0.7707 (0.7774) time: 0.1205 data: 0.0362 max mem: 9377 +Train: [24] [1500/6250] eta: 0:11:14 lr: 0.000113 grad: 0.1153 (0.1363) loss: 0.7697 (0.7771) time: 0.1136 data: 0.0260 max mem: 9377 +Train: [24] [1600/6250] eta: 0:10:56 lr: 0.000113 grad: 0.1270 (0.1357) loss: 0.7806 (0.7768) time: 0.1282 data: 0.0418 max mem: 9377 +Train: [24] [1700/6250] eta: 0:10:39 lr: 0.000113 grad: 0.1291 (0.1352) loss: 0.7630 (0.7765) time: 0.1291 data: 0.0394 max mem: 9377 +Train: [24] [1800/6250] eta: 0:10:24 lr: 0.000113 grad: 0.1263 (0.1348) loss: 0.7621 (0.7761) time: 0.1420 data: 0.0626 max mem: 9377 +Train: [24] [1900/6250] eta: 0:10:11 lr: 0.000113 grad: 0.1279 (0.1344) loss: 0.7701 (0.7758) time: 0.1532 data: 0.0687 max mem: 9377 +Train: [24] [2000/6250] eta: 0:09:56 lr: 0.000113 grad: 0.1248 (0.1340) loss: 0.7697 (0.7755) time: 0.1230 data: 0.0445 max mem: 9377 +Train: [24] [2100/6250] eta: 0:09:40 lr: 0.000113 grad: 0.1347 (0.1337) loss: 0.7661 (0.7753) time: 0.1184 data: 0.0408 max mem: 9377 +Train: [24] [2200/6250] eta: 0:09:26 lr: 0.000113 grad: 0.1275 (0.1334) loss: 0.7761 (0.7752) time: 0.1312 data: 0.0509 max mem: 9377 +Train: [24] [2300/6250] eta: 0:09:11 lr: 0.000113 grad: 0.1194 (0.1333) loss: 0.7755 (0.7749) time: 0.1323 data: 0.0479 max mem: 9377 +Train: [24] [2400/6250] eta: 0:08:55 lr: 0.000113 grad: 0.1181 (0.1331) loss: 0.7733 (0.7749) time: 0.1311 data: 0.0493 max mem: 9377 +Train: [24] [2500/6250] eta: 0:08:41 lr: 0.000113 grad: 0.1225 (0.1327) loss: 0.7662 (0.7748) time: 0.1408 data: 0.0606 max mem: 9377 +Train: [24] [2600/6250] eta: 0:08:28 lr: 0.000113 grad: 0.1217 (0.1325) loss: 0.7649 (0.7747) time: 0.1183 data: 0.0366 max mem: 9377 +Train: [24] [2700/6250] eta: 0:08:13 lr: 0.000113 grad: 0.1284 (0.1321) loss: 0.7742 (0.7748) time: 0.1307 data: 0.0492 max mem: 9377 +Train: [24] [2800/6250] eta: 0:07:59 lr: 0.000113 grad: 0.1205 (0.1319) loss: 0.7741 (0.7748) time: 0.1428 data: 0.0658 max mem: 9377 +Train: [24] [2900/6250] eta: 0:07:45 lr: 0.000112 grad: 0.1178 (0.1316) loss: 0.7842 (0.7748) time: 0.1185 data: 0.0388 max mem: 9377 +Train: [24] [3000/6250] eta: 0:07:31 lr: 0.000112 grad: 0.1221 (0.1315) loss: 0.7827 (0.7748) time: 0.1314 data: 0.0509 max mem: 9377 +Train: [24] [3100/6250] eta: 0:07:18 lr: 0.000112 grad: 0.1230 (0.1313) loss: 0.7732 (0.7749) time: 0.1409 data: 0.0582 max mem: 9377 +Train: [24] [3200/6250] eta: 0:07:04 lr: 0.000112 grad: 0.1303 (0.1311) loss: 0.7589 (0.7749) time: 0.1439 data: 0.0612 max mem: 9377 +Train: [24] [3300/6250] eta: 0:06:51 lr: 0.000112 grad: 0.1253 (0.1310) loss: 0.7752 (0.7748) time: 0.1578 data: 0.0739 max mem: 9377 +Train: [24] [3400/6250] eta: 0:06:37 lr: 0.000112 grad: 0.1280 (0.1309) loss: 0.7745 (0.7747) time: 0.1411 data: 0.0551 max mem: 9377 +Train: [24] [3500/6250] eta: 0:06:23 lr: 0.000112 grad: 0.1286 (0.1309) loss: 0.7803 (0.7746) time: 0.1269 data: 0.0465 max mem: 9377 +Train: [24] [3600/6250] eta: 0:06:09 lr: 0.000112 grad: 0.1264 (0.1309) loss: 0.7751 (0.7745) time: 0.1308 data: 0.0459 max mem: 9377 +Train: [24] [3700/6250] eta: 0:05:56 lr: 0.000112 grad: 0.1169 (0.1309) loss: 0.7793 (0.7744) time: 0.1431 data: 0.0599 max mem: 9377 +Train: [24] [3800/6250] eta: 0:05:42 lr: 0.000112 grad: 0.1260 (0.1307) loss: 0.7764 (0.7745) time: 0.1432 data: 0.0607 max mem: 9377 +Train: [24] [3900/6250] eta: 0:05:28 lr: 0.000112 grad: 0.1253 (0.1306) loss: 0.7758 (0.7746) time: 0.1376 data: 0.0492 max mem: 9377 +Train: [24] [4000/6250] eta: 0:05:14 lr: 0.000112 grad: 0.1223 (0.1304) loss: 0.7730 (0.7746) time: 0.1381 data: 0.0558 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:00 lr: 0.000112 grad: 0.1249 (0.1303) loss: 0.7775 (0.7746) time: 0.1460 data: 0.0670 max mem: 9377 +Train: [24] [4200/6250] eta: 0:04:45 lr: 0.000112 grad: 0.1249 (0.1302) loss: 0.7703 (0.7745) time: 0.1428 data: 0.0593 max mem: 9377 +Train: [24] [4300/6250] eta: 0:04:32 lr: 0.000112 grad: 0.1259 (0.1301) loss: 0.7645 (0.7745) time: 0.1513 data: 0.0666 max mem: 9377 +Train: [24] [4400/6250] eta: 0:04:18 lr: 0.000112 grad: 0.1265 (0.1301) loss: 0.7639 (0.7745) time: 0.1334 data: 0.0543 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:04 lr: 0.000112 grad: 0.1328 (0.1301) loss: 0.7758 (0.7744) time: 0.1453 data: 0.0642 max mem: 9377 +Train: [24] [4600/6250] eta: 0:03:50 lr: 0.000112 grad: 0.1231 (0.1300) loss: 0.7774 (0.7743) time: 0.1298 data: 0.0459 max mem: 9377 +Train: [24] [4700/6250] eta: 0:03:36 lr: 0.000112 grad: 0.1205 (0.1299) loss: 0.7625 (0.7742) time: 0.1280 data: 0.0443 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:21 lr: 0.000112 grad: 0.1157 (0.1298) loss: 0.7704 (0.7742) time: 0.1290 data: 0.0487 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:08 lr: 0.000112 grad: 0.1178 (0.1298) loss: 0.7801 (0.7741) time: 0.1413 data: 0.0625 max mem: 9377 +Train: [24] [5000/6250] eta: 0:02:54 lr: 0.000112 grad: 0.1217 (0.1297) loss: 0.7694 (0.7740) time: 0.1552 data: 0.0699 max mem: 9377 +Train: [24] [5100/6250] eta: 0:02:41 lr: 0.000112 grad: 0.1293 (0.1297) loss: 0.7789 (0.7738) time: 0.2011 data: 0.1204 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:27 lr: 0.000112 grad: 0.1191 (0.1297) loss: 0.7666 (0.7737) time: 0.1610 data: 0.0793 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:13 lr: 0.000112 grad: 0.1248 (0.1296) loss: 0.7785 (0.7736) time: 0.1572 data: 0.0773 max mem: 9377 +Train: [24] [5400/6250] eta: 0:01:59 lr: 0.000112 grad: 0.1221 (0.1295) loss: 0.7791 (0.7736) time: 0.1345 data: 0.0538 max mem: 9377 +Train: [24] [5500/6250] eta: 0:01:45 lr: 0.000112 grad: 0.1273 (0.1294) loss: 0.7603 (0.7735) time: 0.1704 data: 0.0876 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:31 lr: 0.000112 grad: 0.1230 (0.1294) loss: 0.7751 (0.7735) time: 0.1359 data: 0.0532 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:17 lr: 0.000112 grad: 0.1262 (0.1295) loss: 0.7669 (0.7734) time: 0.1385 data: 0.0493 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:03 lr: 0.000112 grad: 0.1282 (0.1294) loss: 0.7715 (0.7734) time: 0.1203 data: 0.0306 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:49 lr: 0.000112 grad: 0.1309 (0.1294) loss: 0.7699 (0.7733) time: 0.1128 data: 0.0299 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:35 lr: 0.000112 grad: 0.1179 (0.1293) loss: 0.7757 (0.7733) time: 0.1452 data: 0.0628 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:21 lr: 0.000112 grad: 0.1242 (0.1293) loss: 0.7848 (0.7733) time: 0.1355 data: 0.0456 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:07 lr: 0.000112 grad: 0.1211 (0.1293) loss: 0.7741 (0.7733) time: 0.1098 data: 0.0214 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.1239 (0.1293) loss: 0.7721 (0.7733) time: 0.1465 data: 0.0637 max mem: 9377 +Train: [24] Total time: 0:14:43 (0.1413 s / it) +Averaged stats: lr: 0.000112 grad: 0.1239 (0.1293) loss: 0.7721 (0.7733) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:03:07 loss: 0.8493 (0.8493) time: 3.0278 data: 2.9489 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8577 (0.8569) time: 0.1308 data: 0.1034 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:13 (0.2133 s / it) +Averaged stats (hcp-train-subset): loss: 0.8577 (0.8569) +Making plots (hcp-train-subset): example=55 +Eval (hcp-val): [24] [ 0/62] eta: 0:03:49 loss: 0.8559 (0.8559) time: 3.7064 data: 3.6086 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8534 (0.8550) time: 0.1314 data: 0.1062 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:13 (0.2235 s / it) +Averaged stats (hcp-val): loss: 0.8534 (0.8550) +Making plots (hcp-val): example=39 +Eval (nsd-val): [24] [ 0/62] eta: 0:03:57 loss: 0.8200 (0.8200) time: 3.8230 data: 3.7486 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8298 (0.8322) time: 0.1391 data: 0.1140 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (nsd-val): loss: 0.8298 (0.8322) +Making plots (nsd-val): example=23 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 6:49:59 lr: 0.000112 grad: 0.0853 (0.0853) loss: 0.8481 (0.8481) time: 3.9360 data: 3.6711 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:19:19 lr: 0.000112 grad: 0.1589 (0.1914) loss: 0.7950 (0.8089) time: 0.1263 data: 0.0395 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:17:07 lr: 0.000112 grad: 0.1269 (0.1802) loss: 0.7901 (0.7954) time: 0.1469 data: 0.0613 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:15:58 lr: 0.000112 grad: 0.1357 (0.1705) loss: 0.7862 (0.7908) time: 0.1225 data: 0.0338 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:15:27 lr: 0.000112 grad: 0.1178 (0.1619) loss: 0.7963 (0.7904) time: 0.1549 data: 0.0631 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:14:51 lr: 0.000112 grad: 0.1183 (0.1546) loss: 0.7808 (0.7893) time: 0.1495 data: 0.0658 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:14:23 lr: 0.000112 grad: 0.1148 (0.1506) loss: 0.7827 (0.7878) time: 0.1328 data: 0.0429 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:14:03 lr: 0.000112 grad: 0.1297 (0.1470) loss: 0.7755 (0.7863) time: 0.1599 data: 0.0844 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:13:41 lr: 0.000112 grad: 0.1225 (0.1441) loss: 0.7729 (0.7848) time: 0.1250 data: 0.0430 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:13:21 lr: 0.000112 grad: 0.1296 (0.1424) loss: 0.7628 (0.7838) time: 0.1331 data: 0.0563 max mem: 9377 +Train: [25] [1000/6250] eta: 0:12:58 lr: 0.000112 grad: 0.1238 (0.1407) loss: 0.7815 (0.7833) time: 0.1270 data: 0.0465 max mem: 9377 +Train: [25] [1100/6250] eta: 0:12:33 lr: 0.000112 grad: 0.1254 (0.1394) loss: 0.7747 (0.7826) time: 0.1311 data: 0.0531 max mem: 9377 +Train: [25] [1200/6250] eta: 0:12:15 lr: 0.000112 grad: 0.1301 (0.1384) loss: 0.7734 (0.7816) time: 0.1479 data: 0.0662 max mem: 9377 +Train: [25] [1300/6250] eta: 0:12:00 lr: 0.000112 grad: 0.1161 (0.1373) loss: 0.7729 (0.7810) time: 0.1632 data: 0.0680 max mem: 9377 +Train: [25] [1400/6250] eta: 0:11:47 lr: 0.000112 grad: 0.1156 (0.1364) loss: 0.7766 (0.7804) time: 0.1402 data: 0.0596 max mem: 9377 +Train: [25] [1500/6250] eta: 0:11:27 lr: 0.000112 grad: 0.1219 (0.1358) loss: 0.7713 (0.7794) time: 0.1227 data: 0.0386 max mem: 9377 +Train: [25] [1600/6250] eta: 0:11:10 lr: 0.000111 grad: 0.1346 (0.1353) loss: 0.7674 (0.7787) time: 0.1355 data: 0.0519 max mem: 9377 +Train: [25] [1700/6250] eta: 0:10:55 lr: 0.000111 grad: 0.1226 (0.1348) loss: 0.7703 (0.7781) time: 0.1716 data: 0.0924 max mem: 9377 +Train: [25] [1800/6250] eta: 0:10:35 lr: 0.000111 grad: 0.1208 (0.1346) loss: 0.7670 (0.7774) time: 0.1345 data: 0.0521 max mem: 9377 +Train: [25] [1900/6250] eta: 0:10:17 lr: 0.000111 grad: 0.1257 (0.1342) loss: 0.7729 (0.7769) time: 0.1220 data: 0.0382 max mem: 9377 +Train: [25] [2000/6250] eta: 0:10:01 lr: 0.000111 grad: 0.1290 (0.1339) loss: 0.7695 (0.7765) time: 0.1384 data: 0.0552 max mem: 9377 +Train: [25] [2100/6250] eta: 0:09:45 lr: 0.000111 grad: 0.1290 (0.1335) loss: 0.7568 (0.7761) time: 0.1184 data: 0.0338 max mem: 9377 +Train: [25] [2200/6250] eta: 0:09:30 lr: 0.000111 grad: 0.1340 (0.1336) loss: 0.7617 (0.7755) time: 0.1305 data: 0.0485 max mem: 9377 +Train: [25] [2300/6250] eta: 0:09:16 lr: 0.000111 grad: 0.1263 (0.1336) loss: 0.7590 (0.7749) time: 0.1498 data: 0.0717 max mem: 9377 +Train: [25] [2400/6250] eta: 0:09:02 lr: 0.000111 grad: 0.1292 (0.1334) loss: 0.7632 (0.7744) time: 0.1524 data: 0.0729 max mem: 9377 +Train: [25] [2500/6250] eta: 0:08:48 lr: 0.000111 grad: 0.1259 (0.1333) loss: 0.7633 (0.7739) time: 0.1752 data: 0.0920 max mem: 9377 +Train: [25] [2600/6250] eta: 0:08:33 lr: 0.000111 grad: 0.1373 (0.1333) loss: 0.7643 (0.7733) time: 0.1264 data: 0.0465 max mem: 9377 +Train: [25] [2700/6250] eta: 0:08:19 lr: 0.000111 grad: 0.1374 (0.1335) loss: 0.7540 (0.7729) time: 0.1609 data: 0.0844 max mem: 9377 +Train: [25] [2800/6250] eta: 0:08:04 lr: 0.000111 grad: 0.1286 (0.1333) loss: 0.7582 (0.7726) time: 0.1305 data: 0.0491 max mem: 9377 +Train: [25] [2900/6250] eta: 0:07:50 lr: 0.000111 grad: 0.1264 (0.1333) loss: 0.7643 (0.7722) time: 0.1442 data: 0.0676 max mem: 9377 +Train: [25] [3000/6250] eta: 0:07:36 lr: 0.000111 grad: 0.1316 (0.1331) loss: 0.7641 (0.7720) time: 0.1427 data: 0.0673 max mem: 9377 +Train: [25] [3100/6250] eta: 0:07:22 lr: 0.000111 grad: 0.1348 (0.1329) loss: 0.7581 (0.7719) time: 0.1199 data: 0.0352 max mem: 9377 +Train: [25] [3200/6250] eta: 0:07:09 lr: 0.000111 grad: 0.1278 (0.1327) loss: 0.7642 (0.7718) time: 0.1349 data: 0.0548 max mem: 9377 +Train: [25] [3300/6250] eta: 0:06:55 lr: 0.000111 grad: 0.1290 (0.1325) loss: 0.7578 (0.7716) time: 0.1415 data: 0.0598 max mem: 9377 +Train: [25] [3400/6250] eta: 0:06:41 lr: 0.000111 grad: 0.1259 (0.1323) loss: 0.7666 (0.7715) time: 0.1397 data: 0.0597 max mem: 9377 +Train: [25] [3500/6250] eta: 0:06:27 lr: 0.000111 grad: 0.1211 (0.1322) loss: 0.7706 (0.7713) time: 0.1419 data: 0.0585 max mem: 9377 +Train: [25] [3600/6250] eta: 0:06:13 lr: 0.000111 grad: 0.1266 (0.1320) loss: 0.7679 (0.7712) time: 0.1580 data: 0.0793 max mem: 9377 +Train: [25] [3700/6250] eta: 0:05:58 lr: 0.000111 grad: 0.1244 (0.1319) loss: 0.7702 (0.7711) time: 0.1219 data: 0.0453 max mem: 9377 +Train: [25] [3800/6250] eta: 0:05:44 lr: 0.000111 grad: 0.1272 (0.1318) loss: 0.7730 (0.7710) time: 0.1233 data: 0.0410 max mem: 9377 +Train: [25] [3900/6250] eta: 0:05:30 lr: 0.000111 grad: 0.1225 (0.1317) loss: 0.7666 (0.7710) time: 0.1607 data: 0.0784 max mem: 9377 +Train: [25] [4000/6250] eta: 0:05:16 lr: 0.000111 grad: 0.1277 (0.1316) loss: 0.7809 (0.7711) time: 0.1471 data: 0.0622 max mem: 9377 +Train: [25] [4100/6250] eta: 0:05:02 lr: 0.000111 grad: 0.1256 (0.1315) loss: 0.7686 (0.7711) time: 0.1460 data: 0.0631 max mem: 9377 +Train: [25] [4200/6250] eta: 0:04:48 lr: 0.000111 grad: 0.1265 (0.1314) loss: 0.7679 (0.7712) time: 0.1418 data: 0.0626 max mem: 9377 +Train: [25] [4300/6250] eta: 0:04:34 lr: 0.000111 grad: 0.1233 (0.1314) loss: 0.7771 (0.7712) time: 0.1414 data: 0.0617 max mem: 9377 +Train: [25] [4400/6250] eta: 0:04:20 lr: 0.000111 grad: 0.1226 (0.1312) loss: 0.7759 (0.7712) time: 0.1198 data: 0.0381 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:06 lr: 0.000111 grad: 0.1277 (0.1312) loss: 0.7634 (0.7712) time: 0.1381 data: 0.0573 max mem: 9377 +Train: [25] [4600/6250] eta: 0:03:52 lr: 0.000111 grad: 0.1235 (0.1311) loss: 0.7784 (0.7712) time: 0.1538 data: 0.0761 max mem: 9377 +Train: [25] [4700/6250] eta: 0:03:38 lr: 0.000111 grad: 0.1173 (0.1309) loss: 0.7757 (0.7712) time: 0.1320 data: 0.0537 max mem: 9377 +Train: [25] [4800/6250] eta: 0:03:24 lr: 0.000111 grad: 0.1221 (0.1308) loss: 0.7841 (0.7712) time: 0.1404 data: 0.0606 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:09 lr: 0.000111 grad: 0.1213 (0.1306) loss: 0.7579 (0.7712) time: 0.1457 data: 0.0677 max mem: 9377 +Train: [25] [5000/6250] eta: 0:02:56 lr: 0.000111 grad: 0.1277 (0.1305) loss: 0.7646 (0.7711) time: 0.1491 data: 0.0721 max mem: 9377 +Train: [25] [5100/6250] eta: 0:02:42 lr: 0.000111 grad: 0.1234 (0.1304) loss: 0.7743 (0.7711) time: 0.1812 data: 0.0799 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:28 lr: 0.000111 grad: 0.1237 (0.1304) loss: 0.7833 (0.7710) time: 0.1389 data: 0.0524 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:14 lr: 0.000111 grad: 0.1221 (0.1303) loss: 0.7606 (0.7709) time: 0.1317 data: 0.0549 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:00 lr: 0.000111 grad: 0.1297 (0.1302) loss: 0.7594 (0.7709) time: 0.1272 data: 0.0514 max mem: 9377 +Train: [25] [5500/6250] eta: 0:01:46 lr: 0.000111 grad: 0.1251 (0.1302) loss: 0.7623 (0.7707) time: 0.1456 data: 0.0702 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:32 lr: 0.000111 grad: 0.1266 (0.1302) loss: 0.7631 (0.7706) time: 0.1677 data: 0.0904 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:18 lr: 0.000111 grad: 0.1207 (0.1302) loss: 0.7665 (0.7705) time: 0.1516 data: 0.0696 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:03 lr: 0.000111 grad: 0.1285 (0.1302) loss: 0.7512 (0.7703) time: 0.1385 data: 0.0607 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:49 lr: 0.000111 grad: 0.1261 (0.1302) loss: 0.7706 (0.7701) time: 0.1566 data: 0.0713 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:35 lr: 0.000111 grad: 0.1282 (0.1301) loss: 0.7644 (0.7700) time: 0.1270 data: 0.0374 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:21 lr: 0.000111 grad: 0.1243 (0.1302) loss: 0.7586 (0.7699) time: 0.1377 data: 0.0559 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:07 lr: 0.000111 grad: 0.1277 (0.1302) loss: 0.7594 (0.7697) time: 0.1319 data: 0.0480 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.1274 (0.1302) loss: 0.7564 (0.7696) time: 0.1309 data: 0.0569 max mem: 9377 +Train: [25] Total time: 0:14:53 (0.1429 s / it) +Averaged stats: lr: 0.000111 grad: 0.1274 (0.1302) loss: 0.7564 (0.7696) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:04:34 loss: 0.8552 (0.8552) time: 4.4278 data: 4.3975 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8571 (0.8567) time: 0.0929 data: 0.0679 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-train-subset): loss: 0.8571 (0.8567) +Eval (hcp-val): [25] [ 0/62] eta: 0:03:26 loss: 0.8509 (0.8509) time: 3.3277 data: 3.2453 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8534 (0.8540) time: 0.1261 data: 0.0994 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:12 (0.2034 s / it) +Averaged stats (hcp-val): loss: 0.8534 (0.8540) +Eval (nsd-val): [25] [ 0/62] eta: 0:04:16 loss: 0.8216 (0.8216) time: 4.1368 data: 4.0958 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8328 (0.8328) time: 0.1075 data: 0.0811 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:12 (0.2001 s / it) +Averaged stats (nsd-val): loss: 0.8328 (0.8328) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [26] [ 0/6250] eta: 10:47:32 lr: 0.000111 grad: 0.1506 (0.1506) loss: 0.8516 (0.8516) time: 6.2163 data: 6.1142 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:21:05 lr: 0.000111 grad: 0.1410 (0.2077) loss: 0.8013 (0.8042) time: 0.1669 data: 0.0737 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:18:32 lr: 0.000110 grad: 0.1618 (0.1906) loss: 0.7953 (0.7974) time: 0.1662 data: 0.0798 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:16:57 lr: 0.000110 grad: 0.1389 (0.1787) loss: 0.7797 (0.7916) time: 0.1489 data: 0.0653 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:16:09 lr: 0.000110 grad: 0.1357 (0.1686) loss: 0.7853 (0.7890) time: 0.1581 data: 0.0719 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:15:22 lr: 0.000110 grad: 0.1199 (0.1607) loss: 0.7885 (0.7879) time: 0.1236 data: 0.0422 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:14:53 lr: 0.000110 grad: 0.1287 (0.1559) loss: 0.7763 (0.7871) time: 0.1359 data: 0.0532 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:14:25 lr: 0.000110 grad: 0.1256 (0.1518) loss: 0.7691 (0.7860) time: 0.1547 data: 0.0721 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:14:00 lr: 0.000110 grad: 0.1121 (0.1486) loss: 0.7783 (0.7851) time: 0.1223 data: 0.0390 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:13:46 lr: 0.000110 grad: 0.1129 (0.1453) loss: 0.7864 (0.7844) time: 0.1464 data: 0.0610 max mem: 9377 +Train: [26] [1000/6250] eta: 0:13:27 lr: 0.000110 grad: 0.1140 (0.1429) loss: 0.7797 (0.7839) time: 0.1356 data: 0.0514 max mem: 9377 +Train: [26] [1100/6250] eta: 0:13:06 lr: 0.000110 grad: 0.1226 (0.1413) loss: 0.7844 (0.7833) time: 0.1565 data: 0.0668 max mem: 9377 +Train: [26] [1200/6250] eta: 0:12:45 lr: 0.000110 grad: 0.1207 (0.1400) loss: 0.7772 (0.7829) time: 0.1474 data: 0.0632 max mem: 9377 +Train: [26] [1300/6250] eta: 0:12:29 lr: 0.000110 grad: 0.1256 (0.1387) loss: 0.7729 (0.7826) time: 0.1563 data: 0.0731 max mem: 9377 +Train: [26] [1400/6250] eta: 0:12:08 lr: 0.000110 grad: 0.1197 (0.1375) loss: 0.7763 (0.7822) time: 0.1236 data: 0.0426 max mem: 9377 +Train: [26] [1500/6250] eta: 0:11:47 lr: 0.000110 grad: 0.1116 (0.1362) loss: 0.7695 (0.7817) time: 0.1352 data: 0.0521 max mem: 9377 +Train: [26] [1600/6250] eta: 0:11:28 lr: 0.000110 grad: 0.1179 (0.1357) loss: 0.7662 (0.7812) time: 0.1422 data: 0.0605 max mem: 9377 +Train: [26] [1700/6250] eta: 0:11:08 lr: 0.000110 grad: 0.1252 (0.1351) loss: 0.7728 (0.7805) time: 0.1467 data: 0.0601 max mem: 9377 +Train: [26] [1800/6250] eta: 0:10:51 lr: 0.000110 grad: 0.1185 (0.1346) loss: 0.7689 (0.7799) time: 0.1533 data: 0.0718 max mem: 9377 +Train: [26] [1900/6250] eta: 0:10:37 lr: 0.000110 grad: 0.1226 (0.1341) loss: 0.7657 (0.7794) time: 0.1228 data: 0.0287 max mem: 9377 +Train: [26] [2000/6250] eta: 0:10:21 lr: 0.000110 grad: 0.1279 (0.1335) loss: 0.7673 (0.7790) time: 0.1284 data: 0.0484 max mem: 9377 +Train: [26] [2100/6250] eta: 0:10:08 lr: 0.000110 grad: 0.1187 (0.1331) loss: 0.7663 (0.7787) time: 0.1418 data: 0.0587 max mem: 9377 +Train: [26] [2200/6250] eta: 0:09:55 lr: 0.000110 grad: 0.1204 (0.1327) loss: 0.7675 (0.7786) time: 0.1475 data: 0.0678 max mem: 9377 +Train: [26] [2300/6250] eta: 0:09:41 lr: 0.000110 grad: 0.1159 (0.1323) loss: 0.7711 (0.7784) time: 0.1623 data: 0.0874 max mem: 9377 +Train: [26] [2400/6250] eta: 0:09:28 lr: 0.000110 grad: 0.1284 (0.1320) loss: 0.7608 (0.7780) time: 0.1441 data: 0.0639 max mem: 9377 +Train: [26] [2500/6250] eta: 0:09:13 lr: 0.000110 grad: 0.1160 (0.1318) loss: 0.7754 (0.7777) time: 0.1747 data: 0.0998 max mem: 9377 +Train: [26] [2600/6250] eta: 0:08:57 lr: 0.000110 grad: 0.1241 (0.1316) loss: 0.7705 (0.7775) time: 0.1440 data: 0.0598 max mem: 9377 +Train: [26] [2700/6250] eta: 0:08:42 lr: 0.000110 grad: 0.1237 (0.1315) loss: 0.7780 (0.7773) time: 0.1465 data: 0.0646 max mem: 9377 +Train: [26] [2800/6250] eta: 0:08:26 lr: 0.000110 grad: 0.1305 (0.1313) loss: 0.7657 (0.7772) time: 0.1319 data: 0.0510 max mem: 9377 +Train: [26] [2900/6250] eta: 0:08:10 lr: 0.000110 grad: 0.1199 (0.1310) loss: 0.7735 (0.7770) time: 0.1275 data: 0.0437 max mem: 9377 +Train: [26] [3000/6250] eta: 0:07:55 lr: 0.000110 grad: 0.1257 (0.1309) loss: 0.7731 (0.7768) time: 0.1422 data: 0.0607 max mem: 9377 +Train: [26] [3100/6250] eta: 0:07:39 lr: 0.000110 grad: 0.1273 (0.1308) loss: 0.7698 (0.7766) time: 0.1302 data: 0.0522 max mem: 9377 +Train: [26] [3200/6250] eta: 0:07:23 lr: 0.000110 grad: 0.1237 (0.1305) loss: 0.7689 (0.7766) time: 0.1259 data: 0.0436 max mem: 9377 +Train: [26] [3300/6250] eta: 0:07:09 lr: 0.000110 grad: 0.1232 (0.1303) loss: 0.7795 (0.7766) time: 0.1425 data: 0.0647 max mem: 9377 +Train: [26] [3400/6250] eta: 0:06:53 lr: 0.000110 grad: 0.1215 (0.1301) loss: 0.7756 (0.7766) time: 0.1267 data: 0.0485 max mem: 9377 +Train: [26] [3500/6250] eta: 0:06:38 lr: 0.000110 grad: 0.1226 (0.1300) loss: 0.7640 (0.7764) time: 0.1552 data: 0.0774 max mem: 9377 +Train: [26] [3600/6250] eta: 0:06:24 lr: 0.000110 grad: 0.1190 (0.1299) loss: 0.7768 (0.7763) time: 0.1541 data: 0.0693 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:08 lr: 0.000110 grad: 0.1245 (0.1298) loss: 0.7695 (0.7761) time: 0.1230 data: 0.0458 max mem: 9377 +Train: [26] [3800/6250] eta: 0:05:54 lr: 0.000110 grad: 0.1288 (0.1297) loss: 0.7636 (0.7760) time: 0.1584 data: 0.0790 max mem: 9377 +Train: [26] [3900/6250] eta: 0:05:40 lr: 0.000110 grad: 0.1174 (0.1296) loss: 0.7782 (0.7760) time: 0.1760 data: 0.0978 max mem: 9377 +Train: [26] [4000/6250] eta: 0:05:25 lr: 0.000110 grad: 0.1234 (0.1294) loss: 0.7757 (0.7759) time: 0.1409 data: 0.0626 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:11 lr: 0.000110 grad: 0.1287 (0.1293) loss: 0.7744 (0.7759) time: 0.1311 data: 0.0503 max mem: 9377 +Train: [26] [4200/6250] eta: 0:04:56 lr: 0.000110 grad: 0.1298 (0.1292) loss: 0.7744 (0.7758) time: 0.1437 data: 0.0641 max mem: 9377 +Train: [26] [4300/6250] eta: 0:04:42 lr: 0.000110 grad: 0.1283 (0.1291) loss: 0.7659 (0.7757) time: 0.1415 data: 0.0618 max mem: 9377 +Train: [26] [4400/6250] eta: 0:04:27 lr: 0.000110 grad: 0.1209 (0.1290) loss: 0.7710 (0.7756) time: 0.1022 data: 0.0216 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:13 lr: 0.000110 grad: 0.1208 (0.1289) loss: 0.7734 (0.7756) time: 0.1654 data: 0.0861 max mem: 9377 +Train: [26] [4600/6250] eta: 0:03:58 lr: 0.000110 grad: 0.1200 (0.1287) loss: 0.7795 (0.7756) time: 0.1420 data: 0.0587 max mem: 9377 +Train: [26] [4700/6250] eta: 0:03:43 lr: 0.000110 grad: 0.1220 (0.1286) loss: 0.7813 (0.7757) time: 0.1495 data: 0.0750 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:29 lr: 0.000109 grad: 0.1174 (0.1285) loss: 0.7703 (0.7756) time: 0.1396 data: 0.0576 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:14 lr: 0.000109 grad: 0.1289 (0.1284) loss: 0.7727 (0.7756) time: 0.1407 data: 0.0546 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:00 lr: 0.000109 grad: 0.1310 (0.1284) loss: 0.7638 (0.7756) time: 0.1312 data: 0.0482 max mem: 9377 +Train: [26] [5100/6250] eta: 0:02:45 lr: 0.000109 grad: 0.1269 (0.1284) loss: 0.7680 (0.7755) time: 0.1455 data: 0.0618 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:31 lr: 0.000109 grad: 0.1238 (0.1283) loss: 0.7768 (0.7755) time: 0.1539 data: 0.0791 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:17 lr: 0.000109 grad: 0.1257 (0.1283) loss: 0.7658 (0.7754) time: 0.1906 data: 0.1111 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:03 lr: 0.000109 grad: 0.1293 (0.1283) loss: 0.7691 (0.7753) time: 0.1588 data: 0.0752 max mem: 9377 +Train: [26] [5500/6250] eta: 0:01:48 lr: 0.000109 grad: 0.1169 (0.1283) loss: 0.7807 (0.7752) time: 0.1588 data: 0.0668 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:34 lr: 0.000109 grad: 0.1259 (0.1282) loss: 0.7687 (0.7751) time: 0.1439 data: 0.0663 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:19 lr: 0.000109 grad: 0.1244 (0.1283) loss: 0.7663 (0.7749) time: 0.1612 data: 0.0812 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:05 lr: 0.000109 grad: 0.1317 (0.1283) loss: 0.7602 (0.7748) time: 0.1235 data: 0.0418 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:50 lr: 0.000109 grad: 0.1242 (0.1283) loss: 0.7602 (0.7746) time: 0.1591 data: 0.0715 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:36 lr: 0.000109 grad: 0.1358 (0.1285) loss: 0.7595 (0.7744) time: 0.1478 data: 0.0656 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:21 lr: 0.000109 grad: 0.1277 (0.1286) loss: 0.7620 (0.7741) time: 0.1390 data: 0.0526 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:07 lr: 0.000109 grad: 0.1288 (0.1288) loss: 0.7655 (0.7739) time: 0.1474 data: 0.0615 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.1351 (0.1288) loss: 0.7634 (0.7738) time: 0.1454 data: 0.0566 max mem: 9377 +Train: [26] Total time: 0:15:14 (0.1464 s / it) +Averaged stats: lr: 0.000109 grad: 0.1351 (0.1288) loss: 0.7634 (0.7738) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:03:42 loss: 0.8556 (0.8556) time: 3.5966 data: 3.5059 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8555 (0.8570) time: 0.1055 data: 0.0805 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:12 (0.2048 s / it) +Averaged stats (hcp-train-subset): loss: 0.8555 (0.8570) +Eval (hcp-val): [26] [ 0/62] eta: 0:04:39 loss: 0.8503 (0.8503) time: 4.5124 data: 4.4828 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8553 (0.8557) time: 0.1027 data: 0.0776 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (hcp-val): loss: 0.8553 (0.8557) +Eval (nsd-val): [26] [ 0/62] eta: 0:04:49 loss: 0.8229 (0.8229) time: 4.6698 data: 4.6398 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8315 (0.8308) time: 0.1199 data: 0.0949 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:12 (0.2046 s / it) +Averaged stats (nsd-val): loss: 0.8315 (0.8308) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [27] [ 0/6250] eta: 9:31:12 lr: 0.000109 grad: 0.3149 (0.3149) loss: 0.7392 (0.7392) time: 5.4836 data: 5.3539 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:19:57 lr: 0.000109 grad: 0.1743 (0.2095) loss: 0.7954 (0.8001) time: 0.1474 data: 0.0665 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:17:04 lr: 0.000109 grad: 0.1748 (0.1954) loss: 0.7763 (0.7900) time: 0.1136 data: 0.0310 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:16:02 lr: 0.000109 grad: 0.1293 (0.1787) loss: 0.7831 (0.7882) time: 0.1335 data: 0.0510 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:15:16 lr: 0.000109 grad: 0.1258 (0.1669) loss: 0.7808 (0.7870) time: 0.1310 data: 0.0452 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:14:42 lr: 0.000109 grad: 0.1319 (0.1609) loss: 0.7757 (0.7838) time: 0.1393 data: 0.0575 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:14:08 lr: 0.000109 grad: 0.1344 (0.1579) loss: 0.7701 (0.7807) time: 0.1383 data: 0.0547 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:13:48 lr: 0.000109 grad: 0.1307 (0.1541) loss: 0.7677 (0.7788) time: 0.1454 data: 0.0566 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:13:32 lr: 0.000109 grad: 0.1217 (0.1518) loss: 0.7801 (0.7777) time: 0.1259 data: 0.0439 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:13:21 lr: 0.000109 grad: 0.1318 (0.1496) loss: 0.7634 (0.7770) time: 0.1415 data: 0.0531 max mem: 9377 +Train: [27] [1000/6250] eta: 0:13:06 lr: 0.000109 grad: 0.1186 (0.1478) loss: 0.7712 (0.7760) time: 0.1475 data: 0.0616 max mem: 9377 +Train: [27] [1100/6250] eta: 0:12:49 lr: 0.000109 grad: 0.1172 (0.1461) loss: 0.7785 (0.7752) time: 0.1215 data: 0.0330 max mem: 9377 +Train: [27] [1200/6250] eta: 0:12:30 lr: 0.000109 grad: 0.1365 (0.1449) loss: 0.7633 (0.7742) time: 0.1392 data: 0.0591 max mem: 9377 +Train: [27] [1300/6250] eta: 0:12:11 lr: 0.000109 grad: 0.1273 (0.1439) loss: 0.7612 (0.7734) time: 0.1277 data: 0.0460 max mem: 9377 +Train: [27] [1400/6250] eta: 0:11:53 lr: 0.000109 grad: 0.1376 (0.1430) loss: 0.7661 (0.7725) time: 0.1240 data: 0.0386 max mem: 9377 +Train: [27] [1500/6250] eta: 0:11:36 lr: 0.000109 grad: 0.1340 (0.1426) loss: 0.7643 (0.7717) time: 0.0911 data: 0.0002 max mem: 9377 +Train: [27] [1600/6250] eta: 0:11:18 lr: 0.000109 grad: 0.1257 (0.1420) loss: 0.7673 (0.7714) time: 0.1436 data: 0.0571 max mem: 9377 +Train: [27] [1700/6250] eta: 0:11:03 lr: 0.000109 grad: 0.1232 (0.1414) loss: 0.7661 (0.7710) time: 0.1510 data: 0.0610 max mem: 9377 +Train: [27] [1800/6250] eta: 0:10:44 lr: 0.000109 grad: 0.1250 (0.1409) loss: 0.7709 (0.7706) time: 0.1172 data: 0.0314 max mem: 9377 +Train: [27] [1900/6250] eta: 0:10:27 lr: 0.000109 grad: 0.1269 (0.1402) loss: 0.7538 (0.7702) time: 0.1479 data: 0.0667 max mem: 9377 +Train: [27] [2000/6250] eta: 0:10:11 lr: 0.000109 grad: 0.1223 (0.1396) loss: 0.7665 (0.7699) time: 0.1493 data: 0.0674 max mem: 9377 +Train: [27] [2100/6250] eta: 0:09:57 lr: 0.000109 grad: 0.1298 (0.1392) loss: 0.7526 (0.7693) time: 0.1672 data: 0.0925 max mem: 9377 +Train: [27] [2200/6250] eta: 0:09:43 lr: 0.000109 grad: 0.1312 (0.1390) loss: 0.7625 (0.7689) time: 0.1583 data: 0.0809 max mem: 9377 +Train: [27] [2300/6250] eta: 0:09:29 lr: 0.000109 grad: 0.1222 (0.1388) loss: 0.7623 (0.7685) time: 0.1274 data: 0.0427 max mem: 9377 +Train: [27] [2400/6250] eta: 0:09:15 lr: 0.000109 grad: 0.1283 (0.1386) loss: 0.7532 (0.7679) time: 0.1696 data: 0.0957 max mem: 9377 +Train: [27] [2500/6250] eta: 0:09:01 lr: 0.000109 grad: 0.1277 (0.1383) loss: 0.7576 (0.7676) time: 0.1649 data: 0.0865 max mem: 9377 +Train: [27] [2600/6250] eta: 0:08:46 lr: 0.000109 grad: 0.1282 (0.1380) loss: 0.7601 (0.7674) time: 0.1472 data: 0.0637 max mem: 9377 +Train: [27] [2700/6250] eta: 0:08:32 lr: 0.000109 grad: 0.1328 (0.1378) loss: 0.7623 (0.7672) time: 0.1576 data: 0.0821 max mem: 9377 +Train: [27] [2800/6250] eta: 0:08:20 lr: 0.000109 grad: 0.1275 (0.1375) loss: 0.7657 (0.7671) time: 0.1490 data: 0.0663 max mem: 9377 +Train: [27] [2900/6250] eta: 0:08:06 lr: 0.000109 grad: 0.1353 (0.1373) loss: 0.7620 (0.7671) time: 0.1513 data: 0.0772 max mem: 9377 +Train: [27] [3000/6250] eta: 0:07:52 lr: 0.000109 grad: 0.1301 (0.1372) loss: 0.7746 (0.7670) time: 0.1336 data: 0.0548 max mem: 9377 +Train: [27] [3100/6250] eta: 0:07:37 lr: 0.000108 grad: 0.1262 (0.1369) loss: 0.7651 (0.7669) time: 0.1410 data: 0.0620 max mem: 9377 +Train: [27] [3200/6250] eta: 0:07:23 lr: 0.000108 grad: 0.1250 (0.1366) loss: 0.7666 (0.7669) time: 0.1806 data: 0.1074 max mem: 9377 +Train: [27] [3300/6250] eta: 0:07:10 lr: 0.000108 grad: 0.1313 (0.1365) loss: 0.7665 (0.7669) time: 0.1419 data: 0.0616 max mem: 9377 +Train: [27] [3400/6250] eta: 0:06:56 lr: 0.000108 grad: 0.1271 (0.1364) loss: 0.7643 (0.7668) time: 0.1647 data: 0.0867 max mem: 9377 +Train: [27] [3500/6250] eta: 0:06:41 lr: 0.000108 grad: 0.1308 (0.1362) loss: 0.7458 (0.7666) time: 0.1551 data: 0.0807 max mem: 9377 +Train: [27] [3600/6250] eta: 0:06:26 lr: 0.000108 grad: 0.1347 (0.1362) loss: 0.7583 (0.7663) time: 0.1341 data: 0.0497 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:12 lr: 0.000108 grad: 0.1300 (0.1360) loss: 0.7628 (0.7661) time: 0.1607 data: 0.0789 max mem: 9377 +Train: [27] [3800/6250] eta: 0:05:57 lr: 0.000108 grad: 0.1269 (0.1358) loss: 0.7519 (0.7660) time: 0.1453 data: 0.0656 max mem: 9377 +Train: [27] [3900/6250] eta: 0:05:44 lr: 0.000108 grad: 0.1290 (0.1356) loss: 0.7532 (0.7659) time: 0.1173 data: 0.0362 max mem: 9377 +Train: [27] [4000/6250] eta: 0:05:30 lr: 0.000108 grad: 0.1307 (0.1354) loss: 0.7546 (0.7657) time: 0.1874 data: 0.1118 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:16 lr: 0.000108 grad: 0.1269 (0.1353) loss: 0.7475 (0.7656) time: 0.1694 data: 0.0914 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:02 lr: 0.000108 grad: 0.1191 (0.1351) loss: 0.7624 (0.7655) time: 0.1764 data: 0.1002 max mem: 9377 +Train: [27] [4300/6250] eta: 0:04:48 lr: 0.000108 grad: 0.1329 (0.1350) loss: 0.7545 (0.7654) time: 0.1669 data: 0.0906 max mem: 9377 +Train: [27] [4400/6250] eta: 0:04:34 lr: 0.000108 grad: 0.1309 (0.1349) loss: 0.7585 (0.7653) time: 0.1478 data: 0.0643 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:20 lr: 0.000108 grad: 0.1227 (0.1348) loss: 0.7726 (0.7653) time: 0.1562 data: 0.0775 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:05 lr: 0.000108 grad: 0.1217 (0.1347) loss: 0.7675 (0.7652) time: 0.1652 data: 0.0873 max mem: 9377 +Train: [27] [4700/6250] eta: 0:03:51 lr: 0.000108 grad: 0.1285 (0.1345) loss: 0.7696 (0.7652) time: 0.1615 data: 0.0863 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:37 lr: 0.000108 grad: 0.1212 (0.1344) loss: 0.7774 (0.7653) time: 0.1666 data: 0.0937 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:22 lr: 0.000108 grad: 0.1334 (0.1344) loss: 0.7447 (0.7651) time: 0.1348 data: 0.0574 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:07 lr: 0.000108 grad: 0.1223 (0.1343) loss: 0.7534 (0.7650) time: 0.1322 data: 0.0518 max mem: 9377 +Train: [27] [5100/6250] eta: 0:02:52 lr: 0.000108 grad: 0.1319 (0.1342) loss: 0.7682 (0.7649) time: 0.1453 data: 0.0694 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:37 lr: 0.000108 grad: 0.1377 (0.1342) loss: 0.7585 (0.7649) time: 0.1556 data: 0.0777 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:22 lr: 0.000108 grad: 0.1294 (0.1341) loss: 0.7538 (0.7649) time: 0.1443 data: 0.0622 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:07 lr: 0.000108 grad: 0.1357 (0.1341) loss: 0.7658 (0.7648) time: 0.1137 data: 0.0285 max mem: 9377 +Train: [27] [5500/6250] eta: 0:01:52 lr: 0.000108 grad: 0.1295 (0.1341) loss: 0.7539 (0.7647) time: 0.1264 data: 0.0456 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:37 lr: 0.000108 grad: 0.1162 (0.1339) loss: 0.7769 (0.7646) time: 0.1533 data: 0.0693 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:22 lr: 0.000108 grad: 0.1303 (0.1338) loss: 0.7621 (0.7645) time: 0.1821 data: 0.0979 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:07 lr: 0.000108 grad: 0.1272 (0.1337) loss: 0.7548 (0.7645) time: 0.1682 data: 0.0872 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:52 lr: 0.000108 grad: 0.1312 (0.1336) loss: 0.7645 (0.7646) time: 0.1573 data: 0.0763 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:37 lr: 0.000108 grad: 0.1326 (0.1336) loss: 0.7665 (0.7645) time: 0.1621 data: 0.0778 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:22 lr: 0.000108 grad: 0.1348 (0.1336) loss: 0.7589 (0.7644) time: 0.1747 data: 0.0895 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:07 lr: 0.000108 grad: 0.1266 (0.1336) loss: 0.7568 (0.7643) time: 0.1466 data: 0.0615 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.1289 (0.1335) loss: 0.7541 (0.7643) time: 0.1682 data: 0.0868 max mem: 9377 +Train: [27] Total time: 0:15:55 (0.1529 s / it) +Averaged stats: lr: 0.000108 grad: 0.1289 (0.1335) loss: 0.7541 (0.7643) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:03:21 loss: 0.8534 (0.8534) time: 3.2521 data: 3.1886 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8549 (0.8562) time: 0.1214 data: 0.0938 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:14 (0.2334 s / it) +Averaged stats (hcp-train-subset): loss: 0.8549 (0.8562) +Eval (hcp-val): [27] [ 0/62] eta: 0:04:39 loss: 0.8535 (0.8535) time: 4.5156 data: 4.4517 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8525 (0.8545) time: 0.1272 data: 0.1016 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:15 (0.2426 s / it) +Averaged stats (hcp-val): loss: 0.8525 (0.8545) +Eval (nsd-val): [27] [ 0/62] eta: 0:03:56 loss: 0.8186 (0.8186) time: 3.8196 data: 3.7511 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8267 (0.8286) time: 0.1316 data: 0.1043 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (nsd-val): loss: 0.8267 (0.8286) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 7:21:52 lr: 0.000108 grad: 0.0712 (0.0712) loss: 0.8535 (0.8535) time: 4.2420 data: 4.0491 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:21:42 lr: 0.000108 grad: 0.1516 (0.2147) loss: 0.7907 (0.7944) time: 0.1660 data: 0.0629 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:18:25 lr: 0.000108 grad: 0.1475 (0.2025) loss: 0.7652 (0.7801) time: 0.1600 data: 0.0648 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:17:01 lr: 0.000108 grad: 0.1362 (0.1829) loss: 0.7653 (0.7781) time: 0.1499 data: 0.0613 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:16:04 lr: 0.000108 grad: 0.1257 (0.1736) loss: 0.7813 (0.7764) time: 0.1434 data: 0.0597 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:15:22 lr: 0.000108 grad: 0.1340 (0.1660) loss: 0.7767 (0.7751) time: 0.1363 data: 0.0466 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:14:54 lr: 0.000108 grad: 0.1252 (0.1615) loss: 0.7511 (0.7729) time: 0.1518 data: 0.0699 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:14:29 lr: 0.000108 grad: 0.1464 (0.1592) loss: 0.7697 (0.7710) time: 0.1487 data: 0.0713 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:14:06 lr: 0.000108 grad: 0.1325 (0.1566) loss: 0.7624 (0.7700) time: 0.1515 data: 0.0590 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:13:51 lr: 0.000108 grad: 0.1250 (0.1540) loss: 0.7597 (0.7686) time: 0.1699 data: 0.0797 max mem: 9377 +Train: [28] [1000/6250] eta: 0:13:34 lr: 0.000108 grad: 0.1320 (0.1515) loss: 0.7524 (0.7683) time: 0.1886 data: 0.1071 max mem: 9377 +Train: [28] [1100/6250] eta: 0:13:12 lr: 0.000108 grad: 0.1250 (0.1495) loss: 0.7601 (0.7678) time: 0.1510 data: 0.0682 max mem: 9377 +Train: [28] [1200/6250] eta: 0:12:54 lr: 0.000108 grad: 0.1257 (0.1479) loss: 0.7595 (0.7669) time: 0.1561 data: 0.0743 max mem: 9377 +Train: [28] [1300/6250] eta: 0:12:33 lr: 0.000107 grad: 0.1343 (0.1467) loss: 0.7498 (0.7663) time: 0.1401 data: 0.0577 max mem: 9377 +Train: [28] [1400/6250] eta: 0:12:15 lr: 0.000107 grad: 0.1223 (0.1454) loss: 0.7651 (0.7661) time: 0.1513 data: 0.0734 max mem: 9377 +Train: [28] [1500/6250] eta: 0:11:56 lr: 0.000107 grad: 0.1291 (0.1444) loss: 0.7573 (0.7657) time: 0.1483 data: 0.0625 max mem: 9377 +Train: [28] [1600/6250] eta: 0:11:36 lr: 0.000107 grad: 0.1262 (0.1437) loss: 0.7537 (0.7653) time: 0.1182 data: 0.0324 max mem: 9377 +Train: [28] [1700/6250] eta: 0:11:19 lr: 0.000107 grad: 0.1265 (0.1428) loss: 0.7563 (0.7648) time: 0.1368 data: 0.0495 max mem: 9377 +Train: [28] [1800/6250] eta: 0:11:00 lr: 0.000107 grad: 0.1186 (0.1418) loss: 0.7653 (0.7646) time: 0.1264 data: 0.0347 max mem: 9377 +Train: [28] [1900/6250] eta: 0:10:42 lr: 0.000107 grad: 0.1236 (0.1410) loss: 0.7661 (0.7643) time: 0.1254 data: 0.0416 max mem: 9377 +Train: [28] [2000/6250] eta: 0:10:25 lr: 0.000107 grad: 0.1244 (0.1404) loss: 0.7662 (0.7642) time: 0.1472 data: 0.0642 max mem: 9377 +Train: [28] [2100/6250] eta: 0:10:09 lr: 0.000107 grad: 0.1274 (0.1398) loss: 0.7659 (0.7640) time: 0.1262 data: 0.0466 max mem: 9377 +Train: [28] [2200/6250] eta: 0:09:53 lr: 0.000107 grad: 0.1264 (0.1392) loss: 0.7471 (0.7639) time: 0.1369 data: 0.0578 max mem: 9377 +Train: [28] [2300/6250] eta: 0:09:37 lr: 0.000107 grad: 0.1257 (0.1387) loss: 0.7561 (0.7637) time: 0.1332 data: 0.0557 max mem: 9377 +Train: [28] [2400/6250] eta: 0:09:20 lr: 0.000107 grad: 0.1280 (0.1382) loss: 0.7602 (0.7638) time: 0.1267 data: 0.0436 max mem: 9377 +Train: [28] [2500/6250] eta: 0:09:05 lr: 0.000107 grad: 0.1125 (0.1377) loss: 0.7757 (0.7640) time: 0.1253 data: 0.0426 max mem: 9377 +Train: [28] [2600/6250] eta: 0:08:50 lr: 0.000107 grad: 0.1210 (0.1372) loss: 0.7733 (0.7642) time: 0.1505 data: 0.0651 max mem: 9377 +Train: [28] [2700/6250] eta: 0:08:34 lr: 0.000107 grad: 0.1238 (0.1368) loss: 0.7661 (0.7644) time: 0.1461 data: 0.0706 max mem: 9377 +Train: [28] [2800/6250] eta: 0:08:19 lr: 0.000107 grad: 0.1253 (0.1364) loss: 0.7741 (0.7646) time: 0.1512 data: 0.0659 max mem: 9377 +Train: [28] [2900/6250] eta: 0:08:03 lr: 0.000107 grad: 0.1222 (0.1360) loss: 0.7696 (0.7647) time: 0.1288 data: 0.0462 max mem: 9377 +Train: [28] [3000/6250] eta: 0:07:49 lr: 0.000107 grad: 0.1238 (0.1358) loss: 0.7667 (0.7649) time: 0.1581 data: 0.0737 max mem: 9377 +Train: [28] [3100/6250] eta: 0:07:34 lr: 0.000107 grad: 0.1285 (0.1356) loss: 0.7771 (0.7649) time: 0.1301 data: 0.0428 max mem: 9377 +Train: [28] [3200/6250] eta: 0:07:19 lr: 0.000107 grad: 0.1248 (0.1353) loss: 0.7727 (0.7649) time: 0.1484 data: 0.0661 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:04 lr: 0.000107 grad: 0.1252 (0.1350) loss: 0.7713 (0.7651) time: 0.1168 data: 0.0374 max mem: 9377 +Train: [28] [3400/6250] eta: 0:06:50 lr: 0.000107 grad: 0.1293 (0.1350) loss: 0.7680 (0.7652) time: 0.1123 data: 0.0304 max mem: 9377 +Train: [28] [3500/6250] eta: 0:06:37 lr: 0.000107 grad: 0.1263 (0.1350) loss: 0.7732 (0.7652) time: 0.1198 data: 0.0400 max mem: 9377 +Train: [28] [3600/6250] eta: 0:06:23 lr: 0.000107 grad: 0.1245 (0.1349) loss: 0.7713 (0.7652) time: 0.1406 data: 0.0557 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:09 lr: 0.000107 grad: 0.1270 (0.1347) loss: 0.7603 (0.7652) time: 0.1538 data: 0.0726 max mem: 9377 +Train: [28] [3800/6250] eta: 0:05:55 lr: 0.000107 grad: 0.1298 (0.1347) loss: 0.7638 (0.7652) time: 0.1332 data: 0.0520 max mem: 9377 +Train: [28] [3900/6250] eta: 0:05:41 lr: 0.000107 grad: 0.1271 (0.1347) loss: 0.7582 (0.7651) time: 0.1525 data: 0.0724 max mem: 9377 +Train: [28] [4000/6250] eta: 0:05:27 lr: 0.000107 grad: 0.1252 (0.1345) loss: 0.7607 (0.7650) time: 0.1482 data: 0.0694 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:12 lr: 0.000107 grad: 0.1281 (0.1345) loss: 0.7547 (0.7649) time: 0.1416 data: 0.0630 max mem: 9377 +Train: [28] [4200/6250] eta: 0:04:57 lr: 0.000107 grad: 0.1284 (0.1344) loss: 0.7548 (0.7648) time: 0.1322 data: 0.0546 max mem: 9377 +Train: [28] [4300/6250] eta: 0:04:42 lr: 0.000107 grad: 0.1217 (0.1342) loss: 0.7771 (0.7649) time: 0.1390 data: 0.0574 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:27 lr: 0.000107 grad: 0.1278 (0.1340) loss: 0.7595 (0.7649) time: 0.1217 data: 0.0389 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:12 lr: 0.000107 grad: 0.1223 (0.1338) loss: 0.7487 (0.7648) time: 0.1185 data: 0.0319 max mem: 9377 +Train: [28] [4600/6250] eta: 0:03:58 lr: 0.000107 grad: 0.1266 (0.1336) loss: 0.7620 (0.7647) time: 0.1311 data: 0.0531 max mem: 9377 +Train: [28] [4700/6250] eta: 0:03:43 lr: 0.000107 grad: 0.1298 (0.1337) loss: 0.7624 (0.7647) time: 0.1250 data: 0.0446 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:29 lr: 0.000107 grad: 0.1258 (0.1336) loss: 0.7497 (0.7646) time: 0.1429 data: 0.0596 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:14 lr: 0.000107 grad: 0.1293 (0.1335) loss: 0.7596 (0.7645) time: 0.1366 data: 0.0558 max mem: 9377 +Train: [28] [5000/6250] eta: 0:02:59 lr: 0.000107 grad: 0.1262 (0.1335) loss: 0.7522 (0.7643) time: 0.1351 data: 0.0517 max mem: 9377 +Train: [28] [5100/6250] eta: 0:02:45 lr: 0.000107 grad: 0.1349 (0.1334) loss: 0.7509 (0.7642) time: 0.1108 data: 0.0241 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:30 lr: 0.000107 grad: 0.1340 (0.1334) loss: 0.7621 (0.7641) time: 0.1423 data: 0.0569 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:16 lr: 0.000107 grad: 0.1312 (0.1334) loss: 0.7552 (0.7639) time: 0.1590 data: 0.0807 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:01 lr: 0.000107 grad: 0.1354 (0.1334) loss: 0.7616 (0.7637) time: 0.1584 data: 0.0780 max mem: 9377 +Train: [28] [5500/6250] eta: 0:01:47 lr: 0.000107 grad: 0.1318 (0.1334) loss: 0.7410 (0.7635) time: 0.1736 data: 0.0982 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:33 lr: 0.000106 grad: 0.1243 (0.1334) loss: 0.7573 (0.7634) time: 0.1671 data: 0.0868 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:18 lr: 0.000106 grad: 0.1308 (0.1334) loss: 0.7529 (0.7632) time: 0.1237 data: 0.0404 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:04 lr: 0.000106 grad: 0.1357 (0.1334) loss: 0.7448 (0.7631) time: 0.1377 data: 0.0569 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:50 lr: 0.000106 grad: 0.1390 (0.1334) loss: 0.7566 (0.7629) time: 0.1583 data: 0.0745 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:35 lr: 0.000106 grad: 0.1375 (0.1335) loss: 0.7539 (0.7628) time: 0.1439 data: 0.0440 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:21 lr: 0.000106 grad: 0.1304 (0.1335) loss: 0.7597 (0.7628) time: 0.1684 data: 0.0904 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.1295 (0.1335) loss: 0.7602 (0.7628) time: 0.1108 data: 0.0287 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.1293 (0.1335) loss: 0.7583 (0.7628) time: 0.1372 data: 0.0564 max mem: 9377 +Train: [28] Total time: 0:15:03 (0.1446 s / it) +Averaged stats: lr: 0.000106 grad: 0.1293 (0.1335) loss: 0.7583 (0.7628) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:05:08 loss: 0.8536 (0.8536) time: 4.9702 data: 4.9400 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8568 (0.8575) time: 0.1010 data: 0.0737 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:13 (0.2157 s / it) +Averaged stats (hcp-train-subset): loss: 0.8568 (0.8575) +Eval (hcp-val): [28] [ 0/62] eta: 0:03:33 loss: 0.8577 (0.8577) time: 3.4436 data: 3.3635 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8540 (0.8556) time: 0.1176 data: 0.0925 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:13 (0.2216 s / it) +Averaged stats (hcp-val): loss: 0.8540 (0.8556) +Eval (nsd-val): [28] [ 0/62] eta: 0:06:12 loss: 0.8231 (0.8231) time: 6.0101 data: 5.9786 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8313 (0.8330) time: 0.1332 data: 0.1067 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:14 (0.2353 s / it) +Averaged stats (nsd-val): loss: 0.8313 (0.8330) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 9:05:35 lr: 0.000106 grad: 0.3086 (0.3086) loss: 0.9032 (0.9032) time: 5.2377 data: 4.9451 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:23:09 lr: 0.000106 grad: 0.1909 (0.2048) loss: 0.7839 (0.8009) time: 0.1795 data: 0.0822 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:19:39 lr: 0.000106 grad: 0.1856 (0.1918) loss: 0.7843 (0.7932) time: 0.1734 data: 0.0638 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:18:21 lr: 0.000106 grad: 0.1424 (0.1870) loss: 0.7721 (0.7869) time: 0.1643 data: 0.0762 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:17:09 lr: 0.000106 grad: 0.1400 (0.1785) loss: 0.7645 (0.7818) time: 0.1616 data: 0.0689 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:16:19 lr: 0.000106 grad: 0.1405 (0.1729) loss: 0.7714 (0.7791) time: 0.1688 data: 0.0859 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:15:46 lr: 0.000106 grad: 0.1328 (0.1678) loss: 0.7576 (0.7768) time: 0.1711 data: 0.0893 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:15:18 lr: 0.000106 grad: 0.1334 (0.1627) loss: 0.7657 (0.7749) time: 0.1581 data: 0.0774 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:14:48 lr: 0.000106 grad: 0.1340 (0.1592) loss: 0.7732 (0.7737) time: 0.1469 data: 0.0585 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:14:33 lr: 0.000106 grad: 0.1191 (0.1562) loss: 0.7828 (0.7732) time: 0.1827 data: 0.1039 max mem: 9377 +Train: [29] [1000/6250] eta: 0:14:08 lr: 0.000106 grad: 0.1390 (0.1541) loss: 0.7579 (0.7722) time: 0.1451 data: 0.0593 max mem: 9377 +Train: [29] [1100/6250] eta: 0:13:51 lr: 0.000106 grad: 0.1282 (0.1521) loss: 0.7584 (0.7715) time: 0.1686 data: 0.0857 max mem: 9377 +Train: [29] [1200/6250] eta: 0:13:29 lr: 0.000106 grad: 0.1275 (0.1502) loss: 0.7721 (0.7708) time: 0.1698 data: 0.0914 max mem: 9377 +Train: [29] [1300/6250] eta: 0:13:08 lr: 0.000106 grad: 0.1271 (0.1486) loss: 0.7723 (0.7705) time: 0.1547 data: 0.0758 max mem: 9377 +Train: [29] [1400/6250] eta: 0:12:44 lr: 0.000106 grad: 0.1345 (0.1476) loss: 0.7648 (0.7701) time: 0.1352 data: 0.0551 max mem: 9377 +Train: [29] [1500/6250] eta: 0:12:27 lr: 0.000106 grad: 0.1246 (0.1465) loss: 0.7553 (0.7696) time: 0.1693 data: 0.0924 max mem: 9377 +Train: [29] [1600/6250] eta: 0:12:07 lr: 0.000106 grad: 0.1231 (0.1451) loss: 0.7716 (0.7696) time: 0.1416 data: 0.0612 max mem: 9377 +Train: [29] [1700/6250] eta: 0:11:47 lr: 0.000106 grad: 0.1245 (0.1443) loss: 0.7676 (0.7693) time: 0.1393 data: 0.0540 max mem: 9377 +Train: [29] [1800/6250] eta: 0:11:27 lr: 0.000106 grad: 0.1435 (0.1439) loss: 0.7661 (0.7690) time: 0.1211 data: 0.0315 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:08 lr: 0.000106 grad: 0.1315 (0.1434) loss: 0.7593 (0.7688) time: 0.1407 data: 0.0687 max mem: 9377 +Train: [29] [2000/6250] eta: 0:10:50 lr: 0.000106 grad: 0.1384 (0.1430) loss: 0.7591 (0.7685) time: 0.1340 data: 0.0586 max mem: 9377 +Train: [29] [2100/6250] eta: 0:10:31 lr: 0.000106 grad: 0.1307 (0.1426) loss: 0.7693 (0.7683) time: 0.1407 data: 0.0568 max mem: 9377 +Train: [29] [2200/6250] eta: 0:10:13 lr: 0.000106 grad: 0.1323 (0.1420) loss: 0.7668 (0.7681) time: 0.1333 data: 0.0509 max mem: 9377 +Train: [29] [2300/6250] eta: 0:09:56 lr: 0.000106 grad: 0.1276 (0.1417) loss: 0.7670 (0.7680) time: 0.1499 data: 0.0693 max mem: 9377 +Train: [29] [2400/6250] eta: 0:09:38 lr: 0.000106 grad: 0.1264 (0.1413) loss: 0.7749 (0.7680) time: 0.1111 data: 0.0308 max mem: 9377 +Train: [29] [2500/6250] eta: 0:09:21 lr: 0.000106 grad: 0.1233 (0.1408) loss: 0.7639 (0.7680) time: 0.1353 data: 0.0598 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:03 lr: 0.000106 grad: 0.1216 (0.1402) loss: 0.7687 (0.7682) time: 0.1257 data: 0.0384 max mem: 9377 +Train: [29] [2700/6250] eta: 0:08:47 lr: 0.000106 grad: 0.1276 (0.1398) loss: 0.7737 (0.7682) time: 0.1371 data: 0.0621 max mem: 9377 +Train: [29] [2800/6250] eta: 0:08:31 lr: 0.000106 grad: 0.1276 (0.1394) loss: 0.7619 (0.7681) time: 0.1400 data: 0.0567 max mem: 9377 +Train: [29] [2900/6250] eta: 0:08:15 lr: 0.000106 grad: 0.1327 (0.1392) loss: 0.7636 (0.7681) time: 0.1352 data: 0.0556 max mem: 9377 +Train: [29] [3000/6250] eta: 0:07:58 lr: 0.000106 grad: 0.1322 (0.1389) loss: 0.7599 (0.7682) time: 0.1294 data: 0.0499 max mem: 9377 +Train: [29] [3100/6250] eta: 0:07:42 lr: 0.000106 grad: 0.1316 (0.1386) loss: 0.7717 (0.7682) time: 0.1229 data: 0.0419 max mem: 9377 +Train: [29] [3200/6250] eta: 0:07:27 lr: 0.000106 grad: 0.1330 (0.1382) loss: 0.7695 (0.7682) time: 0.1336 data: 0.0508 max mem: 9377 +Train: [29] [3300/6250] eta: 0:07:11 lr: 0.000106 grad: 0.1299 (0.1379) loss: 0.7661 (0.7682) time: 0.1186 data: 0.0394 max mem: 9377 +Train: [29] [3400/6250] eta: 0:06:55 lr: 0.000106 grad: 0.1253 (0.1378) loss: 0.7656 (0.7681) time: 0.1343 data: 0.0458 max mem: 9377 +Train: [29] [3500/6250] eta: 0:06:40 lr: 0.000105 grad: 0.1302 (0.1375) loss: 0.7605 (0.7680) time: 0.1080 data: 0.0316 max mem: 9377 +Train: [29] [3600/6250] eta: 0:06:25 lr: 0.000105 grad: 0.1226 (0.1372) loss: 0.7714 (0.7681) time: 0.1469 data: 0.0674 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:10 lr: 0.000105 grad: 0.1228 (0.1370) loss: 0.7705 (0.7681) time: 0.1354 data: 0.0553 max mem: 9377 +Train: [29] [3800/6250] eta: 0:05:55 lr: 0.000105 grad: 0.1255 (0.1368) loss: 0.7673 (0.7681) time: 0.1709 data: 0.0833 max mem: 9377 +Train: [29] [3900/6250] eta: 0:05:39 lr: 0.000105 grad: 0.1318 (0.1366) loss: 0.7682 (0.7681) time: 0.1554 data: 0.0751 max mem: 9377 +Train: [29] [4000/6250] eta: 0:05:24 lr: 0.000105 grad: 0.1313 (0.1365) loss: 0.7634 (0.7681) time: 0.1411 data: 0.0654 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:10 lr: 0.000105 grad: 0.1291 (0.1363) loss: 0.7614 (0.7681) time: 0.1430 data: 0.0593 max mem: 9377 +Train: [29] [4200/6250] eta: 0:04:55 lr: 0.000105 grad: 0.1234 (0.1361) loss: 0.7650 (0.7681) time: 0.1483 data: 0.0676 max mem: 9377 +Train: [29] [4300/6250] eta: 0:04:41 lr: 0.000105 grad: 0.1245 (0.1360) loss: 0.7642 (0.7680) time: 0.1622 data: 0.0821 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:27 lr: 0.000105 grad: 0.1361 (0.1359) loss: 0.7519 (0.7680) time: 0.1429 data: 0.0628 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:12 lr: 0.000105 grad: 0.1243 (0.1357) loss: 0.7804 (0.7680) time: 0.1405 data: 0.0604 max mem: 9377 +Train: [29] [4600/6250] eta: 0:03:58 lr: 0.000105 grad: 0.1234 (0.1355) loss: 0.7731 (0.7681) time: 0.0988 data: 0.0166 max mem: 9377 +Train: [29] [4700/6250] eta: 0:03:44 lr: 0.000105 grad: 0.1234 (0.1353) loss: 0.7753 (0.7681) time: 0.1348 data: 0.0527 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:29 lr: 0.000105 grad: 0.1305 (0.1352) loss: 0.7635 (0.7681) time: 0.1406 data: 0.0632 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:15 lr: 0.000105 grad: 0.1288 (0.1351) loss: 0.7670 (0.7680) time: 0.1314 data: 0.0530 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:00 lr: 0.000105 grad: 0.1264 (0.1350) loss: 0.7666 (0.7680) time: 0.1610 data: 0.0815 max mem: 9377 +Train: [29] [5100/6250] eta: 0:02:46 lr: 0.000105 grad: 0.1251 (0.1349) loss: 0.7623 (0.7681) time: 0.1532 data: 0.0666 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:31 lr: 0.000105 grad: 0.1293 (0.1348) loss: 0.7565 (0.7680) time: 0.1583 data: 0.0783 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:17 lr: 0.000105 grad: 0.1319 (0.1348) loss: 0.7705 (0.7680) time: 0.1366 data: 0.0520 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:02 lr: 0.000105 grad: 0.1295 (0.1347) loss: 0.7682 (0.7679) time: 0.1390 data: 0.0590 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:48 lr: 0.000105 grad: 0.1284 (0.1347) loss: 0.7671 (0.7679) time: 0.1463 data: 0.0644 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:33 lr: 0.000105 grad: 0.1320 (0.1346) loss: 0.7620 (0.7678) time: 0.1361 data: 0.0481 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:19 lr: 0.000105 grad: 0.1278 (0.1346) loss: 0.7515 (0.7677) time: 0.1326 data: 0.0480 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:04 lr: 0.000105 grad: 0.1333 (0.1347) loss: 0.7629 (0.7675) time: 0.1505 data: 0.0680 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:50 lr: 0.000105 grad: 0.1348 (0.1347) loss: 0.7552 (0.7675) time: 0.1436 data: 0.0630 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:35 lr: 0.000105 grad: 0.1307 (0.1347) loss: 0.7521 (0.7673) time: 0.1292 data: 0.0460 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:21 lr: 0.000105 grad: 0.1303 (0.1347) loss: 0.7598 (0.7672) time: 0.1375 data: 0.0524 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.1417 (0.1347) loss: 0.7479 (0.7669) time: 0.1388 data: 0.0598 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.1281 (0.1347) loss: 0.7526 (0.7668) time: 0.1354 data: 0.0553 max mem: 9377 +Train: [29] Total time: 0:15:03 (0.1446 s / it) +Averaged stats: lr: 0.000105 grad: 0.1281 (0.1347) loss: 0.7526 (0.7668) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:03:29 loss: 0.8537 (0.8537) time: 3.3862 data: 3.2325 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8558 (0.8568) time: 0.1256 data: 0.1006 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (hcp-train-subset): loss: 0.8558 (0.8568) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [29] [ 0/62] eta: 0:03:31 loss: 0.8600 (0.8600) time: 3.4067 data: 3.3402 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8544 (0.8546) time: 0.1194 data: 0.0947 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:12 (0.2035 s / it) +Averaged stats (hcp-val): loss: 0.8544 (0.8546) +Making plots (hcp-val): example=28 +Eval (nsd-val): [29] [ 0/62] eta: 0:04:17 loss: 0.8161 (0.8161) time: 4.1593 data: 4.0958 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8294 (0.8305) time: 0.1274 data: 0.1025 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:12 (0.2023 s / it) +Averaged stats (nsd-val): loss: 0.8294 (0.8305) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 10:25:41 lr: 0.000105 grad: 0.2324 (0.2324) loss: 0.7436 (0.7436) time: 6.0067 data: 5.9168 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:18:45 lr: 0.000105 grad: 0.1718 (0.2249) loss: 0.7784 (0.7891) time: 0.1416 data: 0.0492 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:16:05 lr: 0.000105 grad: 0.1699 (0.2051) loss: 0.7803 (0.7834) time: 0.1251 data: 0.0381 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:15:07 lr: 0.000105 grad: 0.1637 (0.1931) loss: 0.7649 (0.7794) time: 0.1256 data: 0.0355 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:14:31 lr: 0.000105 grad: 0.1321 (0.1833) loss: 0.7704 (0.7769) time: 0.1370 data: 0.0435 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:14:10 lr: 0.000105 grad: 0.1319 (0.1751) loss: 0.7798 (0.7757) time: 0.1254 data: 0.0338 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:13:51 lr: 0.000105 grad: 0.1211 (0.1686) loss: 0.7726 (0.7742) time: 0.1459 data: 0.0598 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:13:24 lr: 0.000105 grad: 0.1368 (0.1642) loss: 0.7679 (0.7737) time: 0.1187 data: 0.0290 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:13:07 lr: 0.000105 grad: 0.1292 (0.1609) loss: 0.7660 (0.7732) time: 0.1357 data: 0.0512 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:13:06 lr: 0.000105 grad: 0.1279 (0.1581) loss: 0.7779 (0.7726) time: 0.1650 data: 0.0806 max mem: 9377 +Train: [30] [1000/6250] eta: 0:12:59 lr: 0.000105 grad: 0.1332 (0.1559) loss: 0.7650 (0.7712) time: 0.1474 data: 0.0446 max mem: 9377 +Train: [30] [1100/6250] eta: 0:12:53 lr: 0.000105 grad: 0.1268 (0.1541) loss: 0.7669 (0.7704) time: 0.1629 data: 0.0752 max mem: 9377 +Train: [30] [1200/6250] eta: 0:12:39 lr: 0.000105 grad: 0.1357 (0.1523) loss: 0.7596 (0.7698) time: 0.1484 data: 0.0637 max mem: 9377 +Train: [30] [1300/6250] eta: 0:12:28 lr: 0.000105 grad: 0.1346 (0.1510) loss: 0.7574 (0.7689) time: 0.1704 data: 0.0902 max mem: 9377 +Train: [30] [1400/6250] eta: 0:12:16 lr: 0.000104 grad: 0.1325 (0.1499) loss: 0.7540 (0.7682) time: 0.1351 data: 0.0405 max mem: 9377 +Train: [30] [1500/6250] eta: 0:12:05 lr: 0.000104 grad: 0.1352 (0.1488) loss: 0.7534 (0.7676) time: 0.1660 data: 0.0770 max mem: 9377 +Train: [30] [1600/6250] eta: 0:11:50 lr: 0.000104 grad: 0.1422 (0.1482) loss: 0.7402 (0.7667) time: 0.1353 data: 0.0386 max mem: 9377 +Train: [30] [1700/6250] eta: 0:11:33 lr: 0.000104 grad: 0.1228 (0.1474) loss: 0.7617 (0.7661) time: 0.1327 data: 0.0408 max mem: 9377 +Train: [30] [1800/6250] eta: 0:11:15 lr: 0.000104 grad: 0.1337 (0.1468) loss: 0.7484 (0.7653) time: 0.1336 data: 0.0474 max mem: 9377 +Train: [30] [1900/6250] eta: 0:10:55 lr: 0.000104 grad: 0.1313 (0.1462) loss: 0.7592 (0.7647) time: 0.1379 data: 0.0575 max mem: 9377 +Train: [30] [2000/6250] eta: 0:10:38 lr: 0.000104 grad: 0.1366 (0.1457) loss: 0.7545 (0.7643) time: 0.1393 data: 0.0539 max mem: 9377 +Train: [30] [2100/6250] eta: 0:10:21 lr: 0.000104 grad: 0.1271 (0.1450) loss: 0.7583 (0.7640) time: 0.1380 data: 0.0553 max mem: 9377 +Train: [30] [2200/6250] eta: 0:10:05 lr: 0.000104 grad: 0.1250 (0.1443) loss: 0.7698 (0.7640) time: 0.1399 data: 0.0553 max mem: 9377 +Train: [30] [2300/6250] eta: 0:09:50 lr: 0.000104 grad: 0.1301 (0.1436) loss: 0.7532 (0.7638) time: 0.1695 data: 0.0866 max mem: 9377 +Train: [30] [2400/6250] eta: 0:09:34 lr: 0.000104 grad: 0.1309 (0.1430) loss: 0.7684 (0.7636) time: 0.1526 data: 0.0716 max mem: 9377 +Train: [30] [2500/6250] eta: 0:09:17 lr: 0.000104 grad: 0.1314 (0.1426) loss: 0.7577 (0.7635) time: 0.1465 data: 0.0649 max mem: 9377 +Train: [30] [2600/6250] eta: 0:09:00 lr: 0.000104 grad: 0.1273 (0.1424) loss: 0.7692 (0.7634) time: 0.1210 data: 0.0393 max mem: 9377 +Train: [30] [2700/6250] eta: 0:08:43 lr: 0.000104 grad: 0.1195 (0.1419) loss: 0.7708 (0.7634) time: 0.1335 data: 0.0582 max mem: 9377 +Train: [30] [2800/6250] eta: 0:08:28 lr: 0.000104 grad: 0.1281 (0.1415) loss: 0.7590 (0.7634) time: 0.1392 data: 0.0596 max mem: 9377 +Train: [30] [2900/6250] eta: 0:08:12 lr: 0.000104 grad: 0.1310 (0.1412) loss: 0.7494 (0.7634) time: 0.1297 data: 0.0401 max mem: 9377 +Train: [30] [3000/6250] eta: 0:07:56 lr: 0.000104 grad: 0.1261 (0.1408) loss: 0.7674 (0.7635) time: 0.1314 data: 0.0503 max mem: 9377 +Train: [30] [3100/6250] eta: 0:07:40 lr: 0.000104 grad: 0.1311 (0.1405) loss: 0.7574 (0.7635) time: 0.1269 data: 0.0406 max mem: 9377 +Train: [30] [3200/6250] eta: 0:07:24 lr: 0.000104 grad: 0.1349 (0.1403) loss: 0.7536 (0.7634) time: 0.1120 data: 0.0260 max mem: 9377 +Train: [30] [3300/6250] eta: 0:07:09 lr: 0.000104 grad: 0.1315 (0.1401) loss: 0.7697 (0.7634) time: 0.1441 data: 0.0658 max mem: 9377 +Train: [30] [3400/6250] eta: 0:06:53 lr: 0.000104 grad: 0.1259 (0.1398) loss: 0.7538 (0.7634) time: 0.1375 data: 0.0553 max mem: 9377 +Train: [30] [3500/6250] eta: 0:06:38 lr: 0.000104 grad: 0.1338 (0.1396) loss: 0.7536 (0.7634) time: 0.1414 data: 0.0598 max mem: 9377 +Train: [30] [3600/6250] eta: 0:06:23 lr: 0.000104 grad: 0.1293 (0.1394) loss: 0.7646 (0.7633) time: 0.1439 data: 0.0665 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:09 lr: 0.000104 grad: 0.1274 (0.1391) loss: 0.7567 (0.7634) time: 0.1544 data: 0.0736 max mem: 9377 +Train: [30] [3800/6250] eta: 0:05:53 lr: 0.000104 grad: 0.1240 (0.1389) loss: 0.7714 (0.7633) time: 0.1186 data: 0.0395 max mem: 9377 +Train: [30] [3900/6250] eta: 0:05:38 lr: 0.000104 grad: 0.1318 (0.1388) loss: 0.7672 (0.7633) time: 0.1331 data: 0.0499 max mem: 9377 +Train: [30] [4000/6250] eta: 0:05:24 lr: 0.000104 grad: 0.1370 (0.1388) loss: 0.7632 (0.7632) time: 0.1524 data: 0.0760 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:09 lr: 0.000104 grad: 0.1361 (0.1387) loss: 0.7578 (0.7632) time: 0.1519 data: 0.0644 max mem: 9377 +Train: [30] [4200/6250] eta: 0:04:54 lr: 0.000104 grad: 0.1294 (0.1387) loss: 0.7654 (0.7631) time: 0.1333 data: 0.0530 max mem: 9377 +Train: [30] [4300/6250] eta: 0:04:39 lr: 0.000104 grad: 0.1377 (0.1386) loss: 0.7608 (0.7631) time: 0.1270 data: 0.0437 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:25 lr: 0.000104 grad: 0.1343 (0.1385) loss: 0.7622 (0.7630) time: 0.1103 data: 0.0268 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:10 lr: 0.000104 grad: 0.1373 (0.1384) loss: 0.7566 (0.7628) time: 0.1349 data: 0.0510 max mem: 9377 +Train: [30] [4600/6250] eta: 0:03:56 lr: 0.000104 grad: 0.1322 (0.1383) loss: 0.7580 (0.7627) time: 0.1493 data: 0.0691 max mem: 9377 +Train: [30] [4700/6250] eta: 0:03:41 lr: 0.000104 grad: 0.1261 (0.1381) loss: 0.7633 (0.7626) time: 0.1540 data: 0.0743 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:27 lr: 0.000104 grad: 0.1290 (0.1380) loss: 0.7729 (0.7626) time: 0.1796 data: 0.1015 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:13 lr: 0.000104 grad: 0.1339 (0.1379) loss: 0.7575 (0.7624) time: 0.1341 data: 0.0552 max mem: 9377 +Train: [30] [5000/6250] eta: 0:02:59 lr: 0.000104 grad: 0.1310 (0.1379) loss: 0.7675 (0.7623) time: 0.1589 data: 0.0842 max mem: 9377 +Train: [30] [5100/6250] eta: 0:02:45 lr: 0.000104 grad: 0.1364 (0.1378) loss: 0.7642 (0.7622) time: 0.1449 data: 0.0650 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:30 lr: 0.000104 grad: 0.1376 (0.1378) loss: 0.7427 (0.7620) time: 0.1693 data: 0.0955 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:16 lr: 0.000104 grad: 0.1337 (0.1378) loss: 0.7576 (0.7619) time: 0.1545 data: 0.0721 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:02 lr: 0.000103 grad: 0.1398 (0.1378) loss: 0.7576 (0.7618) time: 0.1257 data: 0.0490 max mem: 9377 +Train: [30] [5500/6250] eta: 0:01:47 lr: 0.000103 grad: 0.1307 (0.1377) loss: 0.7660 (0.7618) time: 0.1181 data: 0.0319 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:33 lr: 0.000103 grad: 0.1345 (0.1376) loss: 0.7579 (0.7618) time: 0.1341 data: 0.0551 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:19 lr: 0.000103 grad: 0.1337 (0.1376) loss: 0.7600 (0.7618) time: 0.1239 data: 0.0451 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:04 lr: 0.000103 grad: 0.1347 (0.1376) loss: 0.7479 (0.7618) time: 0.1482 data: 0.0727 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:50 lr: 0.000103 grad: 0.1298 (0.1376) loss: 0.7649 (0.7618) time: 0.1612 data: 0.0791 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:35 lr: 0.000103 grad: 0.1246 (0.1375) loss: 0.7589 (0.7617) time: 0.1322 data: 0.0474 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:21 lr: 0.000103 grad: 0.1297 (0.1375) loss: 0.7583 (0.7616) time: 0.1406 data: 0.0557 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:07 lr: 0.000103 grad: 0.1268 (0.1374) loss: 0.7715 (0.7616) time: 0.1570 data: 0.0792 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.1303 (0.1373) loss: 0.7704 (0.7616) time: 0.1336 data: 0.0561 max mem: 9377 +Train: [30] Total time: 0:15:03 (0.1446 s / it) +Averaged stats: lr: 0.000103 grad: 0.1303 (0.1373) loss: 0.7704 (0.7616) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:03:33 loss: 0.8531 (0.8531) time: 3.4390 data: 3.3396 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8565 (0.8594) time: 0.1207 data: 0.0958 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (hcp-train-subset): loss: 0.8565 (0.8594) +Eval (hcp-val): [30] [ 0/62] eta: 0:04:17 loss: 0.8546 (0.8546) time: 4.1585 data: 4.0687 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8547 (0.8568) time: 0.1307 data: 0.1055 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:13 (0.2189 s / it) +Averaged stats (hcp-val): loss: 0.8547 (0.8568) +Eval (nsd-val): [30] [ 0/62] eta: 0:05:07 loss: 0.8186 (0.8186) time: 4.9608 data: 4.9308 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8296 (0.8303) time: 0.0990 data: 0.0740 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (nsd-val): loss: 0.8296 (0.8303) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [31] [ 0/6250] eta: 10:09:24 lr: 0.000103 grad: 0.5188 (0.5188) loss: 0.7899 (0.7899) time: 5.8504 data: 5.7557 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:18:55 lr: 0.000103 grad: 0.1697 (0.1954) loss: 0.7757 (0.7812) time: 0.1373 data: 0.0493 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:16:31 lr: 0.000103 grad: 0.1655 (0.1877) loss: 0.7598 (0.7735) time: 0.1406 data: 0.0550 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:15:27 lr: 0.000103 grad: 0.1556 (0.1804) loss: 0.7575 (0.7698) time: 0.1302 data: 0.0415 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:14:50 lr: 0.000103 grad: 0.1534 (0.1781) loss: 0.7666 (0.7664) time: 0.1374 data: 0.0470 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:14:23 lr: 0.000103 grad: 0.1340 (0.1725) loss: 0.7569 (0.7649) time: 0.1471 data: 0.0557 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:13:57 lr: 0.000103 grad: 0.1360 (0.1669) loss: 0.7517 (0.7644) time: 0.1325 data: 0.0432 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:13:27 lr: 0.000103 grad: 0.1327 (0.1632) loss: 0.7612 (0.7639) time: 0.1286 data: 0.0373 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:13:06 lr: 0.000103 grad: 0.1239 (0.1598) loss: 0.7744 (0.7640) time: 0.1513 data: 0.0657 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:12:44 lr: 0.000103 grad: 0.1358 (0.1573) loss: 0.7582 (0.7639) time: 0.1486 data: 0.0654 max mem: 9377 +Train: [31] [1000/6250] eta: 0:12:33 lr: 0.000103 grad: 0.1373 (0.1549) loss: 0.7549 (0.7635) time: 0.1349 data: 0.0584 max mem: 9377 +Train: [31] [1100/6250] eta: 0:12:16 lr: 0.000103 grad: 0.1354 (0.1530) loss: 0.7507 (0.7632) time: 0.1403 data: 0.0603 max mem: 9377 +Train: [31] [1200/6250] eta: 0:11:58 lr: 0.000103 grad: 0.1331 (0.1517) loss: 0.7487 (0.7627) time: 0.1408 data: 0.0653 max mem: 9377 +Train: [31] [1300/6250] eta: 0:11:44 lr: 0.000103 grad: 0.1292 (0.1500) loss: 0.7655 (0.7628) time: 0.1260 data: 0.0450 max mem: 9377 +Train: [31] [1400/6250] eta: 0:11:37 lr: 0.000103 grad: 0.1266 (0.1487) loss: 0.7537 (0.7626) time: 0.1843 data: 0.1004 max mem: 9377 +Train: [31] [1500/6250] eta: 0:11:27 lr: 0.000103 grad: 0.1305 (0.1477) loss: 0.7533 (0.7623) time: 0.1492 data: 0.0635 max mem: 9377 +Train: [31] [1600/6250] eta: 0:11:14 lr: 0.000103 grad: 0.1313 (0.1466) loss: 0.7638 (0.7622) time: 0.1393 data: 0.0582 max mem: 9377 +Train: [31] [1700/6250] eta: 0:10:57 lr: 0.000103 grad: 0.1248 (0.1458) loss: 0.7656 (0.7620) time: 0.1589 data: 0.0794 max mem: 9377 +Train: [31] [1800/6250] eta: 0:10:41 lr: 0.000103 grad: 0.1325 (0.1452) loss: 0.7544 (0.7618) time: 0.1471 data: 0.0698 max mem: 9377 +Train: [31] [1900/6250] eta: 0:10:25 lr: 0.000103 grad: 0.1381 (0.1450) loss: 0.7515 (0.7615) time: 0.1400 data: 0.0607 max mem: 9377 +Train: [31] [2000/6250] eta: 0:10:11 lr: 0.000103 grad: 0.1279 (0.1445) loss: 0.7553 (0.7610) time: 0.1457 data: 0.0632 max mem: 9377 +Train: [31] [2100/6250] eta: 0:09:55 lr: 0.000103 grad: 0.1318 (0.1440) loss: 0.7719 (0.7610) time: 0.1311 data: 0.0487 max mem: 9377 +Train: [31] [2200/6250] eta: 0:09:40 lr: 0.000103 grad: 0.1369 (0.1434) loss: 0.7484 (0.7609) time: 0.1235 data: 0.0472 max mem: 9377 +Train: [31] [2300/6250] eta: 0:09:26 lr: 0.000103 grad: 0.1321 (0.1430) loss: 0.7698 (0.7610) time: 0.1377 data: 0.0561 max mem: 9377 +Train: [31] [2400/6250] eta: 0:09:10 lr: 0.000103 grad: 0.1259 (0.1426) loss: 0.7545 (0.7609) time: 0.1365 data: 0.0511 max mem: 9377 +Train: [31] [2500/6250] eta: 0:08:54 lr: 0.000103 grad: 0.1229 (0.1420) loss: 0.7700 (0.7608) time: 0.1436 data: 0.0612 max mem: 9377 +Train: [31] [2600/6250] eta: 0:08:39 lr: 0.000103 grad: 0.1294 (0.1416) loss: 0.7488 (0.7607) time: 0.1457 data: 0.0641 max mem: 9377 +Train: [31] [2700/6250] eta: 0:08:24 lr: 0.000103 grad: 0.1270 (0.1413) loss: 0.7561 (0.7605) time: 0.1490 data: 0.0715 max mem: 9377 +Train: [31] [2800/6250] eta: 0:08:10 lr: 0.000103 grad: 0.1223 (0.1409) loss: 0.7580 (0.7603) time: 0.1676 data: 0.0883 max mem: 9377 +Train: [31] [2900/6250] eta: 0:07:54 lr: 0.000103 grad: 0.1310 (0.1406) loss: 0.7669 (0.7602) time: 0.1433 data: 0.0660 max mem: 9377 +Train: [31] [3000/6250] eta: 0:07:40 lr: 0.000103 grad: 0.1233 (0.1403) loss: 0.7624 (0.7602) time: 0.1268 data: 0.0438 max mem: 9377 +Train: [31] [3100/6250] eta: 0:07:24 lr: 0.000103 grad: 0.1249 (0.1400) loss: 0.7550 (0.7601) time: 0.1235 data: 0.0389 max mem: 9377 +Train: [31] [3200/6250] eta: 0:07:10 lr: 0.000102 grad: 0.1336 (0.1398) loss: 0.7593 (0.7601) time: 0.1405 data: 0.0635 max mem: 9377 +Train: [31] [3300/6250] eta: 0:06:55 lr: 0.000102 grad: 0.1328 (0.1395) loss: 0.7602 (0.7599) time: 0.1205 data: 0.0377 max mem: 9377 +Train: [31] [3400/6250] eta: 0:06:41 lr: 0.000102 grad: 0.1329 (0.1393) loss: 0.7536 (0.7598) time: 0.1472 data: 0.0673 max mem: 9377 +Train: [31] [3500/6250] eta: 0:06:27 lr: 0.000102 grad: 0.1341 (0.1391) loss: 0.7456 (0.7596) time: 0.1488 data: 0.0699 max mem: 9377 +Train: [31] [3600/6250] eta: 0:06:12 lr: 0.000102 grad: 0.1317 (0.1389) loss: 0.7486 (0.7595) time: 0.1347 data: 0.0505 max mem: 9377 +Train: [31] [3700/6250] eta: 0:05:58 lr: 0.000102 grad: 0.1324 (0.1388) loss: 0.7471 (0.7594) time: 0.1407 data: 0.0598 max mem: 9377 +Train: [31] [3800/6250] eta: 0:05:43 lr: 0.000102 grad: 0.1321 (0.1387) loss: 0.7588 (0.7592) time: 0.1260 data: 0.0439 max mem: 9377 +Train: [31] [3900/6250] eta: 0:05:29 lr: 0.000102 grad: 0.1359 (0.1386) loss: 0.7522 (0.7591) time: 0.1394 data: 0.0559 max mem: 9377 +Train: [31] [4000/6250] eta: 0:05:14 lr: 0.000102 grad: 0.1282 (0.1384) loss: 0.7585 (0.7591) time: 0.1466 data: 0.0631 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:00 lr: 0.000102 grad: 0.1337 (0.1384) loss: 0.7507 (0.7589) time: 0.1420 data: 0.0634 max mem: 9377 +Train: [31] [4200/6250] eta: 0:04:46 lr: 0.000102 grad: 0.1284 (0.1383) loss: 0.7621 (0.7588) time: 0.1243 data: 0.0405 max mem: 9377 +Train: [31] [4300/6250] eta: 0:04:32 lr: 0.000102 grad: 0.1310 (0.1382) loss: 0.7610 (0.7587) time: 0.1381 data: 0.0569 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:18 lr: 0.000102 grad: 0.1362 (0.1382) loss: 0.7420 (0.7586) time: 0.1474 data: 0.0703 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:03 lr: 0.000102 grad: 0.1410 (0.1383) loss: 0.7485 (0.7585) time: 0.1151 data: 0.0275 max mem: 9377 +Train: [31] [4600/6250] eta: 0:03:49 lr: 0.000102 grad: 0.1306 (0.1382) loss: 0.7627 (0.7586) time: 0.1420 data: 0.0550 max mem: 9377 +Train: [31] [4700/6250] eta: 0:03:35 lr: 0.000102 grad: 0.1323 (0.1382) loss: 0.7645 (0.7586) time: 0.1171 data: 0.0348 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:21 lr: 0.000102 grad: 0.1408 (0.1382) loss: 0.7469 (0.7585) time: 0.1455 data: 0.0609 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:07 lr: 0.000102 grad: 0.1383 (0.1383) loss: 0.7551 (0.7586) time: 0.1329 data: 0.0575 max mem: 9377 +Train: [31] [5000/6250] eta: 0:02:53 lr: 0.000102 grad: 0.1324 (0.1383) loss: 0.7531 (0.7587) time: 0.1490 data: 0.0729 max mem: 9377 +Train: [31] [5100/6250] eta: 0:02:39 lr: 0.000102 grad: 0.1377 (0.1382) loss: 0.7538 (0.7587) time: 0.1077 data: 0.0199 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:25 lr: 0.000102 grad: 0.1351 (0.1381) loss: 0.7594 (0.7587) time: 0.1558 data: 0.0726 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:11 lr: 0.000102 grad: 0.1339 (0.1381) loss: 0.7660 (0.7588) time: 0.1320 data: 0.0488 max mem: 9377 +Train: [31] [5400/6250] eta: 0:01:57 lr: 0.000102 grad: 0.1331 (0.1382) loss: 0.7568 (0.7588) time: 0.1326 data: 0.0561 max mem: 9377 +Train: [31] [5500/6250] eta: 0:01:44 lr: 0.000102 grad: 0.1369 (0.1380) loss: 0.7483 (0.7589) time: 0.1546 data: 0.0762 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:30 lr: 0.000102 grad: 0.1323 (0.1380) loss: 0.7536 (0.7589) time: 0.1300 data: 0.0458 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:16 lr: 0.000102 grad: 0.1266 (0.1379) loss: 0.7628 (0.7590) time: 0.1280 data: 0.0442 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:02 lr: 0.000102 grad: 0.1366 (0.1378) loss: 0.7465 (0.7590) time: 0.1371 data: 0.0591 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:48 lr: 0.000102 grad: 0.1304 (0.1377) loss: 0.7636 (0.7590) time: 0.1442 data: 0.0654 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:34 lr: 0.000102 grad: 0.1333 (0.1376) loss: 0.7574 (0.7591) time: 0.1613 data: 0.0806 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:20 lr: 0.000102 grad: 0.1302 (0.1375) loss: 0.7629 (0.7592) time: 0.1475 data: 0.0725 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:06 lr: 0.000102 grad: 0.1320 (0.1374) loss: 0.7659 (0.7592) time: 0.1471 data: 0.0620 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.1339 (0.1374) loss: 0.7618 (0.7592) time: 0.1522 data: 0.0729 max mem: 9377 +Train: [31] Total time: 0:14:33 (0.1398 s / it) +Averaged stats: lr: 0.000102 grad: 0.1339 (0.1374) loss: 0.7618 (0.7592) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:04:00 loss: 0.8531 (0.8531) time: 3.8871 data: 3.8020 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8573 (0.8581) time: 0.1178 data: 0.0924 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:12 (0.2077 s / it) +Averaged stats (hcp-train-subset): loss: 0.8573 (0.8581) +Eval (hcp-val): [31] [ 0/62] eta: 0:05:09 loss: 0.8543 (0.8543) time: 4.9839 data: 4.9538 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8526 (0.8547) time: 0.1240 data: 0.0977 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:12 (0.2074 s / it) +Averaged stats (hcp-val): loss: 0.8526 (0.8547) +Eval (nsd-val): [31] [ 0/62] eta: 0:04:02 loss: 0.8259 (0.8259) time: 3.9036 data: 3.8443 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8338 (0.8360) time: 0.1213 data: 0.0962 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:12 (0.2066 s / it) +Averaged stats (nsd-val): loss: 0.8338 (0.8360) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 10:22:29 lr: 0.000102 grad: 0.5232 (0.5232) loss: 0.5777 (0.5777) time: 5.9759 data: 5.8461 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:20:34 lr: 0.000102 grad: 0.2191 (0.2673) loss: 0.7487 (0.7555) time: 0.1537 data: 0.0590 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:18:00 lr: 0.000102 grad: 0.1914 (0.2411) loss: 0.7519 (0.7615) time: 0.1497 data: 0.0546 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:16:43 lr: 0.000102 grad: 0.1437 (0.2167) loss: 0.7557 (0.7608) time: 0.1564 data: 0.0681 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:15:58 lr: 0.000102 grad: 0.1534 (0.2021) loss: 0.7561 (0.7601) time: 0.1493 data: 0.0636 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:15:10 lr: 0.000102 grad: 0.1476 (0.1914) loss: 0.7475 (0.7604) time: 0.1346 data: 0.0584 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:14:37 lr: 0.000102 grad: 0.1346 (0.1831) loss: 0.7457 (0.7602) time: 0.1379 data: 0.0515 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:14:12 lr: 0.000102 grad: 0.1269 (0.1756) loss: 0.7628 (0.7606) time: 0.1464 data: 0.0578 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:13:46 lr: 0.000101 grad: 0.1225 (0.1699) loss: 0.7555 (0.7608) time: 0.1444 data: 0.0553 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:13:16 lr: 0.000101 grad: 0.1196 (0.1656) loss: 0.7563 (0.7609) time: 0.1417 data: 0.0485 max mem: 9377 +Train: [32] [1000/6250] eta: 0:12:55 lr: 0.000101 grad: 0.1254 (0.1623) loss: 0.7585 (0.7610) time: 0.1365 data: 0.0579 max mem: 9377 +Train: [32] [1100/6250] eta: 0:12:36 lr: 0.000101 grad: 0.1395 (0.1595) loss: 0.7570 (0.7608) time: 0.1491 data: 0.0766 max mem: 9377 +Train: [32] [1200/6250] eta: 0:12:15 lr: 0.000101 grad: 0.1354 (0.1576) loss: 0.7616 (0.7608) time: 0.1366 data: 0.0497 max mem: 9377 +Train: [32] [1300/6250] eta: 0:11:58 lr: 0.000101 grad: 0.1301 (0.1558) loss: 0.7651 (0.7608) time: 0.1414 data: 0.0627 max mem: 9377 +Train: [32] [1400/6250] eta: 0:11:41 lr: 0.000101 grad: 0.1188 (0.1538) loss: 0.7690 (0.7610) time: 0.1400 data: 0.0539 max mem: 9377 +Train: [32] [1500/6250] eta: 0:11:24 lr: 0.000101 grad: 0.1265 (0.1521) loss: 0.7731 (0.7611) time: 0.1367 data: 0.0527 max mem: 9377 +Train: [32] [1600/6250] eta: 0:11:07 lr: 0.000101 grad: 0.1320 (0.1507) loss: 0.7573 (0.7612) time: 0.1514 data: 0.0710 max mem: 9377 +Train: [32] [1700/6250] eta: 0:10:49 lr: 0.000101 grad: 0.1314 (0.1493) loss: 0.7621 (0.7615) time: 0.1272 data: 0.0447 max mem: 9377 +Train: [32] [1800/6250] eta: 0:10:30 lr: 0.000101 grad: 0.1269 (0.1484) loss: 0.7658 (0.7615) time: 0.1233 data: 0.0299 max mem: 9377 +Train: [32] [1900/6250] eta: 0:10:13 lr: 0.000101 grad: 0.1312 (0.1476) loss: 0.7593 (0.7616) time: 0.1275 data: 0.0431 max mem: 9377 +Train: [32] [2000/6250] eta: 0:09:58 lr: 0.000101 grad: 0.1272 (0.1470) loss: 0.7621 (0.7616) time: 0.1223 data: 0.0413 max mem: 9377 +Train: [32] [2100/6250] eta: 0:09:42 lr: 0.000101 grad: 0.1251 (0.1463) loss: 0.7705 (0.7616) time: 0.1183 data: 0.0381 max mem: 9377 +Train: [32] [2200/6250] eta: 0:09:28 lr: 0.000101 grad: 0.1220 (0.1455) loss: 0.7611 (0.7617) time: 0.1349 data: 0.0487 max mem: 9377 +Train: [32] [2300/6250] eta: 0:09:13 lr: 0.000101 grad: 0.1350 (0.1452) loss: 0.7578 (0.7618) time: 0.1287 data: 0.0400 max mem: 9377 +Train: [32] [2400/6250] eta: 0:09:00 lr: 0.000101 grad: 0.1310 (0.1447) loss: 0.7640 (0.7617) time: 0.1481 data: 0.0637 max mem: 9377 +Train: [32] [2500/6250] eta: 0:08:46 lr: 0.000101 grad: 0.1460 (0.1444) loss: 0.7584 (0.7616) time: 0.1432 data: 0.0601 max mem: 9377 +Train: [32] [2600/6250] eta: 0:08:32 lr: 0.000101 grad: 0.1267 (0.1441) loss: 0.7670 (0.7616) time: 0.1470 data: 0.0598 max mem: 9377 +Train: [32] [2700/6250] eta: 0:08:17 lr: 0.000101 grad: 0.1228 (0.1437) loss: 0.7776 (0.7618) time: 0.1423 data: 0.0653 max mem: 9377 +Train: [32] [2800/6250] eta: 0:08:04 lr: 0.000101 grad: 0.1271 (0.1433) loss: 0.7577 (0.7617) time: 0.1387 data: 0.0528 max mem: 9377 +Train: [32] [2900/6250] eta: 0:07:50 lr: 0.000101 grad: 0.1356 (0.1430) loss: 0.7637 (0.7616) time: 0.1421 data: 0.0624 max mem: 9377 +Train: [32] [3000/6250] eta: 0:07:36 lr: 0.000101 grad: 0.1283 (0.1427) loss: 0.7757 (0.7616) time: 0.1503 data: 0.0646 max mem: 9377 +Train: [32] [3100/6250] eta: 0:07:22 lr: 0.000101 grad: 0.1378 (0.1425) loss: 0.7490 (0.7613) time: 0.1567 data: 0.0821 max mem: 9377 +Train: [32] [3200/6250] eta: 0:07:08 lr: 0.000101 grad: 0.1355 (0.1424) loss: 0.7539 (0.7610) time: 0.1430 data: 0.0634 max mem: 9377 +Train: [32] [3300/6250] eta: 0:06:54 lr: 0.000101 grad: 0.1405 (0.1422) loss: 0.7318 (0.7606) time: 0.1434 data: 0.0637 max mem: 9377 +Train: [32] [3400/6250] eta: 0:06:39 lr: 0.000101 grad: 0.1312 (0.1421) loss: 0.7565 (0.7602) time: 0.1222 data: 0.0413 max mem: 9377 +Train: [32] [3500/6250] eta: 0:06:25 lr: 0.000101 grad: 0.1346 (0.1420) loss: 0.7391 (0.7598) time: 0.1418 data: 0.0600 max mem: 9377 +Train: [32] [3600/6250] eta: 0:06:11 lr: 0.000101 grad: 0.1482 (0.1419) loss: 0.7449 (0.7595) time: 0.1372 data: 0.0569 max mem: 9377 +Train: [32] [3700/6250] eta: 0:05:57 lr: 0.000101 grad: 0.1434 (0.1419) loss: 0.7457 (0.7593) time: 0.1078 data: 0.0292 max mem: 9377 +Train: [32] [3800/6250] eta: 0:05:43 lr: 0.000101 grad: 0.1343 (0.1418) loss: 0.7445 (0.7590) time: 0.1364 data: 0.0550 max mem: 9377 +Train: [32] [3900/6250] eta: 0:05:28 lr: 0.000101 grad: 0.1295 (0.1416) loss: 0.7524 (0.7588) time: 0.1356 data: 0.0529 max mem: 9377 +Train: [32] [4000/6250] eta: 0:05:14 lr: 0.000101 grad: 0.1327 (0.1415) loss: 0.7500 (0.7586) time: 0.1368 data: 0.0531 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:00 lr: 0.000101 grad: 0.1304 (0.1413) loss: 0.7507 (0.7585) time: 0.1419 data: 0.0608 max mem: 9377 +Train: [32] [4200/6250] eta: 0:04:46 lr: 0.000101 grad: 0.1374 (0.1411) loss: 0.7593 (0.7585) time: 0.1237 data: 0.0466 max mem: 9377 +Train: [32] [4300/6250] eta: 0:04:32 lr: 0.000101 grad: 0.1307 (0.1409) loss: 0.7586 (0.7584) time: 0.1328 data: 0.0435 max mem: 9377 +Train: [32] [4400/6250] eta: 0:04:18 lr: 0.000101 grad: 0.1293 (0.1408) loss: 0.7653 (0.7583) time: 0.1372 data: 0.0559 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:03 lr: 0.000101 grad: 0.1328 (0.1406) loss: 0.7565 (0.7583) time: 0.1272 data: 0.0479 max mem: 9377 +Train: [32] [4600/6250] eta: 0:03:50 lr: 0.000101 grad: 0.1400 (0.1405) loss: 0.7483 (0.7581) time: 0.1459 data: 0.0629 max mem: 9377 +Train: [32] [4700/6250] eta: 0:03:36 lr: 0.000100 grad: 0.1313 (0.1404) loss: 0.7534 (0.7580) time: 0.1307 data: 0.0521 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:22 lr: 0.000100 grad: 0.1369 (0.1403) loss: 0.7483 (0.7580) time: 0.1061 data: 0.0249 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:08 lr: 0.000100 grad: 0.1291 (0.1401) loss: 0.7547 (0.7580) time: 0.1408 data: 0.0616 max mem: 9377 +Train: [32] [5000/6250] eta: 0:02:54 lr: 0.000100 grad: 0.1341 (0.1400) loss: 0.7516 (0.7580) time: 0.1487 data: 0.0722 max mem: 9377 +Train: [32] [5100/6250] eta: 0:02:40 lr: 0.000100 grad: 0.1324 (0.1398) loss: 0.7544 (0.7580) time: 0.1555 data: 0.0742 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:26 lr: 0.000100 grad: 0.1410 (0.1397) loss: 0.7469 (0.7580) time: 0.1351 data: 0.0502 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:12 lr: 0.000100 grad: 0.1295 (0.1396) loss: 0.7633 (0.7580) time: 0.1257 data: 0.0462 max mem: 9377 +Train: [32] [5400/6250] eta: 0:01:58 lr: 0.000100 grad: 0.1308 (0.1396) loss: 0.7602 (0.7579) time: 0.1527 data: 0.0731 max mem: 9377 +Train: [32] [5500/6250] eta: 0:01:44 lr: 0.000100 grad: 0.1348 (0.1395) loss: 0.7497 (0.7579) time: 0.1396 data: 0.0531 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:30 lr: 0.000100 grad: 0.1308 (0.1394) loss: 0.7531 (0.7578) time: 0.1418 data: 0.0599 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:16 lr: 0.000100 grad: 0.1287 (0.1393) loss: 0.7517 (0.7577) time: 0.1401 data: 0.0597 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:02 lr: 0.000100 grad: 0.1299 (0.1392) loss: 0.7573 (0.7577) time: 0.1406 data: 0.0623 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:48 lr: 0.000100 grad: 0.1315 (0.1391) loss: 0.7520 (0.7577) time: 0.1463 data: 0.0672 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:34 lr: 0.000100 grad: 0.1425 (0.1390) loss: 0.7503 (0.7576) time: 0.1051 data: 0.0180 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:20 lr: 0.000100 grad: 0.1340 (0.1390) loss: 0.7607 (0.7577) time: 0.1430 data: 0.0620 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:06 lr: 0.000100 grad: 0.1358 (0.1389) loss: 0.7514 (0.7578) time: 0.1633 data: 0.0861 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1340 (0.1389) loss: 0.7614 (0.7578) time: 0.1670 data: 0.0858 max mem: 9377 +Train: [32] Total time: 0:14:35 (0.1401 s / it) +Averaged stats: lr: 0.000100 grad: 0.1340 (0.1389) loss: 0.7614 (0.7578) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:04:18 loss: 0.8562 (0.8562) time: 4.1670 data: 4.1192 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8577 (0.8577) time: 0.1179 data: 0.0913 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (hcp-train-subset): loss: 0.8577 (0.8577) +Eval (hcp-val): [32] [ 0/62] eta: 0:04:51 loss: 0.8520 (0.8520) time: 4.7080 data: 4.6750 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8542 (0.8559) time: 0.1165 data: 0.0914 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (hcp-val): loss: 0.8542 (0.8559) +Eval (nsd-val): [32] [ 0/62] eta: 0:05:39 loss: 0.8196 (0.8196) time: 5.4681 data: 5.4368 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8300 (0.8297) time: 0.1226 data: 0.0973 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:13 (0.2135 s / it) +Averaged stats (nsd-val): loss: 0.8300 (0.8297) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 8:43:17 lr: 0.000100 grad: 0.2062 (0.2062) loss: 0.7732 (0.7732) time: 5.0236 data: 4.8665 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:19:43 lr: 0.000100 grad: 0.2202 (0.2653) loss: 0.7600 (0.7675) time: 0.1551 data: 0.0692 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:16:49 lr: 0.000100 grad: 0.1686 (0.2332) loss: 0.7669 (0.7640) time: 0.1298 data: 0.0368 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:15:41 lr: 0.000100 grad: 0.1657 (0.2116) loss: 0.7699 (0.7633) time: 0.1542 data: 0.0668 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:14:50 lr: 0.000100 grad: 0.1591 (0.2021) loss: 0.7553 (0.7631) time: 0.1380 data: 0.0514 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:14:34 lr: 0.000100 grad: 0.1374 (0.1924) loss: 0.7584 (0.7627) time: 0.1701 data: 0.0888 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:14:08 lr: 0.000100 grad: 0.1348 (0.1837) loss: 0.7586 (0.7622) time: 0.1432 data: 0.0589 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:13:47 lr: 0.000100 grad: 0.1265 (0.1761) loss: 0.7496 (0.7630) time: 0.1379 data: 0.0531 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:13:19 lr: 0.000100 grad: 0.1414 (0.1714) loss: 0.7585 (0.7635) time: 0.1044 data: 0.0271 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:13:05 lr: 0.000100 grad: 0.1295 (0.1672) loss: 0.7649 (0.7639) time: 0.1459 data: 0.0673 max mem: 9377 +Train: [33] [1000/6250] eta: 0:12:44 lr: 0.000100 grad: 0.1298 (0.1636) loss: 0.7624 (0.7641) time: 0.1434 data: 0.0634 max mem: 9377 +Train: [33] [1100/6250] eta: 0:12:27 lr: 0.000100 grad: 0.1317 (0.1611) loss: 0.7678 (0.7639) time: 0.1325 data: 0.0579 max mem: 9377 +Train: [33] [1200/6250] eta: 0:12:08 lr: 0.000100 grad: 0.1300 (0.1589) loss: 0.7663 (0.7637) time: 0.1260 data: 0.0407 max mem: 9377 +Train: [33] [1300/6250] eta: 0:11:50 lr: 0.000100 grad: 0.1336 (0.1572) loss: 0.7541 (0.7632) time: 0.1471 data: 0.0709 max mem: 9377 +Train: [33] [1400/6250] eta: 0:11:33 lr: 0.000100 grad: 0.1394 (0.1556) loss: 0.7572 (0.7628) time: 0.1480 data: 0.0640 max mem: 9377 +Train: [33] [1500/6250] eta: 0:11:18 lr: 0.000100 grad: 0.1273 (0.1542) loss: 0.7571 (0.7626) time: 0.1418 data: 0.0578 max mem: 9377 +Train: [33] [1600/6250] eta: 0:11:02 lr: 0.000100 grad: 0.1306 (0.1530) loss: 0.7659 (0.7623) time: 0.1306 data: 0.0423 max mem: 9377 +Train: [33] [1700/6250] eta: 0:10:45 lr: 0.000100 grad: 0.1403 (0.1520) loss: 0.7546 (0.7621) time: 0.1257 data: 0.0395 max mem: 9377 +Train: [33] [1800/6250] eta: 0:10:28 lr: 0.000100 grad: 0.1326 (0.1512) loss: 0.7581 (0.7618) time: 0.1299 data: 0.0432 max mem: 9377 +Train: [33] [1900/6250] eta: 0:10:10 lr: 0.000100 grad: 0.1268 (0.1503) loss: 0.7633 (0.7616) time: 0.1384 data: 0.0582 max mem: 9377 +Train: [33] [2000/6250] eta: 0:09:55 lr: 0.000100 grad: 0.1343 (0.1495) loss: 0.7576 (0.7613) time: 0.1575 data: 0.0718 max mem: 9377 +Train: [33] [2100/6250] eta: 0:09:40 lr: 0.000100 grad: 0.1374 (0.1489) loss: 0.7542 (0.7611) time: 0.1287 data: 0.0360 max mem: 9377 +Train: [33] [2200/6250] eta: 0:09:24 lr: 0.000099 grad: 0.1294 (0.1482) loss: 0.7555 (0.7608) time: 0.0981 data: 0.0154 max mem: 9377 +Train: [33] [2300/6250] eta: 0:09:11 lr: 0.000099 grad: 0.1303 (0.1477) loss: 0.7571 (0.7605) time: 0.1351 data: 0.0552 max mem: 9377 +Train: [33] [2400/6250] eta: 0:08:57 lr: 0.000099 grad: 0.1264 (0.1470) loss: 0.7505 (0.7603) time: 0.1321 data: 0.0548 max mem: 9377 +Train: [33] [2500/6250] eta: 0:08:45 lr: 0.000099 grad: 0.1438 (0.1467) loss: 0.7295 (0.7599) time: 0.1178 data: 0.0398 max mem: 9377 +Train: [33] [2600/6250] eta: 0:08:32 lr: 0.000099 grad: 0.1359 (0.1463) loss: 0.7610 (0.7597) time: 0.1542 data: 0.0769 max mem: 9377 +Train: [33] [2700/6250] eta: 0:08:19 lr: 0.000099 grad: 0.1357 (0.1461) loss: 0.7496 (0.7593) time: 0.1467 data: 0.0682 max mem: 9377 +Train: [33] [2800/6250] eta: 0:08:06 lr: 0.000099 grad: 0.1510 (0.1461) loss: 0.7417 (0.7588) time: 0.1633 data: 0.0838 max mem: 9377 +Train: [33] [2900/6250] eta: 0:07:52 lr: 0.000099 grad: 0.1387 (0.1461) loss: 0.7492 (0.7584) time: 0.1332 data: 0.0548 max mem: 9377 +Train: [33] [3000/6250] eta: 0:07:38 lr: 0.000099 grad: 0.1344 (0.1460) loss: 0.7429 (0.7581) time: 0.1446 data: 0.0664 max mem: 9377 +Train: [33] [3100/6250] eta: 0:07:23 lr: 0.000099 grad: 0.1417 (0.1458) loss: 0.7470 (0.7578) time: 0.1296 data: 0.0478 max mem: 9377 +Train: [33] [3200/6250] eta: 0:07:09 lr: 0.000099 grad: 0.1457 (0.1457) loss: 0.7391 (0.7574) time: 0.1408 data: 0.0598 max mem: 9377 +Train: [33] [3300/6250] eta: 0:06:55 lr: 0.000099 grad: 0.1349 (0.1455) loss: 0.7344 (0.7571) time: 0.1294 data: 0.0496 max mem: 9377 +Train: [33] [3400/6250] eta: 0:06:41 lr: 0.000099 grad: 0.1278 (0.1451) loss: 0.7451 (0.7568) time: 0.1277 data: 0.0423 max mem: 9377 +Train: [33] [3500/6250] eta: 0:06:26 lr: 0.000099 grad: 0.1373 (0.1449) loss: 0.7338 (0.7564) time: 0.1428 data: 0.0565 max mem: 9377 +Train: [33] [3600/6250] eta: 0:06:12 lr: 0.000099 grad: 0.1394 (0.1447) loss: 0.7337 (0.7560) time: 0.1403 data: 0.0560 max mem: 9377 +Train: [33] [3700/6250] eta: 0:05:58 lr: 0.000099 grad: 0.1346 (0.1445) loss: 0.7481 (0.7558) time: 0.1390 data: 0.0574 max mem: 9377 +Train: [33] [3800/6250] eta: 0:05:44 lr: 0.000099 grad: 0.1322 (0.1443) loss: 0.7537 (0.7556) time: 0.1323 data: 0.0493 max mem: 9377 +Train: [33] [3900/6250] eta: 0:05:30 lr: 0.000099 grad: 0.1345 (0.1442) loss: 0.7379 (0.7553) time: 0.1359 data: 0.0507 max mem: 9377 +Train: [33] [4000/6250] eta: 0:05:16 lr: 0.000099 grad: 0.1327 (0.1441) loss: 0.7559 (0.7551) time: 0.1411 data: 0.0573 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:02 lr: 0.000099 grad: 0.1345 (0.1439) loss: 0.7526 (0.7550) time: 0.1238 data: 0.0428 max mem: 9377 +Train: [33] [4200/6250] eta: 0:04:48 lr: 0.000099 grad: 0.1332 (0.1437) loss: 0.7504 (0.7548) time: 0.1227 data: 0.0399 max mem: 9377 +Train: [33] [4300/6250] eta: 0:04:34 lr: 0.000099 grad: 0.1355 (0.1436) loss: 0.7563 (0.7548) time: 0.1521 data: 0.0746 max mem: 9377 +Train: [33] [4400/6250] eta: 0:04:19 lr: 0.000099 grad: 0.1336 (0.1434) loss: 0.7504 (0.7546) time: 0.1324 data: 0.0513 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:05 lr: 0.000099 grad: 0.1414 (0.1433) loss: 0.7429 (0.7545) time: 0.1316 data: 0.0502 max mem: 9377 +Train: [33] [4600/6250] eta: 0:03:51 lr: 0.000099 grad: 0.1321 (0.1432) loss: 0.7506 (0.7543) time: 0.1304 data: 0.0465 max mem: 9377 +Train: [33] [4700/6250] eta: 0:03:37 lr: 0.000099 grad: 0.1380 (0.1431) loss: 0.7463 (0.7542) time: 0.1333 data: 0.0475 max mem: 9377 +Train: [33] [4800/6250] eta: 0:03:23 lr: 0.000099 grad: 0.1427 (0.1431) loss: 0.7563 (0.7542) time: 0.1500 data: 0.0694 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:08 lr: 0.000099 grad: 0.1307 (0.1430) loss: 0.7541 (0.7541) time: 0.1397 data: 0.0600 max mem: 9377 +Train: [33] [5000/6250] eta: 0:02:54 lr: 0.000099 grad: 0.1397 (0.1430) loss: 0.7546 (0.7540) time: 0.1299 data: 0.0460 max mem: 9377 +Train: [33] [5100/6250] eta: 0:02:40 lr: 0.000099 grad: 0.1448 (0.1431) loss: 0.7563 (0.7539) time: 0.1219 data: 0.0375 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:26 lr: 0.000099 grad: 0.1431 (0.1430) loss: 0.7449 (0.7538) time: 0.1391 data: 0.0570 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:12 lr: 0.000099 grad: 0.1421 (0.1428) loss: 0.7453 (0.7538) time: 0.1359 data: 0.0549 max mem: 9377 +Train: [33] [5400/6250] eta: 0:01:58 lr: 0.000099 grad: 0.1357 (0.1428) loss: 0.7510 (0.7538) time: 0.1442 data: 0.0618 max mem: 9377 +Train: [33] [5500/6250] eta: 0:01:44 lr: 0.000099 grad: 0.1294 (0.1427) loss: 0.7442 (0.7537) time: 0.1288 data: 0.0460 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:30 lr: 0.000099 grad: 0.1374 (0.1426) loss: 0.7471 (0.7536) time: 0.1487 data: 0.0655 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:16 lr: 0.000099 grad: 0.1368 (0.1425) loss: 0.7488 (0.7535) time: 0.1330 data: 0.0528 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:02 lr: 0.000099 grad: 0.1343 (0.1425) loss: 0.7586 (0.7534) time: 0.1491 data: 0.0664 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:48 lr: 0.000098 grad: 0.1343 (0.1424) loss: 0.7459 (0.7534) time: 0.1436 data: 0.0663 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:34 lr: 0.000098 grad: 0.1299 (0.1423) loss: 0.7584 (0.7535) time: 0.1266 data: 0.0493 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:20 lr: 0.000098 grad: 0.1269 (0.1421) loss: 0.7590 (0.7535) time: 0.1335 data: 0.0568 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:06 lr: 0.000098 grad: 0.1371 (0.1420) loss: 0.7511 (0.7534) time: 0.1229 data: 0.0371 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.1385 (0.1419) loss: 0.7551 (0.7535) time: 0.1379 data: 0.0632 max mem: 9377 +Train: [33] Total time: 0:14:36 (0.1403 s / it) +Averaged stats: lr: 0.000098 grad: 0.1385 (0.1419) loss: 0.7551 (0.7535) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:05:31 loss: 0.8546 (0.8546) time: 5.3391 data: 5.3084 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8569 (0.8592) time: 0.1424 data: 0.1168 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-train-subset): loss: 0.8569 (0.8592) +Eval (hcp-val): [33] [ 0/62] eta: 0:03:34 loss: 0.8529 (0.8529) time: 3.4532 data: 3.3645 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8534 (0.8557) time: 0.1218 data: 0.0964 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:13 (0.2203 s / it) +Averaged stats (hcp-val): loss: 0.8534 (0.8557) +Eval (nsd-val): [33] [ 0/62] eta: 0:03:10 loss: 0.8161 (0.8161) time: 3.0759 data: 2.9814 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8272 (0.8301) time: 0.0952 data: 0.0684 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (nsd-val): loss: 0.8272 (0.8301) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 9:44:10 lr: 0.000098 grad: 0.1288 (0.1288) loss: 0.8344 (0.8344) time: 5.6081 data: 5.4667 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:20:22 lr: 0.000098 grad: 0.2409 (0.2518) loss: 0.7329 (0.7676) time: 0.1566 data: 0.0523 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:17:28 lr: 0.000098 grad: 0.1875 (0.2347) loss: 0.7699 (0.7642) time: 0.1425 data: 0.0533 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:16:29 lr: 0.000098 grad: 0.2141 (0.2235) loss: 0.7492 (0.7587) time: 0.1548 data: 0.0719 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:15:47 lr: 0.000098 grad: 0.1568 (0.2129) loss: 0.7453 (0.7562) time: 0.1589 data: 0.0747 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:15:03 lr: 0.000098 grad: 0.1487 (0.2020) loss: 0.7592 (0.7547) time: 0.1304 data: 0.0382 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:14:26 lr: 0.000098 grad: 0.1641 (0.1943) loss: 0.7340 (0.7539) time: 0.1317 data: 0.0354 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:13:59 lr: 0.000098 grad: 0.1417 (0.1885) loss: 0.7548 (0.7524) time: 0.1409 data: 0.0541 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:13:45 lr: 0.000098 grad: 0.1299 (0.1826) loss: 0.7566 (0.7519) time: 0.1891 data: 0.1098 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:13:27 lr: 0.000098 grad: 0.1372 (0.1775) loss: 0.7514 (0.7522) time: 0.1201 data: 0.0312 max mem: 9377 +Train: [34] [1000/6250] eta: 0:13:13 lr: 0.000098 grad: 0.1384 (0.1735) loss: 0.7674 (0.7526) time: 0.1515 data: 0.0746 max mem: 9377 +Train: [34] [1100/6250] eta: 0:12:55 lr: 0.000098 grad: 0.1372 (0.1705) loss: 0.7546 (0.7525) time: 0.1345 data: 0.0611 max mem: 9377 +Train: [34] [1200/6250] eta: 0:12:36 lr: 0.000098 grad: 0.1278 (0.1675) loss: 0.7534 (0.7525) time: 0.1338 data: 0.0507 max mem: 9377 +Train: [34] [1300/6250] eta: 0:12:19 lr: 0.000098 grad: 0.1336 (0.1649) loss: 0.7510 (0.7528) time: 0.1561 data: 0.0774 max mem: 9377 +Train: [34] [1400/6250] eta: 0:11:57 lr: 0.000098 grad: 0.1252 (0.1626) loss: 0.7604 (0.7532) time: 0.1223 data: 0.0400 max mem: 9377 +Train: [34] [1500/6250] eta: 0:11:39 lr: 0.000098 grad: 0.1310 (0.1606) loss: 0.7609 (0.7537) time: 0.1472 data: 0.0580 max mem: 9377 +Train: [34] [1600/6250] eta: 0:11:21 lr: 0.000098 grad: 0.1311 (0.1590) loss: 0.7653 (0.7540) time: 0.1411 data: 0.0609 max mem: 9377 +Train: [34] [1700/6250] eta: 0:11:02 lr: 0.000098 grad: 0.1375 (0.1576) loss: 0.7502 (0.7540) time: 0.1396 data: 0.0606 max mem: 9377 +Train: [34] [1800/6250] eta: 0:10:44 lr: 0.000098 grad: 0.1426 (0.1566) loss: 0.7508 (0.7539) time: 0.1298 data: 0.0379 max mem: 9377 +Train: [34] [1900/6250] eta: 0:10:25 lr: 0.000098 grad: 0.1369 (0.1557) loss: 0.7578 (0.7536) time: 0.1124 data: 0.0245 max mem: 9377 +Train: [34] [2000/6250] eta: 0:10:08 lr: 0.000098 grad: 0.1317 (0.1546) loss: 0.7593 (0.7535) time: 0.1306 data: 0.0409 max mem: 9377 +Train: [34] [2100/6250] eta: 0:09:52 lr: 0.000098 grad: 0.1366 (0.1537) loss: 0.7443 (0.7533) time: 0.1297 data: 0.0448 max mem: 9377 +Train: [34] [2200/6250] eta: 0:09:37 lr: 0.000098 grad: 0.1343 (0.1528) loss: 0.7472 (0.7531) time: 0.1275 data: 0.0462 max mem: 9377 +Train: [34] [2300/6250] eta: 0:09:23 lr: 0.000098 grad: 0.1319 (0.1521) loss: 0.7493 (0.7530) time: 0.1504 data: 0.0667 max mem: 9377 +Train: [34] [2400/6250] eta: 0:09:07 lr: 0.000098 grad: 0.1298 (0.1512) loss: 0.7600 (0.7529) time: 0.1334 data: 0.0486 max mem: 9377 +Train: [34] [2500/6250] eta: 0:08:53 lr: 0.000098 grad: 0.1265 (0.1505) loss: 0.7539 (0.7529) time: 0.1375 data: 0.0523 max mem: 9377 +Train: [34] [2600/6250] eta: 0:08:40 lr: 0.000098 grad: 0.1331 (0.1497) loss: 0.7474 (0.7529) time: 0.1540 data: 0.0710 max mem: 9377 +Train: [34] [2700/6250] eta: 0:08:27 lr: 0.000098 grad: 0.1339 (0.1491) loss: 0.7486 (0.7529) time: 0.1499 data: 0.0704 max mem: 9377 +Train: [34] [2800/6250] eta: 0:08:14 lr: 0.000098 grad: 0.1379 (0.1486) loss: 0.7327 (0.7526) time: 0.1583 data: 0.0810 max mem: 9377 +Train: [34] [2900/6250] eta: 0:07:59 lr: 0.000098 grad: 0.1361 (0.1482) loss: 0.7461 (0.7526) time: 0.1433 data: 0.0701 max mem: 9377 +Train: [34] [3000/6250] eta: 0:07:45 lr: 0.000098 grad: 0.1320 (0.1478) loss: 0.7460 (0.7525) time: 0.1298 data: 0.0479 max mem: 9377 +Train: [34] [3100/6250] eta: 0:07:31 lr: 0.000098 grad: 0.1338 (0.1473) loss: 0.7510 (0.7525) time: 0.1519 data: 0.0734 max mem: 9377 +Train: [34] [3200/6250] eta: 0:07:16 lr: 0.000098 grad: 0.1349 (0.1469) loss: 0.7518 (0.7526) time: 0.1410 data: 0.0627 max mem: 9377 +Train: [34] [3300/6250] eta: 0:07:01 lr: 0.000097 grad: 0.1279 (0.1464) loss: 0.7428 (0.7526) time: 0.1368 data: 0.0555 max mem: 9377 +Train: [34] [3400/6250] eta: 0:06:46 lr: 0.000097 grad: 0.1286 (0.1460) loss: 0.7541 (0.7526) time: 0.1402 data: 0.0563 max mem: 9377 +Train: [34] [3500/6250] eta: 0:06:32 lr: 0.000097 grad: 0.1346 (0.1459) loss: 0.7434 (0.7525) time: 0.1312 data: 0.0507 max mem: 9377 +Train: [34] [3600/6250] eta: 0:06:17 lr: 0.000097 grad: 0.1360 (0.1457) loss: 0.7519 (0.7524) time: 0.1582 data: 0.0823 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:03 lr: 0.000097 grad: 0.1408 (0.1455) loss: 0.7442 (0.7523) time: 0.1428 data: 0.0591 max mem: 9377 +Train: [34] [3800/6250] eta: 0:05:48 lr: 0.000097 grad: 0.1453 (0.1455) loss: 0.7416 (0.7522) time: 0.1408 data: 0.0577 max mem: 9377 +Train: [34] [3900/6250] eta: 0:05:34 lr: 0.000097 grad: 0.1402 (0.1453) loss: 0.7508 (0.7521) time: 0.1261 data: 0.0488 max mem: 9377 +Train: [34] [4000/6250] eta: 0:05:20 lr: 0.000097 grad: 0.1467 (0.1453) loss: 0.7378 (0.7519) time: 0.1499 data: 0.0694 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:06 lr: 0.000097 grad: 0.1343 (0.1453) loss: 0.7482 (0.7518) time: 0.1397 data: 0.0553 max mem: 9377 +Train: [34] [4200/6250] eta: 0:04:51 lr: 0.000097 grad: 0.1325 (0.1450) loss: 0.7605 (0.7518) time: 0.1399 data: 0.0576 max mem: 9377 +Train: [34] [4300/6250] eta: 0:04:37 lr: 0.000097 grad: 0.1290 (0.1449) loss: 0.7566 (0.7517) time: 0.1422 data: 0.0631 max mem: 9377 +Train: [34] [4400/6250] eta: 0:04:23 lr: 0.000097 grad: 0.1419 (0.1447) loss: 0.7526 (0.7517) time: 0.1532 data: 0.0684 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:08 lr: 0.000097 grad: 0.1360 (0.1447) loss: 0.7505 (0.7517) time: 0.1349 data: 0.0538 max mem: 9377 +Train: [34] [4600/6250] eta: 0:03:54 lr: 0.000097 grad: 0.1352 (0.1446) loss: 0.7573 (0.7517) time: 0.1463 data: 0.0590 max mem: 9377 +Train: [34] [4700/6250] eta: 0:03:40 lr: 0.000097 grad: 0.1358 (0.1445) loss: 0.7509 (0.7517) time: 0.1646 data: 0.0869 max mem: 9377 +Train: [34] [4800/6250] eta: 0:03:26 lr: 0.000097 grad: 0.1420 (0.1444) loss: 0.7453 (0.7517) time: 0.1406 data: 0.0522 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:12 lr: 0.000097 grad: 0.1308 (0.1442) loss: 0.7649 (0.7517) time: 0.1480 data: 0.0675 max mem: 9377 +Train: [34] [5000/6250] eta: 0:02:57 lr: 0.000097 grad: 0.1404 (0.1442) loss: 0.7554 (0.7518) time: 0.1235 data: 0.0403 max mem: 9377 +Train: [34] [5100/6250] eta: 0:02:43 lr: 0.000097 grad: 0.1376 (0.1441) loss: 0.7493 (0.7518) time: 0.1343 data: 0.0454 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:29 lr: 0.000097 grad: 0.1396 (0.1441) loss: 0.7579 (0.7519) time: 0.1343 data: 0.0543 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:15 lr: 0.000097 grad: 0.1359 (0.1442) loss: 0.7592 (0.7519) time: 0.1715 data: 0.0879 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:00 lr: 0.000097 grad: 0.1371 (0.1441) loss: 0.7511 (0.7519) time: 0.1411 data: 0.0586 max mem: 9377 +Train: [34] [5500/6250] eta: 0:01:46 lr: 0.000097 grad: 0.1401 (0.1441) loss: 0.7363 (0.7519) time: 0.1391 data: 0.0565 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:32 lr: 0.000097 grad: 0.1439 (0.1442) loss: 0.7459 (0.7518) time: 0.1106 data: 0.0232 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:18 lr: 0.000097 grad: 0.1294 (0.1441) loss: 0.7596 (0.7519) time: 0.1708 data: 0.0937 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:03 lr: 0.000097 grad: 0.1388 (0.1440) loss: 0.7578 (0.7519) time: 0.1403 data: 0.0562 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:49 lr: 0.000097 grad: 0.1440 (0.1440) loss: 0.7474 (0.7518) time: 0.1589 data: 0.0784 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:35 lr: 0.000097 grad: 0.1415 (0.1441) loss: 0.7526 (0.7518) time: 0.1367 data: 0.0571 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:21 lr: 0.000097 grad: 0.1426 (0.1441) loss: 0.7477 (0.7518) time: 0.1470 data: 0.0550 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:07 lr: 0.000097 grad: 0.1362 (0.1440) loss: 0.7558 (0.7519) time: 0.1328 data: 0.0515 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.1328 (0.1439) loss: 0.7654 (0.7519) time: 0.1400 data: 0.0602 max mem: 9377 +Train: [34] Total time: 0:14:49 (0.1423 s / it) +Averaged stats: lr: 0.000097 grad: 0.1328 (0.1439) loss: 0.7654 (0.7519) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:03:26 loss: 0.8492 (0.8492) time: 3.3332 data: 3.2405 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8542 (0.8564) time: 0.1143 data: 0.0875 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:13 (0.2119 s / it) +Averaged stats (hcp-train-subset): loss: 0.8542 (0.8564) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [34] [ 0/62] eta: 0:04:28 loss: 0.8497 (0.8497) time: 4.3356 data: 4.2731 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8539 (0.8546) time: 0.0965 data: 0.0697 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:13 (0.2228 s / it) +Averaged stats (hcp-val): loss: 0.8539 (0.8546) +Making plots (hcp-val): example=45 +Eval (nsd-val): [34] [ 0/62] eta: 0:03:40 loss: 0.8180 (0.8180) time: 3.5554 data: 3.4911 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8287 (0.8301) time: 0.1081 data: 0.0832 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:13 (0.2142 s / it) +Averaged stats (nsd-val): loss: 0.8287 (0.8301) +Making plots (nsd-val): example=30 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 10:34:51 lr: 0.000097 grad: 0.1408 (0.1408) loss: 0.8288 (0.8288) time: 6.0947 data: 5.9771 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:20:48 lr: 0.000097 grad: 0.2229 (0.2621) loss: 0.7361 (0.7614) time: 0.1565 data: 0.0649 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:17:46 lr: 0.000097 grad: 0.1769 (0.2426) loss: 0.7409 (0.7559) time: 0.1626 data: 0.0712 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:16:36 lr: 0.000097 grad: 0.1524 (0.2147) loss: 0.7567 (0.7561) time: 0.1564 data: 0.0658 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:15:49 lr: 0.000097 grad: 0.1501 (0.2013) loss: 0.7600 (0.7572) time: 0.1564 data: 0.0767 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:15:19 lr: 0.000097 grad: 0.1421 (0.1912) loss: 0.7503 (0.7571) time: 0.1762 data: 0.0830 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:14:45 lr: 0.000097 grad: 0.1504 (0.1850) loss: 0.7637 (0.7572) time: 0.1494 data: 0.0693 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:14:20 lr: 0.000096 grad: 0.1346 (0.1790) loss: 0.7628 (0.7578) time: 0.1268 data: 0.0395 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:13:57 lr: 0.000096 grad: 0.1369 (0.1743) loss: 0.7622 (0.7580) time: 0.1560 data: 0.0676 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:13:33 lr: 0.000096 grad: 0.1365 (0.1703) loss: 0.7532 (0.7576) time: 0.1387 data: 0.0447 max mem: 9377 +Train: [35] [1000/6250] eta: 0:13:18 lr: 0.000096 grad: 0.1441 (0.1676) loss: 0.7468 (0.7571) time: 0.1602 data: 0.0798 max mem: 9377 +Train: [35] [1100/6250] eta: 0:12:59 lr: 0.000096 grad: 0.1357 (0.1653) loss: 0.7584 (0.7567) time: 0.1379 data: 0.0565 max mem: 9377 +Train: [35] [1200/6250] eta: 0:12:47 lr: 0.000096 grad: 0.1328 (0.1630) loss: 0.7597 (0.7562) time: 0.1558 data: 0.0751 max mem: 9377 +Train: [35] [1300/6250] eta: 0:12:30 lr: 0.000096 grad: 0.1414 (0.1609) loss: 0.7363 (0.7555) time: 0.1644 data: 0.0811 max mem: 9377 +Train: [35] [1400/6250] eta: 0:12:08 lr: 0.000096 grad: 0.1229 (0.1591) loss: 0.7540 (0.7553) time: 0.1121 data: 0.0378 max mem: 9377 +Train: [35] [1500/6250] eta: 0:11:48 lr: 0.000096 grad: 0.1258 (0.1574) loss: 0.7526 (0.7551) time: 0.1283 data: 0.0464 max mem: 9377 +Train: [35] [1600/6250] eta: 0:11:31 lr: 0.000096 grad: 0.1316 (0.1560) loss: 0.7489 (0.7548) time: 0.1315 data: 0.0464 max mem: 9377 +Train: [35] [1700/6250] eta: 0:11:11 lr: 0.000096 grad: 0.1319 (0.1551) loss: 0.7534 (0.7545) time: 0.1197 data: 0.0419 max mem: 9377 +Train: [35] [1800/6250] eta: 0:10:54 lr: 0.000096 grad: 0.1421 (0.1542) loss: 0.7488 (0.7542) time: 0.1247 data: 0.0412 max mem: 9377 +Train: [35] [1900/6250] eta: 0:10:35 lr: 0.000096 grad: 0.1334 (0.1535) loss: 0.7477 (0.7539) time: 0.1220 data: 0.0395 max mem: 9377 +Train: [35] [2000/6250] eta: 0:10:16 lr: 0.000096 grad: 0.1362 (0.1528) loss: 0.7584 (0.7536) time: 0.1178 data: 0.0325 max mem: 9377 +Train: [35] [2100/6250] eta: 0:09:58 lr: 0.000096 grad: 0.1428 (0.1522) loss: 0.7456 (0.7533) time: 0.1379 data: 0.0428 max mem: 9377 +Train: [35] [2200/6250] eta: 0:09:40 lr: 0.000096 grad: 0.1400 (0.1518) loss: 0.7553 (0.7531) time: 0.1169 data: 0.0210 max mem: 9377 +Train: [35] [2300/6250] eta: 0:09:23 lr: 0.000096 grad: 0.1402 (0.1513) loss: 0.7534 (0.7529) time: 0.1420 data: 0.0624 max mem: 9377 +Train: [35] [2400/6250] eta: 0:09:10 lr: 0.000096 grad: 0.1319 (0.1507) loss: 0.7495 (0.7529) time: 0.1619 data: 0.0849 max mem: 9377 +Train: [35] [2500/6250] eta: 0:08:55 lr: 0.000096 grad: 0.1299 (0.1502) loss: 0.7600 (0.7526) time: 0.1467 data: 0.0635 max mem: 9377 +Train: [35] [2600/6250] eta: 0:08:41 lr: 0.000096 grad: 0.1316 (0.1497) loss: 0.7603 (0.7526) time: 0.1411 data: 0.0624 max mem: 9377 +Train: [35] [2700/6250] eta: 0:08:25 lr: 0.000096 grad: 0.1380 (0.1492) loss: 0.7350 (0.7525) time: 0.1278 data: 0.0428 max mem: 9377 +Train: [35] [2800/6250] eta: 0:08:10 lr: 0.000096 grad: 0.1309 (0.1486) loss: 0.7535 (0.7526) time: 0.1436 data: 0.0669 max mem: 9377 +Train: [35] [2900/6250] eta: 0:07:55 lr: 0.000096 grad: 0.1361 (0.1481) loss: 0.7532 (0.7527) time: 0.1369 data: 0.0595 max mem: 9377 +Train: [35] [3000/6250] eta: 0:07:40 lr: 0.000096 grad: 0.1301 (0.1476) loss: 0.7539 (0.7528) time: 0.1180 data: 0.0302 max mem: 9377 +Train: [35] [3100/6250] eta: 0:07:25 lr: 0.000096 grad: 0.1453 (0.1475) loss: 0.7454 (0.7527) time: 0.1204 data: 0.0393 max mem: 9377 +Train: [35] [3200/6250] eta: 0:07:10 lr: 0.000096 grad: 0.1342 (0.1472) loss: 0.7499 (0.7526) time: 0.1403 data: 0.0602 max mem: 9377 +Train: [35] [3300/6250] eta: 0:06:55 lr: 0.000096 grad: 0.1458 (0.1471) loss: 0.7441 (0.7524) time: 0.1364 data: 0.0520 max mem: 9377 +Train: [35] [3400/6250] eta: 0:06:40 lr: 0.000096 grad: 0.1274 (0.1469) loss: 0.7679 (0.7526) time: 0.1124 data: 0.0304 max mem: 9377 +Train: [35] [3500/6250] eta: 0:06:26 lr: 0.000096 grad: 0.1390 (0.1468) loss: 0.7503 (0.7525) time: 0.1461 data: 0.0625 max mem: 9377 +Train: [35] [3600/6250] eta: 0:06:11 lr: 0.000096 grad: 0.1265 (0.1464) loss: 0.7677 (0.7527) time: 0.1405 data: 0.0598 max mem: 9377 +Train: [35] [3700/6250] eta: 0:05:57 lr: 0.000096 grad: 0.1349 (0.1462) loss: 0.7459 (0.7528) time: 0.1554 data: 0.0800 max mem: 9377 +Train: [35] [3800/6250] eta: 0:05:43 lr: 0.000096 grad: 0.1364 (0.1459) loss: 0.7523 (0.7528) time: 0.1272 data: 0.0445 max mem: 9377 +Train: [35] [3900/6250] eta: 0:05:29 lr: 0.000096 grad: 0.1458 (0.1458) loss: 0.7524 (0.7530) time: 0.1483 data: 0.0650 max mem: 9377 +Train: [35] [4000/6250] eta: 0:05:14 lr: 0.000096 grad: 0.1495 (0.1458) loss: 0.7558 (0.7530) time: 0.1359 data: 0.0537 max mem: 9377 +Train: [35] [4100/6250] eta: 0:05:00 lr: 0.000096 grad: 0.1334 (0.1458) loss: 0.7497 (0.7530) time: 0.1509 data: 0.0670 max mem: 9377 +Train: [35] [4200/6250] eta: 0:04:47 lr: 0.000096 grad: 0.1402 (0.1456) loss: 0.7529 (0.7531) time: 0.1370 data: 0.0586 max mem: 9377 +Train: [35] [4300/6250] eta: 0:04:33 lr: 0.000095 grad: 0.1382 (0.1455) loss: 0.7558 (0.7531) time: 0.1407 data: 0.0556 max mem: 9377 +Train: [35] [4400/6250] eta: 0:04:18 lr: 0.000095 grad: 0.1386 (0.1453) loss: 0.7517 (0.7531) time: 0.1506 data: 0.0688 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:04 lr: 0.000095 grad: 0.1360 (0.1452) loss: 0.7547 (0.7531) time: 0.1424 data: 0.0625 max mem: 9377 +Train: [35] [4600/6250] eta: 0:03:50 lr: 0.000095 grad: 0.1329 (0.1451) loss: 0.7619 (0.7531) time: 0.1318 data: 0.0501 max mem: 9377 +Train: [35] [4700/6250] eta: 0:03:36 lr: 0.000095 grad: 0.1391 (0.1449) loss: 0.7527 (0.7532) time: 0.1314 data: 0.0508 max mem: 9377 +Train: [35] [4800/6250] eta: 0:03:22 lr: 0.000095 grad: 0.1359 (0.1448) loss: 0.7487 (0.7532) time: 0.1454 data: 0.0643 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:08 lr: 0.000095 grad: 0.1322 (0.1446) loss: 0.7544 (0.7532) time: 0.1267 data: 0.0392 max mem: 9377 +Train: [35] [5000/6250] eta: 0:02:54 lr: 0.000095 grad: 0.1387 (0.1445) loss: 0.7493 (0.7532) time: 0.1349 data: 0.0528 max mem: 9377 +Train: [35] [5100/6250] eta: 0:02:40 lr: 0.000095 grad: 0.1393 (0.1443) loss: 0.7480 (0.7533) time: 0.1399 data: 0.0559 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:26 lr: 0.000095 grad: 0.1428 (0.1442) loss: 0.7474 (0.7532) time: 0.1250 data: 0.0419 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:12 lr: 0.000095 grad: 0.1378 (0.1442) loss: 0.7566 (0.7531) time: 0.1391 data: 0.0553 max mem: 9377 +Train: [35] [5400/6250] eta: 0:01:58 lr: 0.000095 grad: 0.1325 (0.1441) loss: 0.7591 (0.7530) time: 0.1472 data: 0.0604 max mem: 9377 +Train: [35] [5500/6250] eta: 0:01:44 lr: 0.000095 grad: 0.1370 (0.1440) loss: 0.7486 (0.7530) time: 0.1712 data: 0.0892 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:30 lr: 0.000095 grad: 0.1338 (0.1439) loss: 0.7570 (0.7530) time: 0.1112 data: 0.0306 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:16 lr: 0.000095 grad: 0.1393 (0.1438) loss: 0.7612 (0.7530) time: 0.1429 data: 0.0613 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:02 lr: 0.000095 grad: 0.1362 (0.1438) loss: 0.7499 (0.7530) time: 0.1423 data: 0.0647 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:49 lr: 0.000095 grad: 0.1340 (0.1438) loss: 0.7581 (0.7531) time: 0.1737 data: 0.0957 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:35 lr: 0.000095 grad: 0.1432 (0.1437) loss: 0.7579 (0.7532) time: 0.1269 data: 0.0448 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:21 lr: 0.000095 grad: 0.1476 (0.1437) loss: 0.7491 (0.7532) time: 0.1455 data: 0.0628 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:06 lr: 0.000095 grad: 0.1451 (0.1437) loss: 0.7372 (0.7532) time: 0.1356 data: 0.0535 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.1408 (0.1437) loss: 0.7422 (0.7532) time: 0.1342 data: 0.0457 max mem: 9377 +Train: [35] Total time: 0:14:40 (0.1408 s / it) +Averaged stats: lr: 0.000095 grad: 0.1408 (0.1437) loss: 0.7422 (0.7532) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:03:56 loss: 0.8606 (0.8606) time: 3.8145 data: 3.7300 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8580 (0.8588) time: 0.1441 data: 0.1184 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:13 (0.2161 s / it) +Averaged stats (hcp-train-subset): loss: 0.8580 (0.8588) +Eval (hcp-val): [35] [ 0/62] eta: 0:03:54 loss: 0.8635 (0.8635) time: 3.7895 data: 3.7090 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8566 (0.8563) time: 0.1384 data: 0.1119 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-val): loss: 0.8566 (0.8563) +Eval (nsd-val): [35] [ 0/62] eta: 0:03:13 loss: 0.8201 (0.8201) time: 3.1265 data: 3.0506 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8310 (0.8314) time: 0.1188 data: 0.0922 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:13 (0.2119 s / it) +Averaged stats (nsd-val): loss: 0.8310 (0.8314) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [36] [ 0/6250] eta: 7:03:59 lr: 0.000095 grad: 0.2470 (0.2470) loss: 0.7715 (0.7715) time: 4.0702 data: 3.7787 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:20:51 lr: 0.000095 grad: 0.2082 (0.2701) loss: 0.7427 (0.7581) time: 0.1424 data: 0.0501 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:18:11 lr: 0.000095 grad: 0.1912 (0.2318) loss: 0.7296 (0.7503) time: 0.1469 data: 0.0549 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:16:37 lr: 0.000095 grad: 0.1528 (0.2109) loss: 0.7437 (0.7491) time: 0.1546 data: 0.0656 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:15:49 lr: 0.000095 grad: 0.1515 (0.1989) loss: 0.7400 (0.7486) time: 0.1505 data: 0.0552 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:15:11 lr: 0.000095 grad: 0.1366 (0.1887) loss: 0.7625 (0.7480) time: 0.1403 data: 0.0492 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:14:36 lr: 0.000095 grad: 0.1442 (0.1815) loss: 0.7349 (0.7474) time: 0.1262 data: 0.0372 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:14:03 lr: 0.000095 grad: 0.1349 (0.1764) loss: 0.7400 (0.7468) time: 0.1244 data: 0.0363 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:13:37 lr: 0.000095 grad: 0.1350 (0.1718) loss: 0.7605 (0.7467) time: 0.1277 data: 0.0426 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:13:19 lr: 0.000095 grad: 0.1361 (0.1677) loss: 0.7630 (0.7475) time: 0.1532 data: 0.0696 max mem: 9377 +Train: [36] [1000/6250] eta: 0:12:57 lr: 0.000095 grad: 0.1366 (0.1651) loss: 0.7548 (0.7479) time: 0.1275 data: 0.0440 max mem: 9377 +Train: [36] [1100/6250] eta: 0:12:35 lr: 0.000095 grad: 0.1495 (0.1631) loss: 0.7461 (0.7483) time: 0.1451 data: 0.0672 max mem: 9377 +Train: [36] [1200/6250] eta: 0:12:18 lr: 0.000095 grad: 0.1332 (0.1608) loss: 0.7555 (0.7490) time: 0.1411 data: 0.0614 max mem: 9377 +Train: [36] [1300/6250] eta: 0:12:00 lr: 0.000095 grad: 0.1296 (0.1588) loss: 0.7502 (0.7492) time: 0.1498 data: 0.0614 max mem: 9377 +Train: [36] [1400/6250] eta: 0:11:49 lr: 0.000095 grad: 0.1262 (0.1569) loss: 0.7442 (0.7495) time: 0.1458 data: 0.0540 max mem: 9377 +Train: [36] [1500/6250] eta: 0:11:35 lr: 0.000095 grad: 0.1334 (0.1552) loss: 0.7534 (0.7498) time: 0.1410 data: 0.0544 max mem: 9377 +Train: [36] [1600/6250] eta: 0:11:21 lr: 0.000094 grad: 0.1265 (0.1537) loss: 0.7556 (0.7501) time: 0.1318 data: 0.0506 max mem: 9377 +Train: [36] [1700/6250] eta: 0:11:04 lr: 0.000094 grad: 0.1384 (0.1527) loss: 0.7438 (0.7503) time: 0.1370 data: 0.0609 max mem: 9377 +Train: [36] [1800/6250] eta: 0:10:50 lr: 0.000094 grad: 0.1356 (0.1518) loss: 0.7457 (0.7505) time: 0.1388 data: 0.0569 max mem: 9377 +Train: [36] [1900/6250] eta: 0:10:35 lr: 0.000094 grad: 0.1354 (0.1510) loss: 0.7489 (0.7503) time: 0.1523 data: 0.0647 max mem: 9377 +Train: [36] [2000/6250] eta: 0:10:21 lr: 0.000094 grad: 0.1319 (0.1503) loss: 0.7469 (0.7501) time: 0.1554 data: 0.0695 max mem: 9377 +Train: [36] [2100/6250] eta: 0:10:06 lr: 0.000094 grad: 0.1379 (0.1497) loss: 0.7521 (0.7498) time: 0.1319 data: 0.0505 max mem: 9377 +Train: [36] [2200/6250] eta: 0:09:50 lr: 0.000094 grad: 0.1432 (0.1492) loss: 0.7430 (0.7496) time: 0.1184 data: 0.0278 max mem: 9377 +Train: [36] [2300/6250] eta: 0:09:34 lr: 0.000094 grad: 0.1351 (0.1489) loss: 0.7512 (0.7495) time: 0.1328 data: 0.0449 max mem: 9377 +Train: [36] [2400/6250] eta: 0:09:15 lr: 0.000094 grad: 0.1340 (0.1484) loss: 0.7570 (0.7494) time: 0.1232 data: 0.0388 max mem: 9377 +Train: [36] [2500/6250] eta: 0:08:58 lr: 0.000094 grad: 0.1363 (0.1480) loss: 0.7292 (0.7492) time: 0.1202 data: 0.0303 max mem: 9377 +Train: [36] [2600/6250] eta: 0:08:42 lr: 0.000094 grad: 0.1370 (0.1477) loss: 0.7467 (0.7490) time: 0.1463 data: 0.0648 max mem: 9377 +Train: [36] [2700/6250] eta: 0:08:27 lr: 0.000094 grad: 0.1356 (0.1474) loss: 0.7341 (0.7490) time: 0.1469 data: 0.0710 max mem: 9377 +Train: [36] [2800/6250] eta: 0:08:14 lr: 0.000094 grad: 0.1262 (0.1470) loss: 0.7500 (0.7490) time: 0.1639 data: 0.0890 max mem: 9377 +Train: [36] [2900/6250] eta: 0:08:00 lr: 0.000094 grad: 0.1330 (0.1466) loss: 0.7521 (0.7490) time: 0.1323 data: 0.0541 max mem: 9377 +Train: [36] [3000/6250] eta: 0:07:46 lr: 0.000094 grad: 0.1359 (0.1464) loss: 0.7520 (0.7491) time: 0.1894 data: 0.1159 max mem: 9377 +Train: [36] [3100/6250] eta: 0:07:33 lr: 0.000094 grad: 0.1344 (0.1462) loss: 0.7510 (0.7492) time: 0.1484 data: 0.0665 max mem: 9377 +Train: [36] [3200/6250] eta: 0:07:19 lr: 0.000094 grad: 0.1387 (0.1462) loss: 0.7498 (0.7491) time: 0.1489 data: 0.0713 max mem: 9377 +Train: [36] [3300/6250] eta: 0:07:06 lr: 0.000094 grad: 0.1405 (0.1460) loss: 0.7447 (0.7492) time: 0.1544 data: 0.0754 max mem: 9377 +Train: [36] [3400/6250] eta: 0:06:53 lr: 0.000094 grad: 0.1384 (0.1460) loss: 0.7516 (0.7492) time: 0.1559 data: 0.0767 max mem: 9377 +Train: [36] [3500/6250] eta: 0:06:39 lr: 0.000094 grad: 0.1380 (0.1459) loss: 0.7437 (0.7493) time: 0.1430 data: 0.0623 max mem: 9377 +Train: [36] [3600/6250] eta: 0:06:24 lr: 0.000094 grad: 0.1368 (0.1457) loss: 0.7552 (0.7494) time: 0.1340 data: 0.0523 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:09 lr: 0.000094 grad: 0.1497 (0.1457) loss: 0.7462 (0.7494) time: 0.1351 data: 0.0508 max mem: 9377 +Train: [36] [3800/6250] eta: 0:05:54 lr: 0.000094 grad: 0.1409 (0.1456) loss: 0.7457 (0.7494) time: 0.1494 data: 0.0706 max mem: 9377 +Train: [36] [3900/6250] eta: 0:05:39 lr: 0.000094 grad: 0.1350 (0.1455) loss: 0.7554 (0.7495) time: 0.1339 data: 0.0520 max mem: 9377 +Train: [36] [4000/6250] eta: 0:05:24 lr: 0.000094 grad: 0.1395 (0.1454) loss: 0.7451 (0.7494) time: 0.1261 data: 0.0481 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:09 lr: 0.000094 grad: 0.1393 (0.1453) loss: 0.7481 (0.7494) time: 0.1508 data: 0.0663 max mem: 9377 +Train: [36] [4200/6250] eta: 0:04:54 lr: 0.000094 grad: 0.1467 (0.1453) loss: 0.7436 (0.7493) time: 0.1421 data: 0.0557 max mem: 9377 +Train: [36] [4300/6250] eta: 0:04:40 lr: 0.000094 grad: 0.1319 (0.1451) loss: 0.7471 (0.7493) time: 0.1873 data: 0.0488 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:25 lr: 0.000094 grad: 0.1320 (0.1450) loss: 0.7523 (0.7493) time: 0.1417 data: 0.0580 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:11 lr: 0.000094 grad: 0.1311 (0.1448) loss: 0.7541 (0.7493) time: 0.1692 data: 0.0846 max mem: 9377 +Train: [36] [4600/6250] eta: 0:03:56 lr: 0.000094 grad: 0.1380 (0.1448) loss: 0.7565 (0.7492) time: 0.1338 data: 0.0526 max mem: 9377 +Train: [36] [4700/6250] eta: 0:03:42 lr: 0.000094 grad: 0.1373 (0.1447) loss: 0.7504 (0.7492) time: 0.1203 data: 0.0375 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:27 lr: 0.000094 grad: 0.1405 (0.1447) loss: 0.7410 (0.7492) time: 0.1335 data: 0.0514 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:12 lr: 0.000094 grad: 0.1398 (0.1447) loss: 0.7545 (0.7492) time: 0.1362 data: 0.0545 max mem: 9377 +Train: [36] [5000/6250] eta: 0:02:58 lr: 0.000094 grad: 0.1461 (0.1448) loss: 0.7516 (0.7493) time: 0.1390 data: 0.0514 max mem: 9377 +Train: [36] [5100/6250] eta: 0:02:44 lr: 0.000093 grad: 0.1343 (0.1447) loss: 0.7598 (0.7493) time: 0.1122 data: 0.0301 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:29 lr: 0.000093 grad: 0.1338 (0.1446) loss: 0.7570 (0.7494) time: 0.1476 data: 0.0629 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:15 lr: 0.000093 grad: 0.1349 (0.1445) loss: 0.7511 (0.7496) time: 0.1433 data: 0.0579 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:01 lr: 0.000093 grad: 0.1462 (0.1444) loss: 0.7452 (0.7495) time: 0.1481 data: 0.0622 max mem: 9377 +Train: [36] [5500/6250] eta: 0:01:46 lr: 0.000093 grad: 0.1319 (0.1444) loss: 0.7539 (0.7496) time: 0.1338 data: 0.0540 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:32 lr: 0.000093 grad: 0.1438 (0.1443) loss: 0.7452 (0.7497) time: 0.1424 data: 0.0631 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:18 lr: 0.000093 grad: 0.1373 (0.1442) loss: 0.7540 (0.7497) time: 0.1473 data: 0.0650 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:04 lr: 0.000093 grad: 0.1357 (0.1441) loss: 0.7577 (0.7499) time: 0.1486 data: 0.0680 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:49 lr: 0.000093 grad: 0.1376 (0.1440) loss: 0.7622 (0.7500) time: 0.1310 data: 0.0513 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:35 lr: 0.000093 grad: 0.1401 (0.1439) loss: 0.7473 (0.7500) time: 0.1379 data: 0.0605 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:21 lr: 0.000093 grad: 0.1402 (0.1439) loss: 0.7478 (0.7501) time: 0.1156 data: 0.0309 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:07 lr: 0.000093 grad: 0.1480 (0.1438) loss: 0.7486 (0.7501) time: 0.1344 data: 0.0551 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.1416 (0.1438) loss: 0.7408 (0.7501) time: 0.1463 data: 0.0662 max mem: 9377 +Train: [36] Total time: 0:14:54 (0.1431 s / it) +Averaged stats: lr: 0.000093 grad: 0.1416 (0.1438) loss: 0.7408 (0.7501) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:03:31 loss: 0.8544 (0.8544) time: 3.4113 data: 3.3287 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8575 (0.8600) time: 0.1388 data: 0.1135 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:13 (0.2234 s / it) +Averaged stats (hcp-train-subset): loss: 0.8575 (0.8600) +Eval (hcp-val): [36] [ 0/62] eta: 0:04:48 loss: 0.8538 (0.8538) time: 4.6539 data: 4.6239 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8565 (0.8590) time: 0.1311 data: 0.1035 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:14 (0.2376 s / it) +Averaged stats (hcp-val): loss: 0.8565 (0.8590) +Eval (nsd-val): [36] [ 0/62] eta: 0:06:08 loss: 0.8223 (0.8223) time: 5.9440 data: 5.9136 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8320 (0.8332) time: 0.1586 data: 0.1331 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:15 (0.2442 s / it) +Averaged stats (nsd-val): loss: 0.8320 (0.8332) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 8:59:05 lr: 0.000093 grad: 0.1105 (0.1105) loss: 0.8387 (0.8387) time: 5.1753 data: 4.9658 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:22:54 lr: 0.000093 grad: 0.1920 (0.2189) loss: 0.7765 (0.7854) time: 0.1632 data: 0.0646 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:19:59 lr: 0.000093 grad: 0.1857 (0.2226) loss: 0.7623 (0.7731) time: 0.2001 data: 0.1070 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:18:27 lr: 0.000093 grad: 0.1588 (0.2061) loss: 0.7673 (0.7685) time: 0.1633 data: 0.0738 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:17:09 lr: 0.000093 grad: 0.1601 (0.1964) loss: 0.7582 (0.7662) time: 0.1543 data: 0.0674 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:16:23 lr: 0.000093 grad: 0.1424 (0.1887) loss: 0.7709 (0.7660) time: 0.1602 data: 0.0755 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:15:38 lr: 0.000093 grad: 0.1430 (0.1821) loss: 0.7670 (0.7660) time: 0.1690 data: 0.0850 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:15:09 lr: 0.000093 grad: 0.1366 (0.1773) loss: 0.7617 (0.7660) time: 0.1659 data: 0.0779 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:14:41 lr: 0.000093 grad: 0.1402 (0.1729) loss: 0.7561 (0.7658) time: 0.1583 data: 0.0740 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:14:14 lr: 0.000093 grad: 0.1386 (0.1692) loss: 0.7575 (0.7655) time: 0.1473 data: 0.0623 max mem: 9377 +Train: [37] [1000/6250] eta: 0:13:48 lr: 0.000093 grad: 0.1457 (0.1669) loss: 0.7590 (0.7649) time: 0.1456 data: 0.0640 max mem: 9377 +Train: [37] [1100/6250] eta: 0:13:24 lr: 0.000093 grad: 0.1340 (0.1645) loss: 0.7640 (0.7645) time: 0.1506 data: 0.0722 max mem: 9377 +Train: [37] [1200/6250] eta: 0:13:07 lr: 0.000093 grad: 0.1414 (0.1624) loss: 0.7636 (0.7639) time: 0.1456 data: 0.0692 max mem: 9377 +Train: [37] [1300/6250] eta: 0:12:50 lr: 0.000093 grad: 0.1363 (0.1607) loss: 0.7636 (0.7635) time: 0.1109 data: 0.0268 max mem: 9377 +Train: [37] [1400/6250] eta: 0:12:35 lr: 0.000093 grad: 0.1414 (0.1598) loss: 0.7568 (0.7630) time: 0.1508 data: 0.0702 max mem: 9377 +Train: [37] [1500/6250] eta: 0:12:15 lr: 0.000093 grad: 0.1349 (0.1584) loss: 0.7560 (0.7625) time: 0.1491 data: 0.0698 max mem: 9377 +Train: [37] [1600/6250] eta: 0:11:57 lr: 0.000093 grad: 0.1414 (0.1573) loss: 0.7679 (0.7622) time: 0.1543 data: 0.0726 max mem: 9377 +Train: [37] [1700/6250] eta: 0:11:41 lr: 0.000093 grad: 0.1382 (0.1563) loss: 0.7659 (0.7622) time: 0.1934 data: 0.1164 max mem: 9377 +Train: [37] [1800/6250] eta: 0:11:23 lr: 0.000093 grad: 0.1426 (0.1557) loss: 0.7588 (0.7619) time: 0.1477 data: 0.0625 max mem: 9377 +Train: [37] [1900/6250] eta: 0:11:04 lr: 0.000093 grad: 0.1311 (0.1548) loss: 0.7701 (0.7618) time: 0.1288 data: 0.0424 max mem: 9377 +Train: [37] [2000/6250] eta: 0:10:45 lr: 0.000093 grad: 0.1402 (0.1540) loss: 0.7540 (0.7615) time: 0.1234 data: 0.0410 max mem: 9377 +Train: [37] [2100/6250] eta: 0:10:27 lr: 0.000093 grad: 0.1367 (0.1533) loss: 0.7524 (0.7612) time: 0.1654 data: 0.0823 max mem: 9377 +Train: [37] [2200/6250] eta: 0:10:11 lr: 0.000093 grad: 0.1364 (0.1526) loss: 0.7599 (0.7609) time: 0.1475 data: 0.0641 max mem: 9377 +Train: [37] [2300/6250] eta: 0:09:55 lr: 0.000092 grad: 0.1334 (0.1519) loss: 0.7486 (0.7607) time: 0.1379 data: 0.0494 max mem: 9377 +Train: [37] [2400/6250] eta: 0:09:39 lr: 0.000092 grad: 0.1367 (0.1515) loss: 0.7604 (0.7604) time: 0.1491 data: 0.0637 max mem: 9377 +Train: [37] [2500/6250] eta: 0:09:22 lr: 0.000092 grad: 0.1442 (0.1511) loss: 0.7642 (0.7600) time: 0.1321 data: 0.0427 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:04 lr: 0.000092 grad: 0.1412 (0.1507) loss: 0.7367 (0.7596) time: 0.1177 data: 0.0330 max mem: 9377 +Train: [37] [2700/6250] eta: 0:08:47 lr: 0.000092 grad: 0.1430 (0.1503) loss: 0.7453 (0.7591) time: 0.1353 data: 0.0459 max mem: 9377 +Train: [37] [2800/6250] eta: 0:08:31 lr: 0.000092 grad: 0.1379 (0.1500) loss: 0.7495 (0.7587) time: 0.1545 data: 0.0699 max mem: 9377 +Train: [37] [2900/6250] eta: 0:08:15 lr: 0.000092 grad: 0.1381 (0.1495) loss: 0.7435 (0.7586) time: 0.1477 data: 0.0637 max mem: 9377 +Train: [37] [3000/6250] eta: 0:07:59 lr: 0.000092 grad: 0.1490 (0.1493) loss: 0.7427 (0.7583) time: 0.1327 data: 0.0476 max mem: 9377 +Train: [37] [3100/6250] eta: 0:07:43 lr: 0.000092 grad: 0.1392 (0.1491) loss: 0.7457 (0.7580) time: 0.1439 data: 0.0720 max mem: 9377 +Train: [37] [3200/6250] eta: 0:07:29 lr: 0.000092 grad: 0.1453 (0.1490) loss: 0.7460 (0.7576) time: 0.1391 data: 0.0624 max mem: 9377 +Train: [37] [3300/6250] eta: 0:07:15 lr: 0.000092 grad: 0.1416 (0.1488) loss: 0.7384 (0.7573) time: 0.1691 data: 0.0926 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:00 lr: 0.000092 grad: 0.1401 (0.1485) loss: 0.7468 (0.7570) time: 0.1407 data: 0.0636 max mem: 9377 +Train: [37] [3500/6250] eta: 0:06:47 lr: 0.000092 grad: 0.1385 (0.1483) loss: 0.7463 (0.7567) time: 0.1869 data: 0.1150 max mem: 9377 +Train: [37] [3600/6250] eta: 0:06:32 lr: 0.000092 grad: 0.1409 (0.1481) loss: 0.7433 (0.7564) time: 0.1656 data: 0.0861 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:17 lr: 0.000092 grad: 0.1314 (0.1480) loss: 0.7498 (0.7562) time: 0.1336 data: 0.0608 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:02 lr: 0.000092 grad: 0.1373 (0.1478) loss: 0.7540 (0.7559) time: 0.1457 data: 0.0701 max mem: 9377 +Train: [37] [3900/6250] eta: 0:05:48 lr: 0.000092 grad: 0.1343 (0.1476) loss: 0.7526 (0.7557) time: 0.1647 data: 0.0849 max mem: 9377 +Train: [37] [4000/6250] eta: 0:05:33 lr: 0.000092 grad: 0.1376 (0.1475) loss: 0.7438 (0.7555) time: 0.1534 data: 0.0755 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:19 lr: 0.000092 grad: 0.1422 (0.1474) loss: 0.7392 (0.7552) time: 0.1565 data: 0.0778 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:04 lr: 0.000092 grad: 0.1398 (0.1472) loss: 0.7468 (0.7551) time: 0.1391 data: 0.0625 max mem: 9377 +Train: [37] [4300/6250] eta: 0:04:49 lr: 0.000092 grad: 0.1377 (0.1470) loss: 0.7591 (0.7550) time: 0.1384 data: 0.0538 max mem: 9377 +Train: [37] [4400/6250] eta: 0:04:34 lr: 0.000092 grad: 0.1460 (0.1470) loss: 0.7478 (0.7547) time: 0.1513 data: 0.0731 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:19 lr: 0.000092 grad: 0.1403 (0.1468) loss: 0.7478 (0.7545) time: 0.1506 data: 0.0754 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:04 lr: 0.000092 grad: 0.1445 (0.1467) loss: 0.7393 (0.7544) time: 0.1531 data: 0.0774 max mem: 9377 +Train: [37] [4700/6250] eta: 0:03:49 lr: 0.000092 grad: 0.1490 (0.1468) loss: 0.7428 (0.7542) time: 0.1384 data: 0.0579 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:34 lr: 0.000092 grad: 0.1419 (0.1468) loss: 0.7491 (0.7540) time: 0.1362 data: 0.0489 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:19 lr: 0.000092 grad: 0.1399 (0.1466) loss: 0.7495 (0.7539) time: 0.1338 data: 0.0548 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:04 lr: 0.000092 grad: 0.1321 (0.1465) loss: 0.7400 (0.7538) time: 0.1631 data: 0.0781 max mem: 9377 +Train: [37] [5100/6250] eta: 0:02:49 lr: 0.000092 grad: 0.1412 (0.1464) loss: 0.7528 (0.7536) time: 0.1495 data: 0.0704 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:34 lr: 0.000092 grad: 0.1469 (0.1464) loss: 0.7402 (0.7535) time: 0.1420 data: 0.0602 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:19 lr: 0.000092 grad: 0.1449 (0.1463) loss: 0.7292 (0.7533) time: 0.1601 data: 0.0746 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:04 lr: 0.000092 grad: 0.1439 (0.1463) loss: 0.7343 (0.7531) time: 0.1450 data: 0.0670 max mem: 9377 +Train: [37] [5500/6250] eta: 0:01:50 lr: 0.000092 grad: 0.1468 (0.1464) loss: 0.7387 (0.7530) time: 0.1310 data: 0.0502 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:35 lr: 0.000092 grad: 0.1407 (0.1464) loss: 0.7420 (0.7528) time: 0.1428 data: 0.0597 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:20 lr: 0.000091 grad: 0.1418 (0.1464) loss: 0.7357 (0.7527) time: 0.1911 data: 0.1134 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:05 lr: 0.000091 grad: 0.1488 (0.1464) loss: 0.7459 (0.7525) time: 0.1416 data: 0.0572 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:51 lr: 0.000091 grad: 0.1345 (0.1464) loss: 0.7500 (0.7524) time: 0.1272 data: 0.0479 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:36 lr: 0.000091 grad: 0.1381 (0.1463) loss: 0.7374 (0.7523) time: 0.1602 data: 0.0828 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:21 lr: 0.000091 grad: 0.1439 (0.1463) loss: 0.7498 (0.7522) time: 0.1349 data: 0.0512 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:07 lr: 0.000091 grad: 0.1398 (0.1462) loss: 0.7270 (0.7521) time: 0.1295 data: 0.0432 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.1316 (0.1462) loss: 0.7382 (0.7520) time: 0.1436 data: 0.0595 max mem: 9377 +Train: [37] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000091 grad: 0.1316 (0.1462) loss: 0.7382 (0.7520) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:04:01 loss: 0.8510 (0.8510) time: 3.8923 data: 3.8290 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8571 (0.8580) time: 0.1375 data: 0.1116 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:12 (0.2069 s / it) +Averaged stats (hcp-train-subset): loss: 0.8571 (0.8580) +Eval (hcp-val): [37] [ 0/62] eta: 0:05:47 loss: 0.8568 (0.8568) time: 5.6057 data: 5.5731 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8543 (0.8557) time: 0.1358 data: 0.1107 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:13 (0.2235 s / it) +Averaged stats (hcp-val): loss: 0.8543 (0.8557) +Eval (nsd-val): [37] [ 0/62] eta: 0:05:36 loss: 0.8189 (0.8189) time: 5.4261 data: 5.3916 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8242 (0.8268) time: 0.1281 data: 0.1033 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:12 (0.2082 s / it) +Averaged stats (nsd-val): loss: 0.8242 (0.8268) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 7:08:56 lr: 0.000091 grad: 0.2666 (0.2666) loss: 0.7875 (0.7875) time: 4.1178 data: 3.8662 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:20:05 lr: 0.000091 grad: 0.2407 (0.2659) loss: 0.7458 (0.7707) time: 0.1464 data: 0.0572 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:16:59 lr: 0.000091 grad: 0.1772 (0.2346) loss: 0.7652 (0.7638) time: 0.1299 data: 0.0333 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:15:50 lr: 0.000091 grad: 0.1480 (0.2166) loss: 0.7746 (0.7649) time: 0.1295 data: 0.0309 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:15:03 lr: 0.000091 grad: 0.1737 (0.2064) loss: 0.7367 (0.7608) time: 0.1391 data: 0.0463 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:14:38 lr: 0.000091 grad: 0.1650 (0.1985) loss: 0.7370 (0.7564) time: 0.1104 data: 0.0112 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:14:19 lr: 0.000091 grad: 0.1708 (0.1947) loss: 0.7370 (0.7535) time: 0.1256 data: 0.0345 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:13:55 lr: 0.000091 grad: 0.1538 (0.1902) loss: 0.7356 (0.7510) time: 0.1381 data: 0.0428 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:13:35 lr: 0.000091 grad: 0.1570 (0.1875) loss: 0.7386 (0.7491) time: 0.1208 data: 0.0347 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:13:15 lr: 0.000091 grad: 0.1530 (0.1847) loss: 0.7349 (0.7482) time: 0.1277 data: 0.0382 max mem: 9377 +Train: [38] [1000/6250] eta: 0:12:56 lr: 0.000091 grad: 0.1535 (0.1818) loss: 0.7313 (0.7474) time: 0.1342 data: 0.0529 max mem: 9377 +Train: [38] [1100/6250] eta: 0:12:36 lr: 0.000091 grad: 0.1423 (0.1790) loss: 0.7375 (0.7468) time: 0.1452 data: 0.0615 max mem: 9377 +Train: [38] [1200/6250] eta: 0:12:26 lr: 0.000091 grad: 0.1415 (0.1760) loss: 0.7365 (0.7461) time: 0.1466 data: 0.0598 max mem: 9377 +Train: [38] [1300/6250] eta: 0:12:15 lr: 0.000091 grad: 0.1432 (0.1735) loss: 0.7285 (0.7457) time: 0.1446 data: 0.0654 max mem: 9377 +Train: [38] [1400/6250] eta: 0:12:02 lr: 0.000091 grad: 0.1482 (0.1719) loss: 0.7468 (0.7454) time: 0.1540 data: 0.0730 max mem: 9377 +Train: [38] [1500/6250] eta: 0:11:47 lr: 0.000091 grad: 0.1452 (0.1700) loss: 0.7408 (0.7454) time: 0.1532 data: 0.0687 max mem: 9377 +Train: [38] [1600/6250] eta: 0:11:32 lr: 0.000091 grad: 0.1493 (0.1685) loss: 0.7367 (0.7451) time: 0.1511 data: 0.0675 max mem: 9377 +Train: [38] [1700/6250] eta: 0:11:15 lr: 0.000091 grad: 0.1356 (0.1670) loss: 0.7419 (0.7449) time: 0.1443 data: 0.0573 max mem: 9377 +Train: [38] [1800/6250] eta: 0:10:59 lr: 0.000091 grad: 0.1399 (0.1656) loss: 0.7418 (0.7447) time: 0.1518 data: 0.0697 max mem: 9377 +Train: [38] [1900/6250] eta: 0:10:43 lr: 0.000091 grad: 0.1391 (0.1643) loss: 0.7368 (0.7445) time: 0.1397 data: 0.0440 max mem: 9377 +Train: [38] [2000/6250] eta: 0:10:32 lr: 0.000091 grad: 0.1431 (0.1633) loss: 0.7395 (0.7446) time: 0.1638 data: 0.0815 max mem: 9377 +Train: [38] [2100/6250] eta: 0:10:17 lr: 0.000091 grad: 0.1394 (0.1623) loss: 0.7525 (0.7449) time: 0.1524 data: 0.0679 max mem: 9377 +Train: [38] [2200/6250] eta: 0:10:00 lr: 0.000091 grad: 0.1364 (0.1613) loss: 0.7471 (0.7451) time: 0.1374 data: 0.0569 max mem: 9377 +Train: [38] [2300/6250] eta: 0:09:45 lr: 0.000091 grad: 0.1334 (0.1604) loss: 0.7476 (0.7454) time: 0.1668 data: 0.0863 max mem: 9377 +Train: [38] [2400/6250] eta: 0:09:31 lr: 0.000091 grad: 0.1390 (0.1597) loss: 0.7493 (0.7456) time: 0.1431 data: 0.0529 max mem: 9377 +Train: [38] [2500/6250] eta: 0:09:15 lr: 0.000091 grad: 0.1365 (0.1590) loss: 0.7588 (0.7459) time: 0.1379 data: 0.0539 max mem: 9377 +Train: [38] [2600/6250] eta: 0:09:00 lr: 0.000091 grad: 0.1375 (0.1583) loss: 0.7488 (0.7461) time: 0.1489 data: 0.0568 max mem: 9377 +Train: [38] [2700/6250] eta: 0:08:44 lr: 0.000091 grad: 0.1384 (0.1578) loss: 0.7402 (0.7461) time: 0.1508 data: 0.0604 max mem: 9377 +Train: [38] [2800/6250] eta: 0:08:28 lr: 0.000091 grad: 0.1333 (0.1571) loss: 0.7507 (0.7462) time: 0.1319 data: 0.0493 max mem: 9377 +Train: [38] [2900/6250] eta: 0:08:14 lr: 0.000090 grad: 0.1433 (0.1566) loss: 0.7462 (0.7463) time: 0.2191 data: 0.1363 max mem: 9377 +Train: [38] [3000/6250] eta: 0:07:57 lr: 0.000090 grad: 0.1385 (0.1561) loss: 0.7477 (0.7463) time: 0.1264 data: 0.0443 max mem: 9377 +Train: [38] [3100/6250] eta: 0:07:42 lr: 0.000090 grad: 0.1556 (0.1560) loss: 0.7323 (0.7463) time: 0.1382 data: 0.0552 max mem: 9377 +Train: [38] [3200/6250] eta: 0:07:27 lr: 0.000090 grad: 0.1514 (0.1561) loss: 0.7481 (0.7462) time: 0.1447 data: 0.0646 max mem: 9377 +Train: [38] [3300/6250] eta: 0:07:12 lr: 0.000090 grad: 0.1408 (0.1557) loss: 0.7392 (0.7461) time: 0.1429 data: 0.0617 max mem: 9377 +Train: [38] [3400/6250] eta: 0:06:57 lr: 0.000090 grad: 0.1499 (0.1554) loss: 0.7383 (0.7460) time: 0.1349 data: 0.0535 max mem: 9377 +Train: [38] [3500/6250] eta: 0:06:41 lr: 0.000090 grad: 0.1454 (0.1554) loss: 0.7458 (0.7459) time: 0.1460 data: 0.0624 max mem: 9377 +Train: [38] [3600/6250] eta: 0:06:26 lr: 0.000090 grad: 0.1408 (0.1551) loss: 0.7466 (0.7458) time: 0.1253 data: 0.0306 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:10 lr: 0.000090 grad: 0.1414 (0.1548) loss: 0.7305 (0.7457) time: 0.1402 data: 0.0524 max mem: 9377 +Train: [38] [3800/6250] eta: 0:05:55 lr: 0.000090 grad: 0.1360 (0.1545) loss: 0.7390 (0.7458) time: 0.1568 data: 0.0765 max mem: 9377 +Train: [38] [3900/6250] eta: 0:05:40 lr: 0.000090 grad: 0.1381 (0.1544) loss: 0.7489 (0.7457) time: 0.1145 data: 0.0293 max mem: 9377 +Train: [38] [4000/6250] eta: 0:05:25 lr: 0.000090 grad: 0.1394 (0.1541) loss: 0.7480 (0.7457) time: 0.1331 data: 0.0545 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:10 lr: 0.000090 grad: 0.1461 (0.1539) loss: 0.7404 (0.7456) time: 0.1487 data: 0.0689 max mem: 9377 +Train: [38] [4200/6250] eta: 0:04:56 lr: 0.000090 grad: 0.1392 (0.1536) loss: 0.7504 (0.7457) time: 0.1225 data: 0.0486 max mem: 9377 +Train: [38] [4300/6250] eta: 0:04:41 lr: 0.000090 grad: 0.1385 (0.1534) loss: 0.7525 (0.7458) time: 0.1636 data: 0.0884 max mem: 9377 +Train: [38] [4400/6250] eta: 0:04:27 lr: 0.000090 grad: 0.1386 (0.1531) loss: 0.7481 (0.7458) time: 0.1743 data: 0.0934 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:13 lr: 0.000090 grad: 0.1447 (0.1528) loss: 0.7333 (0.7459) time: 0.1481 data: 0.0683 max mem: 9377 +Train: [38] [4600/6250] eta: 0:03:59 lr: 0.000090 grad: 0.1467 (0.1526) loss: 0.7449 (0.7459) time: 0.1860 data: 0.1083 max mem: 9377 +Train: [38] [4700/6250] eta: 0:03:44 lr: 0.000090 grad: 0.1431 (0.1524) loss: 0.7445 (0.7460) time: 0.1610 data: 0.0779 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:30 lr: 0.000090 grad: 0.1470 (0.1523) loss: 0.7363 (0.7461) time: 0.1569 data: 0.0756 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:16 lr: 0.000090 grad: 0.1424 (0.1521) loss: 0.7435 (0.7461) time: 0.1669 data: 0.0857 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:01 lr: 0.000090 grad: 0.1433 (0.1520) loss: 0.7486 (0.7461) time: 0.1391 data: 0.0551 max mem: 9377 +Train: [38] [5100/6250] eta: 0:02:47 lr: 0.000090 grad: 0.1401 (0.1518) loss: 0.7414 (0.7462) time: 0.1553 data: 0.0755 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:33 lr: 0.000090 grad: 0.1428 (0.1517) loss: 0.7456 (0.7462) time: 0.1572 data: 0.0787 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:18 lr: 0.000090 grad: 0.1412 (0.1516) loss: 0.7477 (0.7462) time: 0.1393 data: 0.0590 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:03 lr: 0.000090 grad: 0.1460 (0.1514) loss: 0.7484 (0.7463) time: 0.1371 data: 0.0574 max mem: 9377 +Train: [38] [5500/6250] eta: 0:01:49 lr: 0.000090 grad: 0.1444 (0.1513) loss: 0.7345 (0.7464) time: 0.1403 data: 0.0578 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:34 lr: 0.000090 grad: 0.1424 (0.1512) loss: 0.7524 (0.7464) time: 0.1462 data: 0.0646 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:20 lr: 0.000090 grad: 0.1385 (0.1510) loss: 0.7510 (0.7464) time: 0.1376 data: 0.0527 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:05 lr: 0.000090 grad: 0.1490 (0.1509) loss: 0.7395 (0.7464) time: 0.1483 data: 0.0611 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:50 lr: 0.000090 grad: 0.1433 (0.1509) loss: 0.7514 (0.7464) time: 0.1527 data: 0.0762 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:36 lr: 0.000090 grad: 0.1462 (0.1508) loss: 0.7555 (0.7465) time: 0.1444 data: 0.0670 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:21 lr: 0.000090 grad: 0.1456 (0.1507) loss: 0.7440 (0.7466) time: 0.1353 data: 0.0539 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:07 lr: 0.000089 grad: 0.1467 (0.1506) loss: 0.7392 (0.7466) time: 0.1424 data: 0.0581 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.1391 (0.1505) loss: 0.7494 (0.7466) time: 0.1578 data: 0.0788 max mem: 9377 +Train: [38] Total time: 0:15:14 (0.1462 s / it) +Averaged stats: lr: 0.000089 grad: 0.1391 (0.1505) loss: 0.7494 (0.7466) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:03:57 loss: 0.8594 (0.8594) time: 3.8269 data: 3.7463 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8594 (0.8612) time: 0.1455 data: 0.1181 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:13 (0.2134 s / it) +Averaged stats (hcp-train-subset): loss: 0.8594 (0.8612) +Eval (hcp-val): [38] [ 0/62] eta: 0:05:18 loss: 0.8618 (0.8618) time: 5.1445 data: 5.1133 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8594 (0.8602) time: 0.1285 data: 0.1017 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-val): loss: 0.8594 (0.8602) +Eval (nsd-val): [38] [ 0/62] eta: 0:03:05 loss: 0.8202 (0.8202) time: 2.9947 data: 2.9136 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8305 (0.8307) time: 0.1219 data: 0.0949 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:12 (0.2003 s / it) +Averaged stats (nsd-val): loss: 0.8305 (0.8307) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 6:32:46 lr: 0.000089 grad: 0.1223 (0.1223) loss: 0.8505 (0.8505) time: 3.7707 data: 3.4776 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:18:52 lr: 0.000089 grad: 0.2015 (0.2183) loss: 0.7645 (0.7898) time: 0.1340 data: 0.0393 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:16:40 lr: 0.000089 grad: 0.2131 (0.2161) loss: 0.7565 (0.7787) time: 0.1474 data: 0.0561 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:15:25 lr: 0.000089 grad: 0.1732 (0.2042) loss: 0.7355 (0.7692) time: 0.1377 data: 0.0465 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:14:44 lr: 0.000089 grad: 0.1993 (0.2003) loss: 0.7343 (0.7626) time: 0.1384 data: 0.0519 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:14:14 lr: 0.000089 grad: 0.1561 (0.1957) loss: 0.7329 (0.7592) time: 0.1252 data: 0.0396 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:13:46 lr: 0.000089 grad: 0.1586 (0.1890) loss: 0.7343 (0.7569) time: 0.1340 data: 0.0402 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:13:22 lr: 0.000089 grad: 0.1584 (0.1847) loss: 0.7359 (0.7547) time: 0.1441 data: 0.0582 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:13:15 lr: 0.000089 grad: 0.1521 (0.1811) loss: 0.7443 (0.7538) time: 0.1636 data: 0.0720 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:13:09 lr: 0.000089 grad: 0.1383 (0.1771) loss: 0.7515 (0.7533) time: 0.1676 data: 0.0859 max mem: 9377 +Train: [39] [1000/6250] eta: 0:12:57 lr: 0.000089 grad: 0.1469 (0.1740) loss: 0.7518 (0.7529) time: 0.1269 data: 0.0382 max mem: 9377 +Train: [39] [1100/6250] eta: 0:12:35 lr: 0.000089 grad: 0.1462 (0.1717) loss: 0.7527 (0.7523) time: 0.1312 data: 0.0405 max mem: 9377 +Train: [39] [1200/6250] eta: 0:12:28 lr: 0.000089 grad: 0.1432 (0.1699) loss: 0.7349 (0.7517) time: 0.1593 data: 0.0684 max mem: 9377 +Train: [39] [1300/6250] eta: 0:12:19 lr: 0.000089 grad: 0.1446 (0.1680) loss: 0.7449 (0.7513) time: 0.1526 data: 0.0683 max mem: 9377 +Train: [39] [1400/6250] eta: 0:12:14 lr: 0.000089 grad: 0.1494 (0.1666) loss: 0.7508 (0.7512) time: 0.1756 data: 0.0892 max mem: 9377 +Train: [39] [1500/6250] eta: 0:12:03 lr: 0.000089 grad: 0.1371 (0.1652) loss: 0.7566 (0.7511) time: 0.2047 data: 0.1174 max mem: 9377 +Train: [39] [1600/6250] eta: 0:11:52 lr: 0.000089 grad: 0.1445 (0.1638) loss: 0.7439 (0.7509) time: 0.1657 data: 0.0699 max mem: 9377 +Train: [39] [1700/6250] eta: 0:11:41 lr: 0.000089 grad: 0.1372 (0.1626) loss: 0.7397 (0.7505) time: 0.1700 data: 0.0796 max mem: 9377 +Train: [39] [1800/6250] eta: 0:11:25 lr: 0.000089 grad: 0.1384 (0.1617) loss: 0.7403 (0.7502) time: 0.1331 data: 0.0486 max mem: 9377 +Train: [39] [1900/6250] eta: 0:11:07 lr: 0.000089 grad: 0.1436 (0.1606) loss: 0.7355 (0.7496) time: 0.1358 data: 0.0536 max mem: 9377 +Train: [39] [2000/6250] eta: 0:10:48 lr: 0.000089 grad: 0.1401 (0.1597) loss: 0.7365 (0.7493) time: 0.1489 data: 0.0690 max mem: 9377 +Train: [39] [2100/6250] eta: 0:10:33 lr: 0.000089 grad: 0.1339 (0.1589) loss: 0.7587 (0.7492) time: 0.1784 data: 0.1043 max mem: 9377 +Train: [39] [2200/6250] eta: 0:10:16 lr: 0.000089 grad: 0.1558 (0.1585) loss: 0.7338 (0.7488) time: 0.1382 data: 0.0573 max mem: 9377 +Train: [39] [2300/6250] eta: 0:10:01 lr: 0.000089 grad: 0.1400 (0.1580) loss: 0.7541 (0.7488) time: 0.1606 data: 0.0797 max mem: 9377 +Train: [39] [2400/6250] eta: 0:09:46 lr: 0.000089 grad: 0.1406 (0.1574) loss: 0.7439 (0.7486) time: 0.1681 data: 0.0837 max mem: 9377 +Train: [39] [2500/6250] eta: 0:09:31 lr: 0.000089 grad: 0.1518 (0.1570) loss: 0.7289 (0.7483) time: 0.1329 data: 0.0450 max mem: 9377 +Train: [39] [2600/6250] eta: 0:09:14 lr: 0.000089 grad: 0.1353 (0.1565) loss: 0.7456 (0.7481) time: 0.1472 data: 0.0642 max mem: 9377 +Train: [39] [2700/6250] eta: 0:08:57 lr: 0.000089 grad: 0.1421 (0.1562) loss: 0.7426 (0.7478) time: 0.1452 data: 0.0551 max mem: 9377 +Train: [39] [2800/6250] eta: 0:08:39 lr: 0.000089 grad: 0.1519 (0.1559) loss: 0.7370 (0.7474) time: 0.1273 data: 0.0333 max mem: 9377 +Train: [39] [2900/6250] eta: 0:08:22 lr: 0.000089 grad: 0.1469 (0.1558) loss: 0.7466 (0.7470) time: 0.1337 data: 0.0495 max mem: 9377 +Train: [39] [3000/6250] eta: 0:08:06 lr: 0.000089 grad: 0.1508 (0.1557) loss: 0.7394 (0.7467) time: 0.1414 data: 0.0501 max mem: 9377 +Train: [39] [3100/6250] eta: 0:07:49 lr: 0.000089 grad: 0.1434 (0.1555) loss: 0.7350 (0.7463) time: 0.1398 data: 0.0568 max mem: 9377 +Train: [39] [3200/6250] eta: 0:07:32 lr: 0.000089 grad: 0.1404 (0.1552) loss: 0.7338 (0.7461) time: 0.1146 data: 0.0334 max mem: 9377 +Train: [39] [3300/6250] eta: 0:07:17 lr: 0.000088 grad: 0.1441 (0.1549) loss: 0.7509 (0.7459) time: 0.1386 data: 0.0576 max mem: 9377 +Train: [39] [3400/6250] eta: 0:07:01 lr: 0.000088 grad: 0.1446 (0.1546) loss: 0.7396 (0.7458) time: 0.1382 data: 0.0599 max mem: 9377 +Train: [39] [3500/6250] eta: 0:06:46 lr: 0.000088 grad: 0.1361 (0.1544) loss: 0.7376 (0.7456) time: 0.1191 data: 0.0372 max mem: 9377 +Train: [39] [3600/6250] eta: 0:06:31 lr: 0.000088 grad: 0.1416 (0.1541) loss: 0.7447 (0.7456) time: 0.1475 data: 0.0599 max mem: 9377 +Train: [39] [3700/6250] eta: 0:06:15 lr: 0.000088 grad: 0.1425 (0.1539) loss: 0.7502 (0.7455) time: 0.1391 data: 0.0588 max mem: 9377 +Train: [39] [3800/6250] eta: 0:06:00 lr: 0.000088 grad: 0.1504 (0.1538) loss: 0.7445 (0.7454) time: 0.1274 data: 0.0458 max mem: 9377 +Train: [39] [3900/6250] eta: 0:05:44 lr: 0.000088 grad: 0.1506 (0.1539) loss: 0.7507 (0.7452) time: 0.1249 data: 0.0383 max mem: 9377 +Train: [39] [4000/6250] eta: 0:05:29 lr: 0.000088 grad: 0.1439 (0.1537) loss: 0.7394 (0.7451) time: 0.1443 data: 0.0614 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:13 lr: 0.000088 grad: 0.1513 (0.1536) loss: 0.7420 (0.7450) time: 0.1396 data: 0.0582 max mem: 9377 +Train: [39] [4200/6250] eta: 0:04:58 lr: 0.000088 grad: 0.1470 (0.1534) loss: 0.7436 (0.7449) time: 0.1305 data: 0.0480 max mem: 9377 +Train: [39] [4300/6250] eta: 0:04:43 lr: 0.000088 grad: 0.1451 (0.1533) loss: 0.7376 (0.7448) time: 0.1283 data: 0.0461 max mem: 9377 +Train: [39] [4400/6250] eta: 0:04:28 lr: 0.000088 grad: 0.1503 (0.1532) loss: 0.7285 (0.7446) time: 0.1319 data: 0.0503 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:13 lr: 0.000088 grad: 0.1437 (0.1530) loss: 0.7366 (0.7445) time: 0.1444 data: 0.0578 max mem: 9377 +Train: [39] [4600/6250] eta: 0:03:58 lr: 0.000088 grad: 0.1411 (0.1529) loss: 0.7496 (0.7444) time: 0.1435 data: 0.0634 max mem: 9377 +Train: [39] [4700/6250] eta: 0:03:44 lr: 0.000088 grad: 0.1438 (0.1527) loss: 0.7300 (0.7442) time: 0.1499 data: 0.0697 max mem: 9377 +Train: [39] [4800/6250] eta: 0:03:29 lr: 0.000088 grad: 0.1428 (0.1526) loss: 0.7303 (0.7441) time: 0.1290 data: 0.0432 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:14 lr: 0.000088 grad: 0.1482 (0.1525) loss: 0.7333 (0.7438) time: 0.1355 data: 0.0523 max mem: 9377 +Train: [39] [5000/6250] eta: 0:02:59 lr: 0.000088 grad: 0.1424 (0.1524) loss: 0.7406 (0.7436) time: 0.1295 data: 0.0500 max mem: 9377 +Train: [39] [5100/6250] eta: 0:02:45 lr: 0.000088 grad: 0.1454 (0.1524) loss: 0.7352 (0.7434) time: 0.1527 data: 0.0737 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:30 lr: 0.000088 grad: 0.1456 (0.1523) loss: 0.7291 (0.7432) time: 0.1437 data: 0.0596 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:16 lr: 0.000088 grad: 0.1460 (0.1522) loss: 0.7480 (0.7432) time: 0.1532 data: 0.0740 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:02 lr: 0.000088 grad: 0.1435 (0.1520) loss: 0.7412 (0.7432) time: 0.1614 data: 0.0830 max mem: 9377 +Train: [39] [5500/6250] eta: 0:01:47 lr: 0.000088 grad: 0.1473 (0.1519) loss: 0.7375 (0.7431) time: 0.1415 data: 0.0636 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:33 lr: 0.000088 grad: 0.1419 (0.1518) loss: 0.7371 (0.7431) time: 0.1461 data: 0.0647 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:19 lr: 0.000088 grad: 0.1440 (0.1517) loss: 0.7425 (0.7430) time: 0.1564 data: 0.0747 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:04 lr: 0.000088 grad: 0.1457 (0.1516) loss: 0.7548 (0.7429) time: 0.1692 data: 0.0892 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:50 lr: 0.000088 grad: 0.1534 (0.1515) loss: 0.7359 (0.7429) time: 0.1406 data: 0.0555 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:36 lr: 0.000088 grad: 0.1485 (0.1515) loss: 0.7369 (0.7428) time: 0.1459 data: 0.0725 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:21 lr: 0.000088 grad: 0.1428 (0.1514) loss: 0.7354 (0.7427) time: 0.1424 data: 0.0564 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:07 lr: 0.000088 grad: 0.1416 (0.1514) loss: 0.7441 (0.7426) time: 0.1331 data: 0.0536 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.1469 (0.1513) loss: 0.7325 (0.7425) time: 0.1360 data: 0.0551 max mem: 9377 +Train: [39] Total time: 0:15:06 (0.1450 s / it) +Averaged stats: lr: 0.000088 grad: 0.1469 (0.1513) loss: 0.7325 (0.7425) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:05:50 loss: 0.8619 (0.8619) time: 5.6468 data: 5.6170 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8609 (0.8622) time: 0.1331 data: 0.1073 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-train-subset): loss: 0.8609 (0.8622) +Making plots (hcp-train-subset): example=3 +Eval (hcp-val): [39] [ 0/62] eta: 0:04:14 loss: 0.8605 (0.8605) time: 4.1005 data: 4.0243 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8582 (0.8605) time: 0.1185 data: 0.0929 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-val): loss: 0.8582 (0.8605) +Making plots (hcp-val): example=24 +Eval (nsd-val): [39] [ 0/62] eta: 0:05:40 loss: 0.8242 (0.8242) time: 5.4934 data: 5.4603 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8350 (0.8347) time: 0.1458 data: 0.1203 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (nsd-val): loss: 0.8350 (0.8347) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 11:11:23 lr: 0.000088 grad: 0.2991 (0.2991) loss: 0.8486 (0.8486) time: 6.4454 data: 6.3436 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:22:06 lr: 0.000088 grad: 0.2413 (0.2397) loss: 0.7546 (0.7896) time: 0.1736 data: 0.0769 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:18:27 lr: 0.000088 grad: 0.2358 (0.2515) loss: 0.7410 (0.7675) time: 0.1450 data: 0.0577 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:16:47 lr: 0.000088 grad: 0.1466 (0.2341) loss: 0.7658 (0.7596) time: 0.1514 data: 0.0660 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:16:01 lr: 0.000087 grad: 0.1681 (0.2198) loss: 0.7519 (0.7543) time: 0.1502 data: 0.0586 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:15:09 lr: 0.000087 grad: 0.1679 (0.2111) loss: 0.7402 (0.7507) time: 0.1390 data: 0.0602 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:14:49 lr: 0.000087 grad: 0.1750 (0.2055) loss: 0.7263 (0.7471) time: 0.1659 data: 0.0871 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:14:31 lr: 0.000087 grad: 0.1646 (0.2006) loss: 0.7229 (0.7445) time: 0.1706 data: 0.0839 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:14:07 lr: 0.000087 grad: 0.1556 (0.1960) loss: 0.7353 (0.7431) time: 0.1319 data: 0.0444 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:13:45 lr: 0.000087 grad: 0.1521 (0.1916) loss: 0.7250 (0.7422) time: 0.1449 data: 0.0551 max mem: 9377 +Train: [40] [1000/6250] eta: 0:13:20 lr: 0.000087 grad: 0.1612 (0.1884) loss: 0.7263 (0.7411) time: 0.1612 data: 0.0761 max mem: 9377 +Train: [40] [1100/6250] eta: 0:13:06 lr: 0.000087 grad: 0.1550 (0.1855) loss: 0.7350 (0.7407) time: 0.1520 data: 0.0622 max mem: 9377 +Train: [40] [1200/6250] eta: 0:12:55 lr: 0.000087 grad: 0.1499 (0.1830) loss: 0.7413 (0.7404) time: 0.1745 data: 0.0961 max mem: 9377 +Train: [40] [1300/6250] eta: 0:12:42 lr: 0.000087 grad: 0.1503 (0.1807) loss: 0.7188 (0.7395) time: 0.1777 data: 0.0976 max mem: 9377 +Train: [40] [1400/6250] eta: 0:12:26 lr: 0.000087 grad: 0.1590 (0.1786) loss: 0.7343 (0.7389) time: 0.1531 data: 0.0767 max mem: 9377 +Train: [40] [1500/6250] eta: 0:12:05 lr: 0.000087 grad: 0.1551 (0.1771) loss: 0.7407 (0.7388) time: 0.1548 data: 0.0813 max mem: 9377 +Train: [40] [1600/6250] eta: 0:11:47 lr: 0.000087 grad: 0.1506 (0.1755) loss: 0.7426 (0.7389) time: 0.1462 data: 0.0646 max mem: 9377 +Train: [40] [1700/6250] eta: 0:11:29 lr: 0.000087 grad: 0.1482 (0.1740) loss: 0.7312 (0.7387) time: 0.1451 data: 0.0617 max mem: 9377 +Train: [40] [1800/6250] eta: 0:11:09 lr: 0.000087 grad: 0.1508 (0.1726) loss: 0.7395 (0.7388) time: 0.1404 data: 0.0549 max mem: 9377 +Train: [40] [1900/6250] eta: 0:10:52 lr: 0.000087 grad: 0.1499 (0.1713) loss: 0.7387 (0.7390) time: 0.1364 data: 0.0434 max mem: 9377 +Train: [40] [2000/6250] eta: 0:10:34 lr: 0.000087 grad: 0.1417 (0.1699) loss: 0.7508 (0.7394) time: 0.1309 data: 0.0452 max mem: 9377 +Train: [40] [2100/6250] eta: 0:10:15 lr: 0.000087 grad: 0.1447 (0.1686) loss: 0.7327 (0.7396) time: 0.1210 data: 0.0382 max mem: 9377 +Train: [40] [2200/6250] eta: 0:09:59 lr: 0.000087 grad: 0.1394 (0.1675) loss: 0.7413 (0.7397) time: 0.1399 data: 0.0545 max mem: 9377 +Train: [40] [2300/6250] eta: 0:09:43 lr: 0.000087 grad: 0.1456 (0.1665) loss: 0.7318 (0.7395) time: 0.1535 data: 0.0724 max mem: 9377 +Train: [40] [2400/6250] eta: 0:09:25 lr: 0.000087 grad: 0.1490 (0.1657) loss: 0.7393 (0.7396) time: 0.1417 data: 0.0570 max mem: 9377 +Train: [40] [2500/6250] eta: 0:09:14 lr: 0.000087 grad: 0.1406 (0.1650) loss: 0.7402 (0.7398) time: 0.1793 data: 0.0948 max mem: 9377 +Train: [40] [2600/6250] eta: 0:09:02 lr: 0.000087 grad: 0.1440 (0.1642) loss: 0.7495 (0.7399) time: 0.1707 data: 0.0826 max mem: 9377 +Train: [40] [2700/6250] eta: 0:08:49 lr: 0.000087 grad: 0.1427 (0.1635) loss: 0.7336 (0.7400) time: 0.1681 data: 0.0899 max mem: 9377 +Train: [40] [2800/6250] eta: 0:08:35 lr: 0.000087 grad: 0.1394 (0.1629) loss: 0.7388 (0.7401) time: 0.1599 data: 0.0836 max mem: 9377 +Train: [40] [2900/6250] eta: 0:08:22 lr: 0.000087 grad: 0.1548 (0.1624) loss: 0.7145 (0.7399) time: 0.1660 data: 0.0805 max mem: 9377 +Train: [40] [3000/6250] eta: 0:08:08 lr: 0.000087 grad: 0.1558 (0.1621) loss: 0.7421 (0.7398) time: 0.1641 data: 0.0748 max mem: 9377 +Train: [40] [3100/6250] eta: 0:07:54 lr: 0.000087 grad: 0.1509 (0.1619) loss: 0.7293 (0.7397) time: 0.1715 data: 0.0787 max mem: 9377 +Train: [40] [3200/6250] eta: 0:07:38 lr: 0.000087 grad: 0.1439 (0.1615) loss: 0.7357 (0.7396) time: 0.1273 data: 0.0342 max mem: 9377 +Train: [40] [3300/6250] eta: 0:07:22 lr: 0.000087 grad: 0.1431 (0.1612) loss: 0.7468 (0.7395) time: 0.1383 data: 0.0539 max mem: 9377 +Train: [40] [3400/6250] eta: 0:07:06 lr: 0.000087 grad: 0.1477 (0.1610) loss: 0.7478 (0.7395) time: 0.1354 data: 0.0492 max mem: 9377 +Train: [40] [3500/6250] eta: 0:06:51 lr: 0.000087 grad: 0.1463 (0.1605) loss: 0.7409 (0.7397) time: 0.1472 data: 0.0613 max mem: 9377 +Train: [40] [3600/6250] eta: 0:06:36 lr: 0.000087 grad: 0.1514 (0.1602) loss: 0.7437 (0.7397) time: 0.1505 data: 0.0726 max mem: 9377 +Train: [40] [3700/6250] eta: 0:06:20 lr: 0.000086 grad: 0.1468 (0.1599) loss: 0.7388 (0.7397) time: 0.1300 data: 0.0417 max mem: 9377 +Train: [40] [3800/6250] eta: 0:06:04 lr: 0.000086 grad: 0.1471 (0.1596) loss: 0.7405 (0.7397) time: 0.1143 data: 0.0165 max mem: 9377 +Train: [40] [3900/6250] eta: 0:05:49 lr: 0.000086 grad: 0.1396 (0.1592) loss: 0.7468 (0.7399) time: 0.1720 data: 0.0833 max mem: 9377 +Train: [40] [4000/6250] eta: 0:05:34 lr: 0.000086 grad: 0.1436 (0.1589) loss: 0.7510 (0.7399) time: 0.1713 data: 0.0962 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:18 lr: 0.000086 grad: 0.1475 (0.1585) loss: 0.7442 (0.7400) time: 0.1361 data: 0.0539 max mem: 9377 +Train: [40] [4200/6250] eta: 0:05:03 lr: 0.000086 grad: 0.1445 (0.1582) loss: 0.7443 (0.7401) time: 0.1307 data: 0.0489 max mem: 9377 +Train: [40] [4300/6250] eta: 0:04:48 lr: 0.000086 grad: 0.1416 (0.1580) loss: 0.7531 (0.7402) time: 0.1600 data: 0.0811 max mem: 9377 +Train: [40] [4400/6250] eta: 0:04:33 lr: 0.000086 grad: 0.1353 (0.1577) loss: 0.7438 (0.7402) time: 0.1298 data: 0.0490 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:18 lr: 0.000086 grad: 0.1499 (0.1574) loss: 0.7436 (0.7403) time: 0.1572 data: 0.0711 max mem: 9377 +Train: [40] [4600/6250] eta: 0:04:03 lr: 0.000086 grad: 0.1436 (0.1571) loss: 0.7374 (0.7403) time: 0.1455 data: 0.0634 max mem: 9377 +Train: [40] [4700/6250] eta: 0:03:48 lr: 0.000086 grad: 0.1382 (0.1569) loss: 0.7337 (0.7403) time: 0.1492 data: 0.0644 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:32 lr: 0.000086 grad: 0.1384 (0.1566) loss: 0.7526 (0.7403) time: 0.1374 data: 0.0576 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:17 lr: 0.000086 grad: 0.1427 (0.1564) loss: 0.7505 (0.7403) time: 0.1368 data: 0.0499 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:03 lr: 0.000086 grad: 0.1488 (0.1562) loss: 0.7381 (0.7403) time: 0.1373 data: 0.0543 max mem: 9377 +Train: [40] [5100/6250] eta: 0:02:48 lr: 0.000086 grad: 0.1397 (0.1559) loss: 0.7474 (0.7404) time: 0.1518 data: 0.0698 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:33 lr: 0.000086 grad: 0.1418 (0.1557) loss: 0.7486 (0.7405) time: 0.1423 data: 0.0619 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:18 lr: 0.000086 grad: 0.1520 (0.1555) loss: 0.7325 (0.7405) time: 0.1332 data: 0.0483 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:03 lr: 0.000086 grad: 0.1390 (0.1553) loss: 0.7350 (0.7405) time: 0.1497 data: 0.0686 max mem: 9377 +Train: [40] [5500/6250] eta: 0:01:49 lr: 0.000086 grad: 0.1323 (0.1550) loss: 0.7526 (0.7406) time: 0.1273 data: 0.0457 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:34 lr: 0.000086 grad: 0.1414 (0.1549) loss: 0.7520 (0.7407) time: 0.1437 data: 0.0651 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:19 lr: 0.000086 grad: 0.1457 (0.1548) loss: 0.7387 (0.7408) time: 0.1353 data: 0.0532 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:05 lr: 0.000086 grad: 0.1378 (0.1546) loss: 0.7455 (0.7409) time: 0.1481 data: 0.0606 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:50 lr: 0.000086 grad: 0.1518 (0.1545) loss: 0.7280 (0.7408) time: 0.1389 data: 0.0594 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:36 lr: 0.000086 grad: 0.1547 (0.1544) loss: 0.7387 (0.7407) time: 0.1303 data: 0.0388 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:21 lr: 0.000086 grad: 0.1457 (0.1544) loss: 0.7573 (0.7408) time: 0.1298 data: 0.0451 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:07 lr: 0.000086 grad: 0.1550 (0.1544) loss: 0.7376 (0.7407) time: 0.1421 data: 0.0587 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.1456 (0.1544) loss: 0.7462 (0.7407) time: 0.1532 data: 0.0763 max mem: 9377 +Train: [40] Total time: 0:15:10 (0.1457 s / it) +Averaged stats: lr: 0.000086 grad: 0.1456 (0.1544) loss: 0.7462 (0.7407) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:04:31 loss: 0.8501 (0.8501) time: 4.3740 data: 4.3013 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8618 (0.8604) time: 0.1401 data: 0.1149 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (hcp-train-subset): loss: 0.8618 (0.8604) +Eval (hcp-val): [40] [ 0/62] eta: 0:05:33 loss: 0.8612 (0.8612) time: 5.3764 data: 5.3462 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8566 (0.8591) time: 0.1167 data: 0.0915 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-val): loss: 0.8566 (0.8591) +Eval (nsd-val): [40] [ 0/62] eta: 0:05:20 loss: 0.8215 (0.8215) time: 5.1668 data: 5.1337 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8325 (0.8317) time: 0.1252 data: 0.1001 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:12 (0.2042 s / it) +Averaged stats (nsd-val): loss: 0.8325 (0.8317) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 9:14:38 lr: 0.000086 grad: 0.1043 (0.1043) loss: 0.8348 (0.8348) time: 5.3246 data: 5.1775 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:19:38 lr: 0.000086 grad: 0.2371 (0.2738) loss: 0.7385 (0.7615) time: 0.1534 data: 0.0658 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:16:31 lr: 0.000086 grad: 0.2424 (0.2539) loss: 0.7583 (0.7573) time: 0.1490 data: 0.0560 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:15:30 lr: 0.000086 grad: 0.1600 (0.2306) loss: 0.7599 (0.7563) time: 0.1402 data: 0.0515 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:14:47 lr: 0.000086 grad: 0.1642 (0.2146) loss: 0.7521 (0.7537) time: 0.1177 data: 0.0315 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:14:09 lr: 0.000086 grad: 0.1635 (0.2056) loss: 0.7372 (0.7525) time: 0.1125 data: 0.0182 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:13:43 lr: 0.000086 grad: 0.1459 (0.1974) loss: 0.7400 (0.7512) time: 0.1362 data: 0.0466 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:13:24 lr: 0.000085 grad: 0.1459 (0.1902) loss: 0.7364 (0.7502) time: 0.1268 data: 0.0318 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:13:05 lr: 0.000085 grad: 0.1511 (0.1853) loss: 0.7413 (0.7489) time: 0.1660 data: 0.0818 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:12:45 lr: 0.000085 grad: 0.1490 (0.1812) loss: 0.7517 (0.7481) time: 0.1422 data: 0.0590 max mem: 9377 +Train: [41] [1000/6250] eta: 0:12:28 lr: 0.000085 grad: 0.1363 (0.1775) loss: 0.7498 (0.7475) time: 0.1518 data: 0.0684 max mem: 9377 +Train: [41] [1100/6250] eta: 0:12:08 lr: 0.000085 grad: 0.1452 (0.1748) loss: 0.7453 (0.7468) time: 0.1522 data: 0.0732 max mem: 9377 +Train: [41] [1200/6250] eta: 0:11:55 lr: 0.000085 grad: 0.1428 (0.1722) loss: 0.7324 (0.7460) time: 0.1910 data: 0.1175 max mem: 9377 +Train: [41] [1300/6250] eta: 0:11:46 lr: 0.000085 grad: 0.1413 (0.1703) loss: 0.7266 (0.7452) time: 0.1606 data: 0.0799 max mem: 9377 +Train: [41] [1400/6250] eta: 0:11:36 lr: 0.000085 grad: 0.1394 (0.1685) loss: 0.7443 (0.7446) time: 0.1560 data: 0.0745 max mem: 9377 +Train: [41] [1500/6250] eta: 0:11:20 lr: 0.000085 grad: 0.1495 (0.1670) loss: 0.7212 (0.7440) time: 0.1269 data: 0.0442 max mem: 9377 +Train: [41] [1600/6250] eta: 0:11:06 lr: 0.000085 grad: 0.1479 (0.1658) loss: 0.7324 (0.7433) time: 0.1388 data: 0.0622 max mem: 9377 +Train: [41] [1700/6250] eta: 0:10:51 lr: 0.000085 grad: 0.1545 (0.1652) loss: 0.7225 (0.7428) time: 0.1572 data: 0.0760 max mem: 9377 +Train: [41] [1800/6250] eta: 0:10:35 lr: 0.000085 grad: 0.1477 (0.1647) loss: 0.7358 (0.7424) time: 0.1343 data: 0.0539 max mem: 9377 +Train: [41] [1900/6250] eta: 0:10:17 lr: 0.000085 grad: 0.1460 (0.1637) loss: 0.7272 (0.7421) time: 0.1360 data: 0.0535 max mem: 9377 +Train: [41] [2000/6250] eta: 0:10:01 lr: 0.000085 grad: 0.1397 (0.1627) loss: 0.7529 (0.7419) time: 0.1329 data: 0.0473 max mem: 9377 +Train: [41] [2100/6250] eta: 0:09:47 lr: 0.000085 grad: 0.1466 (0.1620) loss: 0.7438 (0.7419) time: 0.1537 data: 0.0706 max mem: 9377 +Train: [41] [2200/6250] eta: 0:09:31 lr: 0.000085 grad: 0.1390 (0.1611) loss: 0.7511 (0.7419) time: 0.1327 data: 0.0525 max mem: 9377 +Train: [41] [2300/6250] eta: 0:09:17 lr: 0.000085 grad: 0.1350 (0.1603) loss: 0.7505 (0.7419) time: 0.1330 data: 0.0509 max mem: 9377 +Train: [41] [2400/6250] eta: 0:09:04 lr: 0.000085 grad: 0.1457 (0.1596) loss: 0.7349 (0.7419) time: 0.1417 data: 0.0645 max mem: 9377 +Train: [41] [2500/6250] eta: 0:08:52 lr: 0.000085 grad: 0.1416 (0.1589) loss: 0.7576 (0.7421) time: 0.1556 data: 0.0699 max mem: 9377 +Train: [41] [2600/6250] eta: 0:08:41 lr: 0.000085 grad: 0.1465 (0.1582) loss: 0.7345 (0.7421) time: 0.1370 data: 0.0606 max mem: 9377 +Train: [41] [2700/6250] eta: 0:08:29 lr: 0.000085 grad: 0.1423 (0.1576) loss: 0.7324 (0.7420) time: 0.1522 data: 0.0703 max mem: 9377 +Train: [41] [2800/6250] eta: 0:08:15 lr: 0.000085 grad: 0.1429 (0.1571) loss: 0.7428 (0.7418) time: 0.1606 data: 0.0722 max mem: 9377 +Train: [41] [2900/6250] eta: 0:08:00 lr: 0.000085 grad: 0.1500 (0.1566) loss: 0.7349 (0.7418) time: 0.1416 data: 0.0609 max mem: 9377 +Train: [41] [3000/6250] eta: 0:07:46 lr: 0.000085 grad: 0.1437 (0.1562) loss: 0.7345 (0.7417) time: 0.1285 data: 0.0516 max mem: 9377 +Train: [41] [3100/6250] eta: 0:07:32 lr: 0.000085 grad: 0.1410 (0.1559) loss: 0.7291 (0.7414) time: 0.1313 data: 0.0495 max mem: 9377 +Train: [41] [3200/6250] eta: 0:07:18 lr: 0.000085 grad: 0.1538 (0.1557) loss: 0.7227 (0.7412) time: 0.1447 data: 0.0556 max mem: 9377 +Train: [41] [3300/6250] eta: 0:07:05 lr: 0.000085 grad: 0.1528 (0.1556) loss: 0.7407 (0.7411) time: 0.1552 data: 0.0675 max mem: 9377 +Train: [41] [3400/6250] eta: 0:06:51 lr: 0.000085 grad: 0.1572 (0.1556) loss: 0.7384 (0.7411) time: 0.1434 data: 0.0586 max mem: 9377 +Train: [41] [3500/6250] eta: 0:06:36 lr: 0.000085 grad: 0.1449 (0.1553) loss: 0.7331 (0.7410) time: 0.1338 data: 0.0491 max mem: 9377 +Train: [41] [3600/6250] eta: 0:06:21 lr: 0.000085 grad: 0.1448 (0.1550) loss: 0.7373 (0.7410) time: 0.1282 data: 0.0459 max mem: 9377 +Train: [41] [3700/6250] eta: 0:06:06 lr: 0.000085 grad: 0.1468 (0.1548) loss: 0.7436 (0.7410) time: 0.1463 data: 0.0683 max mem: 9377 +Train: [41] [3800/6250] eta: 0:05:51 lr: 0.000085 grad: 0.1458 (0.1546) loss: 0.7423 (0.7411) time: 0.1433 data: 0.0617 max mem: 9377 +Train: [41] [3900/6250] eta: 0:05:36 lr: 0.000084 grad: 0.1491 (0.1545) loss: 0.7483 (0.7412) time: 0.1228 data: 0.0357 max mem: 9377 +Train: [41] [4000/6250] eta: 0:05:21 lr: 0.000084 grad: 0.1515 (0.1544) loss: 0.7313 (0.7412) time: 0.1198 data: 0.0340 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:07 lr: 0.000084 grad: 0.1444 (0.1543) loss: 0.7290 (0.7411) time: 0.1433 data: 0.0673 max mem: 9377 +Train: [41] [4200/6250] eta: 0:04:52 lr: 0.000084 grad: 0.1574 (0.1542) loss: 0.7226 (0.7410) time: 0.1259 data: 0.0378 max mem: 9377 +Train: [41] [4300/6250] eta: 0:04:38 lr: 0.000084 grad: 0.1495 (0.1542) loss: 0.7366 (0.7409) time: 0.1300 data: 0.0454 max mem: 9377 +Train: [41] [4400/6250] eta: 0:04:24 lr: 0.000084 grad: 0.1475 (0.1542) loss: 0.7458 (0.7409) time: 0.1419 data: 0.0593 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:09 lr: 0.000084 grad: 0.1416 (0.1541) loss: 0.7487 (0.7409) time: 0.1389 data: 0.0602 max mem: 9377 +Train: [41] [4600/6250] eta: 0:03:54 lr: 0.000084 grad: 0.1467 (0.1539) loss: 0.7428 (0.7410) time: 0.1456 data: 0.0568 max mem: 9377 +Train: [41] [4700/6250] eta: 0:03:40 lr: 0.000084 grad: 0.1449 (0.1538) loss: 0.7485 (0.7410) time: 0.1373 data: 0.0533 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:26 lr: 0.000084 grad: 0.1502 (0.1537) loss: 0.7425 (0.7412) time: 0.1428 data: 0.0563 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:11 lr: 0.000084 grad: 0.1420 (0.1535) loss: 0.7510 (0.7413) time: 0.1247 data: 0.0435 max mem: 9377 +Train: [41] [5000/6250] eta: 0:02:57 lr: 0.000084 grad: 0.1420 (0.1533) loss: 0.7479 (0.7414) time: 0.1313 data: 0.0556 max mem: 9377 +Train: [41] [5100/6250] eta: 0:02:43 lr: 0.000084 grad: 0.1453 (0.1532) loss: 0.7373 (0.7415) time: 0.1319 data: 0.0473 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:28 lr: 0.000084 grad: 0.1505 (0.1530) loss: 0.7453 (0.7416) time: 0.1418 data: 0.0568 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:14 lr: 0.000084 grad: 0.1475 (0.1529) loss: 0.7565 (0.7417) time: 0.1331 data: 0.0459 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:00 lr: 0.000084 grad: 0.1448 (0.1528) loss: 0.7492 (0.7419) time: 0.1348 data: 0.0526 max mem: 9377 +Train: [41] [5500/6250] eta: 0:01:46 lr: 0.000084 grad: 0.1426 (0.1526) loss: 0.7427 (0.7421) time: 0.1498 data: 0.0703 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:31 lr: 0.000084 grad: 0.1380 (0.1525) loss: 0.7451 (0.7422) time: 0.1217 data: 0.0342 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:17 lr: 0.000084 grad: 0.1434 (0.1524) loss: 0.7481 (0.7423) time: 0.1534 data: 0.0714 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:03 lr: 0.000084 grad: 0.1529 (0.1523) loss: 0.7427 (0.7424) time: 0.1383 data: 0.0549 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:49 lr: 0.000084 grad: 0.1491 (0.1522) loss: 0.7503 (0.7425) time: 0.1397 data: 0.0592 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:35 lr: 0.000084 grad: 0.1550 (0.1523) loss: 0.7422 (0.7426) time: 0.1524 data: 0.0720 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:21 lr: 0.000084 grad: 0.1555 (0.1523) loss: 0.7266 (0.7426) time: 0.1271 data: 0.0443 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:07 lr: 0.000084 grad: 0.1484 (0.1523) loss: 0.7427 (0.7426) time: 0.1334 data: 0.0494 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.1444 (0.1523) loss: 0.7446 (0.7426) time: 0.1374 data: 0.0556 max mem: 9377 +Train: [41] Total time: 0:14:45 (0.1417 s / it) +Averaged stats: lr: 0.000084 grad: 0.1444 (0.1523) loss: 0.7446 (0.7426) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:03:34 loss: 0.8544 (0.8544) time: 3.4578 data: 3.3932 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8571 (0.8602) time: 0.1400 data: 0.1146 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:13 (0.2169 s / it) +Averaged stats (hcp-train-subset): loss: 0.8571 (0.8602) +Eval (hcp-val): [41] [ 0/62] eta: 0:05:06 loss: 0.8574 (0.8574) time: 4.9459 data: 4.9163 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8567 (0.8584) time: 0.1359 data: 0.1108 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (hcp-val): loss: 0.8567 (0.8584) +Eval (nsd-val): [41] [ 0/62] eta: 0:04:54 loss: 0.8221 (0.8221) time: 4.7495 data: 4.7165 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8291 (0.8298) time: 0.1326 data: 0.1068 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:12 (0.2073 s / it) +Averaged stats (nsd-val): loss: 0.8291 (0.8298) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [42] [ 0/6250] eta: 10:28:16 lr: 0.000084 grad: 0.2633 (0.2633) loss: 0.7074 (0.7074) time: 6.0315 data: 5.9321 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:19:43 lr: 0.000084 grad: 0.2350 (0.3028) loss: 0.7201 (0.7430) time: 0.1451 data: 0.0449 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:16:47 lr: 0.000084 grad: 0.2431 (0.2773) loss: 0.7433 (0.7463) time: 0.1377 data: 0.0497 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:15:37 lr: 0.000084 grad: 0.1936 (0.2543) loss: 0.7502 (0.7450) time: 0.1431 data: 0.0520 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:14:47 lr: 0.000084 grad: 0.1997 (0.2391) loss: 0.7352 (0.7424) time: 0.1431 data: 0.0605 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:14:14 lr: 0.000084 grad: 0.1677 (0.2272) loss: 0.7495 (0.7435) time: 0.1411 data: 0.0460 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:13:48 lr: 0.000084 grad: 0.1675 (0.2170) loss: 0.7420 (0.7437) time: 0.1449 data: 0.0508 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:13:22 lr: 0.000084 grad: 0.1644 (0.2097) loss: 0.7364 (0.7439) time: 0.1305 data: 0.0425 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:12:56 lr: 0.000084 grad: 0.1457 (0.2022) loss: 0.7488 (0.7441) time: 0.1213 data: 0.0346 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:12:43 lr: 0.000083 grad: 0.1516 (0.1965) loss: 0.7395 (0.7442) time: 0.1524 data: 0.0711 max mem: 9377 +Train: [42] [1000/6250] eta: 0:12:27 lr: 0.000083 grad: 0.1623 (0.1927) loss: 0.7206 (0.7444) time: 0.1349 data: 0.0454 max mem: 9377 +Train: [42] [1100/6250] eta: 0:12:17 lr: 0.000083 grad: 0.1539 (0.1894) loss: 0.7363 (0.7440) time: 0.1478 data: 0.0662 max mem: 9377 +Train: [42] [1200/6250] eta: 0:12:02 lr: 0.000083 grad: 0.1486 (0.1865) loss: 0.7520 (0.7441) time: 0.1361 data: 0.0451 max mem: 9377 +Train: [42] [1300/6250] eta: 0:11:53 lr: 0.000083 grad: 0.1440 (0.1835) loss: 0.7522 (0.7441) time: 0.1662 data: 0.0831 max mem: 9377 +Train: [42] [1400/6250] eta: 0:11:42 lr: 0.000083 grad: 0.1401 (0.1811) loss: 0.7381 (0.7438) time: 0.1661 data: 0.0738 max mem: 9377 +Train: [42] [1500/6250] eta: 0:11:29 lr: 0.000083 grad: 0.1546 (0.1790) loss: 0.7328 (0.7434) time: 0.1254 data: 0.0411 max mem: 9377 +Train: [42] [1600/6250] eta: 0:11:14 lr: 0.000083 grad: 0.1598 (0.1776) loss: 0.7320 (0.7433) time: 0.1647 data: 0.0868 max mem: 9377 +Train: [42] [1700/6250] eta: 0:11:04 lr: 0.000083 grad: 0.1444 (0.1762) loss: 0.7401 (0.7431) time: 0.1701 data: 0.0842 max mem: 9377 +Train: [42] [1800/6250] eta: 0:10:50 lr: 0.000083 grad: 0.1419 (0.1746) loss: 0.7409 (0.7429) time: 0.1652 data: 0.0760 max mem: 9377 +Train: [42] [1900/6250] eta: 0:10:36 lr: 0.000083 grad: 0.1517 (0.1733) loss: 0.7359 (0.7427) time: 0.1344 data: 0.0475 max mem: 9377 +Train: [42] [2000/6250] eta: 0:10:18 lr: 0.000083 grad: 0.1439 (0.1721) loss: 0.7485 (0.7428) time: 0.1399 data: 0.0582 max mem: 9377 +Train: [42] [2100/6250] eta: 0:10:01 lr: 0.000083 grad: 0.1450 (0.1709) loss: 0.7384 (0.7430) time: 0.1353 data: 0.0493 max mem: 9377 +Train: [42] [2200/6250] eta: 0:09:45 lr: 0.000083 grad: 0.1381 (0.1696) loss: 0.7499 (0.7432) time: 0.1331 data: 0.0515 max mem: 9377 +Train: [42] [2300/6250] eta: 0:09:32 lr: 0.000083 grad: 0.1441 (0.1684) loss: 0.7385 (0.7436) time: 0.1600 data: 0.0794 max mem: 9377 +Train: [42] [2400/6250] eta: 0:09:19 lr: 0.000083 grad: 0.1458 (0.1675) loss: 0.7379 (0.7437) time: 0.1364 data: 0.0514 max mem: 9377 +Train: [42] [2500/6250] eta: 0:09:05 lr: 0.000083 grad: 0.1373 (0.1666) loss: 0.7449 (0.7439) time: 0.1493 data: 0.0698 max mem: 9377 +Train: [42] [2600/6250] eta: 0:08:50 lr: 0.000083 grad: 0.1384 (0.1657) loss: 0.7506 (0.7441) time: 0.1366 data: 0.0515 max mem: 9377 +Train: [42] [2700/6250] eta: 0:08:35 lr: 0.000083 grad: 0.1468 (0.1648) loss: 0.7542 (0.7444) time: 0.1441 data: 0.0708 max mem: 9377 +Train: [42] [2800/6250] eta: 0:08:20 lr: 0.000083 grad: 0.1418 (0.1641) loss: 0.7528 (0.7448) time: 0.1369 data: 0.0535 max mem: 9377 +Train: [42] [2900/6250] eta: 0:08:06 lr: 0.000083 grad: 0.1440 (0.1634) loss: 0.7439 (0.7449) time: 0.1693 data: 0.0935 max mem: 9377 +Train: [42] [3000/6250] eta: 0:07:54 lr: 0.000083 grad: 0.1538 (0.1628) loss: 0.7396 (0.7450) time: 0.1628 data: 0.0784 max mem: 9377 +Train: [42] [3100/6250] eta: 0:07:41 lr: 0.000083 grad: 0.1474 (0.1624) loss: 0.7398 (0.7450) time: 0.1500 data: 0.0607 max mem: 9377 +Train: [42] [3200/6250] eta: 0:07:27 lr: 0.000083 grad: 0.1508 (0.1620) loss: 0.7403 (0.7449) time: 0.1516 data: 0.0727 max mem: 9377 +Train: [42] [3300/6250] eta: 0:07:13 lr: 0.000083 grad: 0.1438 (0.1616) loss: 0.7369 (0.7447) time: 0.1481 data: 0.0678 max mem: 9377 +Train: [42] [3400/6250] eta: 0:06:57 lr: 0.000083 grad: 0.1452 (0.1612) loss: 0.7407 (0.7446) time: 0.1314 data: 0.0476 max mem: 9377 +Train: [42] [3500/6250] eta: 0:06:42 lr: 0.000083 grad: 0.1454 (0.1607) loss: 0.7368 (0.7445) time: 0.1347 data: 0.0519 max mem: 9377 +Train: [42] [3600/6250] eta: 0:06:26 lr: 0.000083 grad: 0.1441 (0.1604) loss: 0.7460 (0.7444) time: 0.1384 data: 0.0520 max mem: 9377 +Train: [42] [3700/6250] eta: 0:06:10 lr: 0.000083 grad: 0.1453 (0.1601) loss: 0.7497 (0.7444) time: 0.1200 data: 0.0360 max mem: 9377 +Train: [42] [3800/6250] eta: 0:05:55 lr: 0.000083 grad: 0.1385 (0.1597) loss: 0.7502 (0.7445) time: 0.1081 data: 0.0266 max mem: 9377 +Train: [42] [3900/6250] eta: 0:05:40 lr: 0.000083 grad: 0.1444 (0.1593) loss: 0.7438 (0.7444) time: 0.1406 data: 0.0522 max mem: 9377 +Train: [42] [4000/6250] eta: 0:05:25 lr: 0.000083 grad: 0.1458 (0.1590) loss: 0.7456 (0.7445) time: 0.1420 data: 0.0581 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:10 lr: 0.000082 grad: 0.1545 (0.1587) loss: 0.7392 (0.7445) time: 0.1370 data: 0.0523 max mem: 9377 +Train: [42] [4200/6250] eta: 0:04:56 lr: 0.000082 grad: 0.1446 (0.1585) loss: 0.7491 (0.7444) time: 0.1276 data: 0.0409 max mem: 9377 +Train: [42] [4300/6250] eta: 0:04:41 lr: 0.000082 grad: 0.1520 (0.1583) loss: 0.7274 (0.7442) time: 0.1531 data: 0.0804 max mem: 9377 +Train: [42] [4400/6250] eta: 0:04:27 lr: 0.000082 grad: 0.1458 (0.1581) loss: 0.7339 (0.7441) time: 0.1519 data: 0.0687 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:12 lr: 0.000082 grad: 0.1476 (0.1579) loss: 0.7365 (0.7440) time: 0.1203 data: 0.0297 max mem: 9377 +Train: [42] [4600/6250] eta: 0:03:58 lr: 0.000082 grad: 0.1497 (0.1577) loss: 0.7491 (0.7439) time: 0.1345 data: 0.0462 max mem: 9377 +Train: [42] [4700/6250] eta: 0:03:43 lr: 0.000082 grad: 0.1426 (0.1575) loss: 0.7495 (0.7439) time: 0.1401 data: 0.0541 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:29 lr: 0.000082 grad: 0.1449 (0.1572) loss: 0.7379 (0.7438) time: 0.1391 data: 0.0564 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:14 lr: 0.000082 grad: 0.1536 (0.1571) loss: 0.7339 (0.7438) time: 0.1183 data: 0.0343 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:00 lr: 0.000082 grad: 0.1486 (0.1570) loss: 0.7346 (0.7437) time: 0.1376 data: 0.0591 max mem: 9377 +Train: [42] [5100/6250] eta: 0:02:45 lr: 0.000082 grad: 0.1531 (0.1569) loss: 0.7370 (0.7435) time: 0.1415 data: 0.0602 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:31 lr: 0.000082 grad: 0.1507 (0.1568) loss: 0.7330 (0.7434) time: 0.1502 data: 0.0739 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:16 lr: 0.000082 grad: 0.1472 (0.1566) loss: 0.7348 (0.7431) time: 0.1157 data: 0.0287 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:02 lr: 0.000082 grad: 0.1463 (0.1565) loss: 0.7288 (0.7429) time: 0.1455 data: 0.0635 max mem: 9377 +Train: [42] [5500/6250] eta: 0:01:47 lr: 0.000082 grad: 0.1497 (0.1564) loss: 0.7308 (0.7427) time: 0.1279 data: 0.0491 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:33 lr: 0.000082 grad: 0.1507 (0.1564) loss: 0.7254 (0.7425) time: 0.1458 data: 0.0660 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:18 lr: 0.000082 grad: 0.1514 (0.1563) loss: 0.7380 (0.7423) time: 0.1355 data: 0.0546 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:04 lr: 0.000082 grad: 0.1512 (0.1563) loss: 0.7485 (0.7421) time: 0.1675 data: 0.0892 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:50 lr: 0.000082 grad: 0.1498 (0.1562) loss: 0.7296 (0.7419) time: 0.1387 data: 0.0502 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:35 lr: 0.000082 grad: 0.1518 (0.1562) loss: 0.7243 (0.7418) time: 0.1451 data: 0.0661 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:21 lr: 0.000082 grad: 0.1475 (0.1562) loss: 0.7466 (0.7416) time: 0.1137 data: 0.0363 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:07 lr: 0.000082 grad: 0.1555 (0.1562) loss: 0.7442 (0.7415) time: 0.1752 data: 0.0959 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.1534 (0.1562) loss: 0.7322 (0.7415) time: 0.1152 data: 0.0272 max mem: 9377 +Train: [42] Total time: 0:14:59 (0.1439 s / it) +Averaged stats: lr: 0.000082 grad: 0.1534 (0.1562) loss: 0.7322 (0.7415) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:04:03 loss: 0.8549 (0.8549) time: 3.9335 data: 3.8797 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8610 (0.8621) time: 0.1360 data: 0.1105 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (hcp-train-subset): loss: 0.8610 (0.8621) +Eval (hcp-val): [42] [ 0/62] eta: 0:04:05 loss: 0.8678 (0.8678) time: 3.9571 data: 3.8900 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8591 (0.8602) time: 0.1378 data: 0.1112 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (hcp-val): loss: 0.8591 (0.8602) +Eval (nsd-val): [42] [ 0/62] eta: 0:04:20 loss: 0.8238 (0.8238) time: 4.2088 data: 4.1251 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8294 (0.8321) time: 0.1166 data: 0.0912 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:13 (0.2097 s / it) +Averaged stats (nsd-val): loss: 0.8294 (0.8321) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 9:39:22 lr: 0.000082 grad: 0.1656 (0.1656) loss: 0.8434 (0.8434) time: 5.5620 data: 5.4568 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:20:44 lr: 0.000082 grad: 0.2447 (0.2775) loss: 0.7513 (0.7607) time: 0.1555 data: 0.0654 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:17:46 lr: 0.000082 grad: 0.2013 (0.2481) loss: 0.7510 (0.7581) time: 0.1365 data: 0.0392 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:16:43 lr: 0.000082 grad: 0.2109 (0.2315) loss: 0.7095 (0.7526) time: 0.1447 data: 0.0441 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:15:48 lr: 0.000082 grad: 0.1717 (0.2216) loss: 0.7400 (0.7502) time: 0.1453 data: 0.0567 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:15:12 lr: 0.000082 grad: 0.1571 (0.2095) loss: 0.7326 (0.7493) time: 0.1599 data: 0.0731 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:14:31 lr: 0.000082 grad: 0.1594 (0.2016) loss: 0.7424 (0.7478) time: 0.1391 data: 0.0524 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:14:01 lr: 0.000082 grad: 0.1563 (0.1966) loss: 0.7374 (0.7462) time: 0.1357 data: 0.0498 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:13:49 lr: 0.000082 grad: 0.1584 (0.1918) loss: 0.7356 (0.7446) time: 0.1857 data: 0.1014 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:13:36 lr: 0.000082 grad: 0.1460 (0.1877) loss: 0.7350 (0.7436) time: 0.1855 data: 0.1035 max mem: 9377 +Train: [43] [1000/6250] eta: 0:13:28 lr: 0.000081 grad: 0.1469 (0.1836) loss: 0.7364 (0.7433) time: 0.1639 data: 0.0850 max mem: 9377 +Train: [43] [1100/6250] eta: 0:13:16 lr: 0.000081 grad: 0.1457 (0.1802) loss: 0.7364 (0.7430) time: 0.1543 data: 0.0759 max mem: 9377 +Train: [43] [1200/6250] eta: 0:13:02 lr: 0.000081 grad: 0.1551 (0.1776) loss: 0.7278 (0.7425) time: 0.1591 data: 0.0752 max mem: 9377 +Train: [43] [1300/6250] eta: 0:12:47 lr: 0.000081 grad: 0.1457 (0.1757) loss: 0.7337 (0.7421) time: 0.1565 data: 0.0733 max mem: 9377 +Train: [43] [1400/6250] eta: 0:12:31 lr: 0.000081 grad: 0.1445 (0.1739) loss: 0.7367 (0.7415) time: 0.1654 data: 0.0785 max mem: 9377 +Train: [43] [1500/6250] eta: 0:12:14 lr: 0.000081 grad: 0.1448 (0.1721) loss: 0.7389 (0.7411) time: 0.1526 data: 0.0680 max mem: 9377 +Train: [43] [1600/6250] eta: 0:11:56 lr: 0.000081 grad: 0.1491 (0.1706) loss: 0.7356 (0.7406) time: 0.1696 data: 0.0894 max mem: 9377 +Train: [43] [1700/6250] eta: 0:11:36 lr: 0.000081 grad: 0.1548 (0.1693) loss: 0.7300 (0.7403) time: 0.1412 data: 0.0562 max mem: 9377 +Train: [43] [1800/6250] eta: 0:11:18 lr: 0.000081 grad: 0.1517 (0.1682) loss: 0.7313 (0.7399) time: 0.1508 data: 0.0719 max mem: 9377 +Train: [43] [1900/6250] eta: 0:10:59 lr: 0.000081 grad: 0.1516 (0.1672) loss: 0.7385 (0.7397) time: 0.1277 data: 0.0406 max mem: 9377 +Train: [43] [2000/6250] eta: 0:10:40 lr: 0.000081 grad: 0.1464 (0.1663) loss: 0.7394 (0.7397) time: 0.1390 data: 0.0524 max mem: 9377 +Train: [43] [2100/6250] eta: 0:10:21 lr: 0.000081 grad: 0.1537 (0.1656) loss: 0.7238 (0.7394) time: 0.1225 data: 0.0391 max mem: 9377 +Train: [43] [2200/6250] eta: 0:10:02 lr: 0.000081 grad: 0.1457 (0.1649) loss: 0.7401 (0.7393) time: 0.1265 data: 0.0420 max mem: 9377 +Train: [43] [2300/6250] eta: 0:09:44 lr: 0.000081 grad: 0.1423 (0.1641) loss: 0.7424 (0.7392) time: 0.1307 data: 0.0512 max mem: 9377 +Train: [43] [2400/6250] eta: 0:09:27 lr: 0.000081 grad: 0.1490 (0.1635) loss: 0.7278 (0.7393) time: 0.1277 data: 0.0448 max mem: 9377 +Train: [43] [2500/6250] eta: 0:09:11 lr: 0.000081 grad: 0.1489 (0.1628) loss: 0.7279 (0.7391) time: 0.1328 data: 0.0458 max mem: 9377 +Train: [43] [2600/6250] eta: 0:08:55 lr: 0.000081 grad: 0.1420 (0.1623) loss: 0.7374 (0.7391) time: 0.1278 data: 0.0399 max mem: 9377 +Train: [43] [2700/6250] eta: 0:08:39 lr: 0.000081 grad: 0.1474 (0.1619) loss: 0.7302 (0.7390) time: 0.1427 data: 0.0609 max mem: 9377 +Train: [43] [2800/6250] eta: 0:08:23 lr: 0.000081 grad: 0.1493 (0.1615) loss: 0.7364 (0.7389) time: 0.1356 data: 0.0558 max mem: 9377 +Train: [43] [2900/6250] eta: 0:08:06 lr: 0.000081 grad: 0.1399 (0.1611) loss: 0.7483 (0.7390) time: 0.1256 data: 0.0419 max mem: 9377 +Train: [43] [3000/6250] eta: 0:07:51 lr: 0.000081 grad: 0.1456 (0.1607) loss: 0.7453 (0.7391) time: 0.1220 data: 0.0385 max mem: 9377 +Train: [43] [3100/6250] eta: 0:07:35 lr: 0.000081 grad: 0.1416 (0.1602) loss: 0.7376 (0.7392) time: 0.1247 data: 0.0446 max mem: 9377 +Train: [43] [3200/6250] eta: 0:07:20 lr: 0.000081 grad: 0.1448 (0.1598) loss: 0.7385 (0.7393) time: 0.1497 data: 0.0685 max mem: 9377 +Train: [43] [3300/6250] eta: 0:07:06 lr: 0.000081 grad: 0.1373 (0.1593) loss: 0.7512 (0.7396) time: 0.1513 data: 0.0623 max mem: 9377 +Train: [43] [3400/6250] eta: 0:06:52 lr: 0.000081 grad: 0.1426 (0.1590) loss: 0.7476 (0.7399) time: 0.1553 data: 0.0707 max mem: 9377 +Train: [43] [3500/6250] eta: 0:06:38 lr: 0.000081 grad: 0.1431 (0.1587) loss: 0.7482 (0.7400) time: 0.1297 data: 0.0465 max mem: 9377 +Train: [43] [3600/6250] eta: 0:06:23 lr: 0.000081 grad: 0.1497 (0.1583) loss: 0.7355 (0.7401) time: 0.1564 data: 0.0722 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:08 lr: 0.000081 grad: 0.1480 (0.1581) loss: 0.7430 (0.7403) time: 0.1369 data: 0.0531 max mem: 9377 +Train: [43] [3800/6250] eta: 0:05:53 lr: 0.000081 grad: 0.1432 (0.1578) loss: 0.7345 (0.7403) time: 0.1413 data: 0.0586 max mem: 9377 +Train: [43] [3900/6250] eta: 0:05:39 lr: 0.000081 grad: 0.1499 (0.1575) loss: 0.7496 (0.7405) time: 0.1606 data: 0.0741 max mem: 9377 +Train: [43] [4000/6250] eta: 0:05:24 lr: 0.000081 grad: 0.1505 (0.1574) loss: 0.7476 (0.7407) time: 0.1393 data: 0.0529 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:09 lr: 0.000081 grad: 0.1432 (0.1572) loss: 0.7337 (0.7408) time: 0.1227 data: 0.0394 max mem: 9377 +Train: [43] [4200/6250] eta: 0:04:54 lr: 0.000080 grad: 0.1477 (0.1569) loss: 0.7293 (0.7408) time: 0.1266 data: 0.0398 max mem: 9377 +Train: [43] [4300/6250] eta: 0:04:39 lr: 0.000080 grad: 0.1460 (0.1567) loss: 0.7524 (0.7409) time: 0.1194 data: 0.0296 max mem: 9377 +Train: [43] [4400/6250] eta: 0:04:25 lr: 0.000080 grad: 0.1477 (0.1566) loss: 0.7443 (0.7411) time: 0.1594 data: 0.0796 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:11 lr: 0.000080 grad: 0.1398 (0.1563) loss: 0.7438 (0.7412) time: 0.1499 data: 0.0634 max mem: 9377 +Train: [43] [4600/6250] eta: 0:03:56 lr: 0.000080 grad: 0.1482 (0.1561) loss: 0.7418 (0.7412) time: 0.1569 data: 0.0732 max mem: 9377 +Train: [43] [4700/6250] eta: 0:03:42 lr: 0.000080 grad: 0.1435 (0.1559) loss: 0.7513 (0.7413) time: 0.1009 data: 0.0057 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:27 lr: 0.000080 grad: 0.1422 (0.1557) loss: 0.7429 (0.7414) time: 0.1061 data: 0.0171 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:13 lr: 0.000080 grad: 0.1401 (0.1555) loss: 0.7414 (0.7414) time: 0.1279 data: 0.0378 max mem: 9377 +Train: [43] [5000/6250] eta: 0:02:58 lr: 0.000080 grad: 0.1423 (0.1554) loss: 0.7366 (0.7415) time: 0.1482 data: 0.0638 max mem: 9377 +Train: [43] [5100/6250] eta: 0:02:44 lr: 0.000080 grad: 0.1432 (0.1552) loss: 0.7467 (0.7415) time: 0.1171 data: 0.0352 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:29 lr: 0.000080 grad: 0.1442 (0.1551) loss: 0.7368 (0.7415) time: 0.1261 data: 0.0421 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:15 lr: 0.000080 grad: 0.1493 (0.1550) loss: 0.7330 (0.7415) time: 0.1268 data: 0.0444 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:01 lr: 0.000080 grad: 0.1475 (0.1549) loss: 0.7337 (0.7414) time: 0.1382 data: 0.0512 max mem: 9377 +Train: [43] [5500/6250] eta: 0:01:46 lr: 0.000080 grad: 0.1437 (0.1547) loss: 0.7357 (0.7413) time: 0.1305 data: 0.0409 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:32 lr: 0.000080 grad: 0.1380 (0.1547) loss: 0.7467 (0.7412) time: 0.1517 data: 0.0717 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:18 lr: 0.000080 grad: 0.1485 (0.1546) loss: 0.7355 (0.7411) time: 0.1362 data: 0.0527 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:04 lr: 0.000080 grad: 0.1545 (0.1547) loss: 0.7366 (0.7411) time: 0.1508 data: 0.0667 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:49 lr: 0.000080 grad: 0.1455 (0.1547) loss: 0.7459 (0.7411) time: 0.1425 data: 0.0588 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:35 lr: 0.000080 grad: 0.1392 (0.1546) loss: 0.7378 (0.7410) time: 0.1344 data: 0.0486 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:21 lr: 0.000080 grad: 0.1515 (0.1545) loss: 0.7314 (0.7409) time: 0.1456 data: 0.0613 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:07 lr: 0.000080 grad: 0.1488 (0.1544) loss: 0.7295 (0.7408) time: 0.1305 data: 0.0493 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.1447 (0.1544) loss: 0.7414 (0.7408) time: 0.1426 data: 0.0585 max mem: 9377 +Train: [43] Total time: 0:14:53 (0.1429 s / it) +Averaged stats: lr: 0.000080 grad: 0.1447 (0.1544) loss: 0.7414 (0.7408) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:03:25 loss: 0.8594 (0.8594) time: 3.3077 data: 3.2197 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8642 (0.8630) time: 0.1323 data: 0.1067 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:13 (0.2118 s / it) +Averaged stats (hcp-train-subset): loss: 0.8642 (0.8630) +Eval (hcp-val): [43] [ 0/62] eta: 0:05:15 loss: 0.8642 (0.8642) time: 5.0885 data: 5.0587 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8593 (0.8605) time: 0.1231 data: 0.0962 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:13 (0.2176 s / it) +Averaged stats (hcp-val): loss: 0.8593 (0.8605) +Eval (nsd-val): [43] [ 0/62] eta: 0:04:26 loss: 0.8206 (0.8206) time: 4.3045 data: 4.2644 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8333 (0.8352) time: 0.1168 data: 0.0913 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:12 (0.2036 s / it) +Averaged stats (nsd-val): loss: 0.8333 (0.8352) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 6:51:12 lr: 0.000080 grad: 0.1666 (0.1666) loss: 0.8481 (0.8481) time: 3.9476 data: 3.6327 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:19:21 lr: 0.000080 grad: 0.2187 (0.2498) loss: 0.7588 (0.7713) time: 0.1494 data: 0.0491 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:16:30 lr: 0.000080 grad: 0.1709 (0.2377) loss: 0.7633 (0.7635) time: 0.1209 data: 0.0322 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:15:24 lr: 0.000080 grad: 0.1777 (0.2247) loss: 0.7500 (0.7584) time: 0.1252 data: 0.0371 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:14:50 lr: 0.000080 grad: 0.1996 (0.2175) loss: 0.7456 (0.7542) time: 0.1454 data: 0.0576 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:14:25 lr: 0.000080 grad: 0.1771 (0.2115) loss: 0.7350 (0.7505) time: 0.1431 data: 0.0521 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:13:52 lr: 0.000080 grad: 0.1620 (0.2047) loss: 0.7301 (0.7478) time: 0.1087 data: 0.0182 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:13:28 lr: 0.000080 grad: 0.1608 (0.1996) loss: 0.7407 (0.7465) time: 0.1273 data: 0.0395 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:13:06 lr: 0.000080 grad: 0.1556 (0.1945) loss: 0.7371 (0.7459) time: 0.1225 data: 0.0395 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:12:48 lr: 0.000080 grad: 0.1486 (0.1902) loss: 0.7485 (0.7454) time: 0.1176 data: 0.0307 max mem: 9377 +Train: [44] [1000/6250] eta: 0:12:30 lr: 0.000080 grad: 0.1500 (0.1863) loss: 0.7417 (0.7449) time: 0.1396 data: 0.0600 max mem: 9377 +Train: [44] [1100/6250] eta: 0:12:14 lr: 0.000079 grad: 0.1446 (0.1828) loss: 0.7408 (0.7447) time: 0.1427 data: 0.0614 max mem: 9377 +Train: [44] [1200/6250] eta: 0:11:57 lr: 0.000079 grad: 0.1479 (0.1800) loss: 0.7414 (0.7442) time: 0.1363 data: 0.0516 max mem: 9377 +Train: [44] [1300/6250] eta: 0:11:44 lr: 0.000079 grad: 0.1561 (0.1778) loss: 0.7277 (0.7436) time: 0.1680 data: 0.0919 max mem: 9377 +Train: [44] [1400/6250] eta: 0:11:34 lr: 0.000079 grad: 0.1540 (0.1763) loss: 0.7299 (0.7433) time: 0.1549 data: 0.0808 max mem: 9377 +Train: [44] [1500/6250] eta: 0:11:26 lr: 0.000079 grad: 0.1441 (0.1750) loss: 0.7355 (0.7426) time: 0.1648 data: 0.0886 max mem: 9377 +Train: [44] [1600/6250] eta: 0:11:13 lr: 0.000079 grad: 0.1536 (0.1738) loss: 0.7342 (0.7420) time: 0.1448 data: 0.0643 max mem: 9377 +Train: [44] [1700/6250] eta: 0:10:59 lr: 0.000079 grad: 0.1464 (0.1723) loss: 0.7377 (0.7418) time: 0.1476 data: 0.0667 max mem: 9377 +Train: [44] [1800/6250] eta: 0:10:44 lr: 0.000079 grad: 0.1419 (0.1709) loss: 0.7311 (0.7414) time: 0.1569 data: 0.0782 max mem: 9377 +Train: [44] [1900/6250] eta: 0:10:27 lr: 0.000079 grad: 0.1532 (0.1697) loss: 0.7425 (0.7408) time: 0.1215 data: 0.0357 max mem: 9377 +Train: [44] [2000/6250] eta: 0:10:10 lr: 0.000079 grad: 0.1489 (0.1688) loss: 0.7333 (0.7403) time: 0.1017 data: 0.0087 max mem: 9377 +Train: [44] [2100/6250] eta: 0:09:53 lr: 0.000079 grad: 0.1497 (0.1679) loss: 0.7217 (0.7398) time: 0.1029 data: 0.0166 max mem: 9377 +Train: [44] [2200/6250] eta: 0:09:36 lr: 0.000079 grad: 0.1546 (0.1674) loss: 0.7337 (0.7396) time: 0.1188 data: 0.0256 max mem: 9377 +Train: [44] [2300/6250] eta: 0:09:19 lr: 0.000079 grad: 0.1489 (0.1667) loss: 0.7290 (0.7393) time: 0.1216 data: 0.0325 max mem: 9377 +Train: [44] [2400/6250] eta: 0:09:04 lr: 0.000079 grad: 0.1423 (0.1660) loss: 0.7331 (0.7391) time: 0.1461 data: 0.0668 max mem: 9377 +Train: [44] [2500/6250] eta: 0:08:50 lr: 0.000079 grad: 0.1535 (0.1654) loss: 0.7303 (0.7389) time: 0.1447 data: 0.0661 max mem: 9377 +Train: [44] [2600/6250] eta: 0:08:39 lr: 0.000079 grad: 0.1451 (0.1649) loss: 0.7387 (0.7388) time: 0.1797 data: 0.1034 max mem: 9377 +Train: [44] [2700/6250] eta: 0:08:25 lr: 0.000079 grad: 0.1566 (0.1645) loss: 0.7321 (0.7388) time: 0.1594 data: 0.0832 max mem: 9377 +Train: [44] [2800/6250] eta: 0:08:12 lr: 0.000079 grad: 0.1480 (0.1640) loss: 0.7551 (0.7390) time: 0.1393 data: 0.0594 max mem: 9377 +Train: [44] [2900/6250] eta: 0:07:59 lr: 0.000079 grad: 0.1397 (0.1635) loss: 0.7478 (0.7391) time: 0.1780 data: 0.1008 max mem: 9377 +Train: [44] [3000/6250] eta: 0:07:45 lr: 0.000079 grad: 0.1382 (0.1630) loss: 0.7402 (0.7393) time: 0.1389 data: 0.0566 max mem: 9377 +Train: [44] [3100/6250] eta: 0:07:32 lr: 0.000079 grad: 0.1476 (0.1625) loss: 0.7356 (0.7395) time: 0.1492 data: 0.0706 max mem: 9377 +Train: [44] [3200/6250] eta: 0:07:18 lr: 0.000079 grad: 0.1491 (0.1621) loss: 0.7400 (0.7395) time: 0.1561 data: 0.0757 max mem: 9377 +Train: [44] [3300/6250] eta: 0:07:04 lr: 0.000079 grad: 0.1416 (0.1616) loss: 0.7482 (0.7397) time: 0.1327 data: 0.0541 max mem: 9377 +Train: [44] [3400/6250] eta: 0:06:48 lr: 0.000079 grad: 0.1541 (0.1614) loss: 0.7364 (0.7398) time: 0.1337 data: 0.0516 max mem: 9377 +Train: [44] [3500/6250] eta: 0:06:34 lr: 0.000079 grad: 0.1593 (0.1612) loss: 0.7479 (0.7399) time: 0.1353 data: 0.0525 max mem: 9377 +Train: [44] [3600/6250] eta: 0:06:19 lr: 0.000079 grad: 0.1495 (0.1612) loss: 0.7309 (0.7399) time: 0.1371 data: 0.0582 max mem: 9377 +Train: [44] [3700/6250] eta: 0:06:04 lr: 0.000079 grad: 0.1483 (0.1609) loss: 0.7357 (0.7398) time: 0.1874 data: 0.1117 max mem: 9377 +Train: [44] [3800/6250] eta: 0:05:51 lr: 0.000079 grad: 0.1495 (0.1607) loss: 0.7498 (0.7398) time: 0.1313 data: 0.0511 max mem: 9377 +Train: [44] [3900/6250] eta: 0:05:37 lr: 0.000079 grad: 0.1581 (0.1606) loss: 0.7424 (0.7399) time: 0.1543 data: 0.0703 max mem: 9377 +Train: [44] [4000/6250] eta: 0:05:23 lr: 0.000079 grad: 0.1512 (0.1606) loss: 0.7388 (0.7399) time: 0.1424 data: 0.0600 max mem: 9377 +Train: [44] [4100/6250] eta: 0:05:09 lr: 0.000079 grad: 0.1566 (0.1605) loss: 0.7321 (0.7399) time: 0.1576 data: 0.0791 max mem: 9377 +Train: [44] [4200/6250] eta: 0:04:54 lr: 0.000078 grad: 0.1460 (0.1602) loss: 0.7452 (0.7399) time: 0.1417 data: 0.0558 max mem: 9377 +Train: [44] [4300/6250] eta: 0:04:40 lr: 0.000078 grad: 0.1443 (0.1599) loss: 0.7353 (0.7399) time: 0.1553 data: 0.0688 max mem: 9377 +Train: [44] [4400/6250] eta: 0:04:25 lr: 0.000078 grad: 0.1502 (0.1597) loss: 0.7454 (0.7399) time: 0.1433 data: 0.0624 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:10 lr: 0.000078 grad: 0.1440 (0.1595) loss: 0.7440 (0.7398) time: 0.1067 data: 0.0174 max mem: 9377 +Train: [44] [4600/6250] eta: 0:03:55 lr: 0.000078 grad: 0.1488 (0.1592) loss: 0.7359 (0.7399) time: 0.1176 data: 0.0238 max mem: 9377 +Train: [44] [4700/6250] eta: 0:03:41 lr: 0.000078 grad: 0.1681 (0.1592) loss: 0.7216 (0.7397) time: 0.1306 data: 0.0377 max mem: 9377 +Train: [44] [4800/6250] eta: 0:03:26 lr: 0.000078 grad: 0.1615 (0.1593) loss: 0.7403 (0.7396) time: 0.1284 data: 0.0420 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:11 lr: 0.000078 grad: 0.1538 (0.1592) loss: 0.7378 (0.7396) time: 0.1283 data: 0.0381 max mem: 9377 +Train: [44] [5000/6250] eta: 0:02:57 lr: 0.000078 grad: 0.1612 (0.1592) loss: 0.7338 (0.7395) time: 0.1307 data: 0.0440 max mem: 9377 +Train: [44] [5100/6250] eta: 0:02:43 lr: 0.000078 grad: 0.1508 (0.1592) loss: 0.7425 (0.7395) time: 0.1510 data: 0.0688 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:28 lr: 0.000078 grad: 0.1502 (0.1591) loss: 0.7344 (0.7395) time: 0.1381 data: 0.0568 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:14 lr: 0.000078 grad: 0.1549 (0.1589) loss: 0.7361 (0.7394) time: 0.1188 data: 0.0288 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:00 lr: 0.000078 grad: 0.1597 (0.1590) loss: 0.7182 (0.7392) time: 0.1128 data: 0.0326 max mem: 9377 +Train: [44] [5500/6250] eta: 0:01:45 lr: 0.000078 grad: 0.1634 (0.1590) loss: 0.7258 (0.7391) time: 0.1208 data: 0.0369 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:31 lr: 0.000078 grad: 0.1565 (0.1589) loss: 0.7261 (0.7389) time: 0.1550 data: 0.0703 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:17 lr: 0.000078 grad: 0.1541 (0.1588) loss: 0.7300 (0.7388) time: 0.1621 data: 0.0783 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:03 lr: 0.000078 grad: 0.1516 (0.1587) loss: 0.7252 (0.7386) time: 0.1547 data: 0.0751 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:49 lr: 0.000078 grad: 0.1474 (0.1586) loss: 0.7306 (0.7385) time: 0.1375 data: 0.0588 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:35 lr: 0.000078 grad: 0.1542 (0.1586) loss: 0.7253 (0.7383) time: 0.1502 data: 0.0665 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:21 lr: 0.000078 grad: 0.1570 (0.1585) loss: 0.7190 (0.7382) time: 0.1544 data: 0.0732 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:07 lr: 0.000078 grad: 0.1528 (0.1585) loss: 0.7260 (0.7381) time: 0.1395 data: 0.0587 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.1512 (0.1584) loss: 0.7357 (0.7380) time: 0.1389 data: 0.0576 max mem: 9377 +Train: [44] Total time: 0:14:46 (0.1419 s / it) +Averaged stats: lr: 0.000078 grad: 0.1512 (0.1584) loss: 0.7357 (0.7380) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:03:25 loss: 0.8634 (0.8634) time: 3.3118 data: 3.2346 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8610 (0.8628) time: 0.1442 data: 0.1188 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:13 (0.2240 s / it) +Averaged stats (hcp-train-subset): loss: 0.8610 (0.8628) +Making plots (hcp-train-subset): example=37 +Eval (hcp-val): [44] [ 0/62] eta: 0:03:43 loss: 0.8571 (0.8571) time: 3.5986 data: 3.5275 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8603 (0.8615) time: 0.1471 data: 0.1212 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:14 (0.2361 s / it) +Averaged stats (hcp-val): loss: 0.8603 (0.8615) +Making plots (hcp-val): example=5 +Eval (nsd-val): [44] [ 0/62] eta: 0:05:33 loss: 0.8223 (0.8223) time: 5.3713 data: 5.3395 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8330 (0.8344) time: 0.1203 data: 0.0951 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:15 (0.2436 s / it) +Averaged stats (nsd-val): loss: 0.8330 (0.8344) +Making plots (nsd-val): example=7 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 10:54:41 lr: 0.000078 grad: 0.2365 (0.2365) loss: 0.7457 (0.7457) time: 6.2850 data: 6.1529 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:22:13 lr: 0.000078 grad: 0.2013 (0.2359) loss: 0.7463 (0.7757) time: 0.1636 data: 0.0645 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:19:04 lr: 0.000078 grad: 0.2123 (0.2352) loss: 0.7591 (0.7622) time: 0.1590 data: 0.0670 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:17:53 lr: 0.000078 grad: 0.1901 (0.2247) loss: 0.7336 (0.7551) time: 0.1686 data: 0.0767 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:17:00 lr: 0.000078 grad: 0.1675 (0.2124) loss: 0.7584 (0.7526) time: 0.1727 data: 0.0813 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:16:12 lr: 0.000078 grad: 0.1565 (0.2030) loss: 0.7500 (0.7509) time: 0.1581 data: 0.0719 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:15:30 lr: 0.000078 grad: 0.1609 (0.1972) loss: 0.7442 (0.7498) time: 0.1455 data: 0.0578 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:14:56 lr: 0.000078 grad: 0.1576 (0.1915) loss: 0.7477 (0.7490) time: 0.1293 data: 0.0393 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:14:30 lr: 0.000078 grad: 0.1546 (0.1868) loss: 0.7322 (0.7488) time: 0.1462 data: 0.0649 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:14:05 lr: 0.000078 grad: 0.1636 (0.1836) loss: 0.7232 (0.7477) time: 0.1388 data: 0.0562 max mem: 9377 +Train: [45] [1000/6250] eta: 0:13:38 lr: 0.000078 grad: 0.1636 (0.1819) loss: 0.7369 (0.7458) time: 0.1169 data: 0.0335 max mem: 9377 +Train: [45] [1100/6250] eta: 0:13:14 lr: 0.000077 grad: 0.1563 (0.1802) loss: 0.7194 (0.7446) time: 0.1253 data: 0.0378 max mem: 9377 +Train: [45] [1200/6250] eta: 0:12:52 lr: 0.000077 grad: 0.1591 (0.1783) loss: 0.7268 (0.7438) time: 0.1435 data: 0.0599 max mem: 9377 +Train: [45] [1300/6250] eta: 0:12:37 lr: 0.000077 grad: 0.1534 (0.1767) loss: 0.7292 (0.7428) time: 0.1555 data: 0.0691 max mem: 9377 +Train: [45] [1400/6250] eta: 0:12:22 lr: 0.000077 grad: 0.1564 (0.1754) loss: 0.7225 (0.7420) time: 0.1730 data: 0.0859 max mem: 9377 +Train: [45] [1500/6250] eta: 0:12:05 lr: 0.000077 grad: 0.1545 (0.1738) loss: 0.7198 (0.7411) time: 0.1268 data: 0.0425 max mem: 9377 +Train: [45] [1600/6250] eta: 0:11:49 lr: 0.000077 grad: 0.1524 (0.1726) loss: 0.7298 (0.7402) time: 0.1392 data: 0.0653 max mem: 9377 +Train: [45] [1700/6250] eta: 0:11:32 lr: 0.000077 grad: 0.1483 (0.1714) loss: 0.7312 (0.7397) time: 0.1646 data: 0.0821 max mem: 9377 +Train: [45] [1800/6250] eta: 0:11:16 lr: 0.000077 grad: 0.1599 (0.1710) loss: 0.7265 (0.7391) time: 0.1482 data: 0.0676 max mem: 9377 +Train: [45] [1900/6250] eta: 0:11:00 lr: 0.000077 grad: 0.1494 (0.1702) loss: 0.7216 (0.7386) time: 0.1461 data: 0.0684 max mem: 9377 +Train: [45] [2000/6250] eta: 0:10:44 lr: 0.000077 grad: 0.1464 (0.1695) loss: 0.7291 (0.7383) time: 0.1526 data: 0.0629 max mem: 9377 +Train: [45] [2100/6250] eta: 0:10:27 lr: 0.000077 grad: 0.1557 (0.1690) loss: 0.7233 (0.7379) time: 0.1372 data: 0.0559 max mem: 9377 +Train: [45] [2200/6250] eta: 0:10:09 lr: 0.000077 grad: 0.1493 (0.1684) loss: 0.7306 (0.7376) time: 0.1513 data: 0.0690 max mem: 9377 +Train: [45] [2300/6250] eta: 0:09:53 lr: 0.000077 grad: 0.1465 (0.1675) loss: 0.7266 (0.7375) time: 0.1580 data: 0.0693 max mem: 9377 +Train: [45] [2400/6250] eta: 0:09:36 lr: 0.000077 grad: 0.1519 (0.1668) loss: 0.7397 (0.7373) time: 0.1251 data: 0.0328 max mem: 9377 +Train: [45] [2500/6250] eta: 0:09:19 lr: 0.000077 grad: 0.1512 (0.1662) loss: 0.7082 (0.7370) time: 0.1255 data: 0.0484 max mem: 9377 +Train: [45] [2600/6250] eta: 0:09:04 lr: 0.000077 grad: 0.1549 (0.1656) loss: 0.7248 (0.7367) time: 0.1713 data: 0.0865 max mem: 9377 +Train: [45] [2700/6250] eta: 0:08:47 lr: 0.000077 grad: 0.1487 (0.1651) loss: 0.7285 (0.7365) time: 0.1423 data: 0.0644 max mem: 9377 +Train: [45] [2800/6250] eta: 0:08:32 lr: 0.000077 grad: 0.1446 (0.1647) loss: 0.7303 (0.7364) time: 0.1452 data: 0.0638 max mem: 9377 +Train: [45] [2900/6250] eta: 0:08:17 lr: 0.000077 grad: 0.1509 (0.1643) loss: 0.7207 (0.7363) time: 0.1691 data: 0.0876 max mem: 9377 +Train: [45] [3000/6250] eta: 0:08:03 lr: 0.000077 grad: 0.1511 (0.1640) loss: 0.7333 (0.7361) time: 0.1408 data: 0.0608 max mem: 9377 +Train: [45] [3100/6250] eta: 0:07:48 lr: 0.000077 grad: 0.1628 (0.1638) loss: 0.7311 (0.7360) time: 0.1549 data: 0.0792 max mem: 9377 +Train: [45] [3200/6250] eta: 0:07:33 lr: 0.000077 grad: 0.1515 (0.1634) loss: 0.7269 (0.7359) time: 0.1291 data: 0.0505 max mem: 9377 +Train: [45] [3300/6250] eta: 0:07:19 lr: 0.000077 grad: 0.1494 (0.1631) loss: 0.7306 (0.7357) time: 0.1360 data: 0.0580 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:03 lr: 0.000077 grad: 0.1565 (0.1629) loss: 0.7288 (0.7356) time: 0.1183 data: 0.0326 max mem: 9377 +Train: [45] [3500/6250] eta: 0:06:48 lr: 0.000077 grad: 0.1609 (0.1628) loss: 0.7403 (0.7356) time: 0.1689 data: 0.0943 max mem: 9377 +Train: [45] [3600/6250] eta: 0:06:33 lr: 0.000077 grad: 0.1614 (0.1628) loss: 0.7196 (0.7354) time: 0.1352 data: 0.0542 max mem: 9377 +Train: [45] [3700/6250] eta: 0:06:18 lr: 0.000077 grad: 0.1534 (0.1627) loss: 0.7380 (0.7353) time: 0.1472 data: 0.0672 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:03 lr: 0.000077 grad: 0.1442 (0.1626) loss: 0.7376 (0.7352) time: 0.1336 data: 0.0539 max mem: 9377 +Train: [45] [3900/6250] eta: 0:05:49 lr: 0.000077 grad: 0.1502 (0.1624) loss: 0.7375 (0.7350) time: 0.1460 data: 0.0602 max mem: 9377 +Train: [45] [4000/6250] eta: 0:05:34 lr: 0.000077 grad: 0.1590 (0.1624) loss: 0.7368 (0.7349) time: 0.1682 data: 0.0769 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:20 lr: 0.000077 grad: 0.1491 (0.1624) loss: 0.7480 (0.7349) time: 0.1572 data: 0.0729 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:06 lr: 0.000076 grad: 0.1541 (0.1622) loss: 0.7248 (0.7348) time: 0.1861 data: 0.1059 max mem: 9377 +Train: [45] [4300/6250] eta: 0:04:51 lr: 0.000076 grad: 0.1543 (0.1620) loss: 0.7329 (0.7347) time: 0.1811 data: 0.0899 max mem: 9377 +Train: [45] [4400/6250] eta: 0:04:36 lr: 0.000076 grad: 0.1611 (0.1619) loss: 0.7371 (0.7347) time: 0.1183 data: 0.0271 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:21 lr: 0.000076 grad: 0.1500 (0.1618) loss: 0.7466 (0.7347) time: 0.1575 data: 0.0732 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:06 lr: 0.000076 grad: 0.1540 (0.1617) loss: 0.7384 (0.7347) time: 0.1603 data: 0.0765 max mem: 9377 +Train: [45] [4700/6250] eta: 0:03:51 lr: 0.000076 grad: 0.1518 (0.1615) loss: 0.7407 (0.7348) time: 0.1447 data: 0.0607 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:36 lr: 0.000076 grad: 0.1460 (0.1614) loss: 0.7442 (0.7348) time: 0.1366 data: 0.0592 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:21 lr: 0.000076 grad: 0.1558 (0.1614) loss: 0.7443 (0.7348) time: 0.1619 data: 0.0821 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:06 lr: 0.000076 grad: 0.1493 (0.1613) loss: 0.7392 (0.7348) time: 0.1733 data: 0.0998 max mem: 9377 +Train: [45] [5100/6250] eta: 0:02:52 lr: 0.000076 grad: 0.1484 (0.1611) loss: 0.7430 (0.7349) time: 0.1528 data: 0.0760 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:37 lr: 0.000076 grad: 0.1484 (0.1609) loss: 0.7414 (0.7350) time: 0.1747 data: 0.0963 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:22 lr: 0.000076 grad: 0.1494 (0.1607) loss: 0.7269 (0.7349) time: 0.1385 data: 0.0539 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:07 lr: 0.000076 grad: 0.1540 (0.1606) loss: 0.7332 (0.7349) time: 0.1208 data: 0.0400 max mem: 9377 +Train: [45] [5500/6250] eta: 0:01:51 lr: 0.000076 grad: 0.1484 (0.1605) loss: 0.7331 (0.7348) time: 0.1401 data: 0.0598 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:36 lr: 0.000076 grad: 0.1428 (0.1604) loss: 0.7367 (0.7348) time: 0.1567 data: 0.0724 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:21 lr: 0.000076 grad: 0.1461 (0.1602) loss: 0.7337 (0.7348) time: 0.1412 data: 0.0636 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:06 lr: 0.000076 grad: 0.1561 (0.1600) loss: 0.7186 (0.7348) time: 0.1372 data: 0.0583 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:51 lr: 0.000076 grad: 0.1438 (0.1598) loss: 0.7411 (0.7349) time: 0.1156 data: 0.0341 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:37 lr: 0.000076 grad: 0.1439 (0.1597) loss: 0.7529 (0.7350) time: 0.1550 data: 0.0794 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:22 lr: 0.000076 grad: 0.1491 (0.1596) loss: 0.7268 (0.7351) time: 0.1542 data: 0.0749 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:07 lr: 0.000076 grad: 0.1531 (0.1594) loss: 0.7367 (0.7351) time: 0.1346 data: 0.0549 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.1514 (0.1594) loss: 0.7417 (0.7352) time: 0.0991 data: 0.0050 max mem: 9377 +Train: [45] Total time: 0:15:28 (0.1486 s / it) +Averaged stats: lr: 0.000076 grad: 0.1514 (0.1594) loss: 0.7417 (0.7352) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:04:02 loss: 0.8569 (0.8569) time: 3.9033 data: 3.8200 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8620 (0.8627) time: 0.1295 data: 0.1043 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-train-subset): loss: 0.8620 (0.8627) +Eval (hcp-val): [45] [ 0/62] eta: 0:05:58 loss: 0.8542 (0.8542) time: 5.7895 data: 5.7414 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8577 (0.8613) time: 0.1314 data: 0.1063 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (hcp-val): loss: 0.8577 (0.8613) +Eval (nsd-val): [45] [ 0/62] eta: 0:03:50 loss: 0.8256 (0.8256) time: 3.7254 data: 3.6572 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8332 (0.8365) time: 0.1297 data: 0.1041 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:12 (0.2057 s / it) +Averaged stats (nsd-val): loss: 0.8332 (0.8365) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 10:56:03 lr: 0.000076 grad: 0.3370 (0.3370) loss: 0.6605 (0.6605) time: 6.2982 data: 6.2069 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:19:33 lr: 0.000076 grad: 0.2190 (0.2472) loss: 0.7598 (0.7681) time: 0.1513 data: 0.0593 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:17:00 lr: 0.000076 grad: 0.2088 (0.2448) loss: 0.7417 (0.7560) time: 0.1617 data: 0.0756 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:15:31 lr: 0.000076 grad: 0.1894 (0.2290) loss: 0.7282 (0.7520) time: 0.1135 data: 0.0175 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:14:47 lr: 0.000076 grad: 0.2035 (0.2234) loss: 0.7357 (0.7487) time: 0.1490 data: 0.0503 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:14:28 lr: 0.000076 grad: 0.1548 (0.2144) loss: 0.7423 (0.7476) time: 0.1607 data: 0.0640 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:14:00 lr: 0.000076 grad: 0.1636 (0.2058) loss: 0.7344 (0.7465) time: 0.1228 data: 0.0350 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:13:37 lr: 0.000076 grad: 0.1443 (0.1986) loss: 0.7529 (0.7461) time: 0.1337 data: 0.0520 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:13:17 lr: 0.000076 grad: 0.1532 (0.1928) loss: 0.7305 (0.7457) time: 0.1393 data: 0.0572 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:13:04 lr: 0.000076 grad: 0.1519 (0.1885) loss: 0.7356 (0.7451) time: 0.1698 data: 0.0876 max mem: 9377 +Train: [46] [1000/6250] eta: 0:12:43 lr: 0.000076 grad: 0.1561 (0.1853) loss: 0.7420 (0.7443) time: 0.1607 data: 0.0797 max mem: 9377 +Train: [46] [1100/6250] eta: 0:12:21 lr: 0.000075 grad: 0.1491 (0.1827) loss: 0.7496 (0.7436) time: 0.1443 data: 0.0644 max mem: 9377 +Train: [46] [1200/6250] eta: 0:12:03 lr: 0.000075 grad: 0.1597 (0.1806) loss: 0.7309 (0.7432) time: 0.1315 data: 0.0500 max mem: 9377 +Train: [46] [1300/6250] eta: 0:11:46 lr: 0.000075 grad: 0.1599 (0.1792) loss: 0.7345 (0.7422) time: 0.1410 data: 0.0559 max mem: 9377 +Train: [46] [1400/6250] eta: 0:11:34 lr: 0.000075 grad: 0.1498 (0.1778) loss: 0.7377 (0.7418) time: 0.1708 data: 0.0985 max mem: 9377 +Train: [46] [1500/6250] eta: 0:11:22 lr: 0.000075 grad: 0.1440 (0.1766) loss: 0.7431 (0.7413) time: 0.1496 data: 0.0629 max mem: 9377 +Train: [46] [1600/6250] eta: 0:11:12 lr: 0.000075 grad: 0.1540 (0.1751) loss: 0.7379 (0.7410) time: 0.1493 data: 0.0634 max mem: 9377 +Train: [46] [1700/6250] eta: 0:10:59 lr: 0.000075 grad: 0.1522 (0.1738) loss: 0.7304 (0.7405) time: 0.1536 data: 0.0679 max mem: 9377 +Train: [46] [1800/6250] eta: 0:10:47 lr: 0.000075 grad: 0.1543 (0.1727) loss: 0.7141 (0.7399) time: 0.1454 data: 0.0652 max mem: 9377 +Train: [46] [1900/6250] eta: 0:10:31 lr: 0.000075 grad: 0.1543 (0.1719) loss: 0.7170 (0.7395) time: 0.1489 data: 0.0741 max mem: 9377 +Train: [46] [2000/6250] eta: 0:10:16 lr: 0.000075 grad: 0.1598 (0.1712) loss: 0.7212 (0.7388) time: 0.1459 data: 0.0634 max mem: 9377 +Train: [46] [2100/6250] eta: 0:10:02 lr: 0.000075 grad: 0.1565 (0.1710) loss: 0.7261 (0.7380) time: 0.1432 data: 0.0640 max mem: 9377 +Train: [46] [2200/6250] eta: 0:09:46 lr: 0.000075 grad: 0.1476 (0.1704) loss: 0.7214 (0.7373) time: 0.1422 data: 0.0595 max mem: 9377 +Train: [46] [2300/6250] eta: 0:09:30 lr: 0.000075 grad: 0.1489 (0.1699) loss: 0.7231 (0.7366) time: 0.1480 data: 0.0627 max mem: 9377 +Train: [46] [2400/6250] eta: 0:09:15 lr: 0.000075 grad: 0.1553 (0.1693) loss: 0.7104 (0.7361) time: 0.1341 data: 0.0505 max mem: 9377 +Train: [46] [2500/6250] eta: 0:08:58 lr: 0.000075 grad: 0.1603 (0.1687) loss: 0.7178 (0.7356) time: 0.1329 data: 0.0469 max mem: 9377 +Train: [46] [2600/6250] eta: 0:08:43 lr: 0.000075 grad: 0.1521 (0.1682) loss: 0.7192 (0.7350) time: 0.1325 data: 0.0500 max mem: 9377 +Train: [46] [2700/6250] eta: 0:08:28 lr: 0.000075 grad: 0.1552 (0.1676) loss: 0.7285 (0.7348) time: 0.1313 data: 0.0536 max mem: 9377 +Train: [46] [2800/6250] eta: 0:08:13 lr: 0.000075 grad: 0.1533 (0.1672) loss: 0.7383 (0.7346) time: 0.1147 data: 0.0313 max mem: 9377 +Train: [46] [2900/6250] eta: 0:07:57 lr: 0.000075 grad: 0.1540 (0.1670) loss: 0.7233 (0.7344) time: 0.1144 data: 0.0309 max mem: 9377 +Train: [46] [3000/6250] eta: 0:07:42 lr: 0.000075 grad: 0.1576 (0.1668) loss: 0.7393 (0.7343) time: 0.1325 data: 0.0533 max mem: 9377 +Train: [46] [3100/6250] eta: 0:07:26 lr: 0.000075 grad: 0.1572 (0.1667) loss: 0.7318 (0.7341) time: 0.1353 data: 0.0558 max mem: 9377 +Train: [46] [3200/6250] eta: 0:07:12 lr: 0.000075 grad: 0.1545 (0.1665) loss: 0.7357 (0.7339) time: 0.1498 data: 0.0742 max mem: 9377 +Train: [46] [3300/6250] eta: 0:06:57 lr: 0.000075 grad: 0.1557 (0.1662) loss: 0.7375 (0.7340) time: 0.1398 data: 0.0627 max mem: 9377 +Train: [46] [3400/6250] eta: 0:06:42 lr: 0.000075 grad: 0.1463 (0.1658) loss: 0.7435 (0.7341) time: 0.1421 data: 0.0609 max mem: 9377 +Train: [46] [3500/6250] eta: 0:06:28 lr: 0.000075 grad: 0.1516 (0.1653) loss: 0.7296 (0.7342) time: 0.1272 data: 0.0501 max mem: 9377 +Train: [46] [3600/6250] eta: 0:06:14 lr: 0.000075 grad: 0.1544 (0.1649) loss: 0.7297 (0.7343) time: 0.1301 data: 0.0521 max mem: 9377 +Train: [46] [3700/6250] eta: 0:06:00 lr: 0.000075 grad: 0.1547 (0.1646) loss: 0.7390 (0.7345) time: 0.1468 data: 0.0724 max mem: 9377 +Train: [46] [3800/6250] eta: 0:05:46 lr: 0.000075 grad: 0.1538 (0.1644) loss: 0.7425 (0.7345) time: 0.1232 data: 0.0386 max mem: 9377 +Train: [46] [3900/6250] eta: 0:05:32 lr: 0.000075 grad: 0.1502 (0.1642) loss: 0.7267 (0.7345) time: 0.1482 data: 0.0710 max mem: 9377 +Train: [46] [4000/6250] eta: 0:05:17 lr: 0.000075 grad: 0.1556 (0.1641) loss: 0.7298 (0.7344) time: 0.1450 data: 0.0663 max mem: 9377 +Train: [46] [4100/6250] eta: 0:05:03 lr: 0.000075 grad: 0.1560 (0.1639) loss: 0.7324 (0.7343) time: 0.1380 data: 0.0543 max mem: 9377 +Train: [46] [4200/6250] eta: 0:04:50 lr: 0.000074 grad: 0.1542 (0.1637) loss: 0.7227 (0.7343) time: 0.1767 data: 0.0994 max mem: 9377 +Train: [46] [4300/6250] eta: 0:04:36 lr: 0.000074 grad: 0.1498 (0.1635) loss: 0.7292 (0.7343) time: 0.1373 data: 0.0615 max mem: 9377 +Train: [46] [4400/6250] eta: 0:04:23 lr: 0.000074 grad: 0.1510 (0.1632) loss: 0.7394 (0.7344) time: 0.1899 data: 0.1131 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:09 lr: 0.000074 grad: 0.1552 (0.1632) loss: 0.7423 (0.7344) time: 0.1198 data: 0.0428 max mem: 9377 +Train: [46] [4600/6250] eta: 0:03:54 lr: 0.000074 grad: 0.1526 (0.1630) loss: 0.7364 (0.7343) time: 0.1463 data: 0.0743 max mem: 9377 +Train: [46] [4700/6250] eta: 0:03:40 lr: 0.000074 grad: 0.1590 (0.1629) loss: 0.7259 (0.7344) time: 0.1560 data: 0.0790 max mem: 9377 +Train: [46] [4800/6250] eta: 0:03:26 lr: 0.000074 grad: 0.1675 (0.1629) loss: 0.7269 (0.7343) time: 0.1290 data: 0.0498 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:11 lr: 0.000074 grad: 0.1581 (0.1628) loss: 0.7224 (0.7343) time: 0.1338 data: 0.0511 max mem: 9377 +Train: [46] [5000/6250] eta: 0:02:57 lr: 0.000074 grad: 0.1506 (0.1627) loss: 0.7284 (0.7342) time: 0.1251 data: 0.0443 max mem: 9377 +Train: [46] [5100/6250] eta: 0:02:42 lr: 0.000074 grad: 0.1557 (0.1625) loss: 0.7491 (0.7343) time: 0.1232 data: 0.0406 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:28 lr: 0.000074 grad: 0.1525 (0.1624) loss: 0.7429 (0.7343) time: 0.1147 data: 0.0292 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:14 lr: 0.000074 grad: 0.1562 (0.1622) loss: 0.7373 (0.7343) time: 0.1463 data: 0.0627 max mem: 9377 +Train: [46] [5400/6250] eta: 0:01:59 lr: 0.000074 grad: 0.1523 (0.1620) loss: 0.7298 (0.7342) time: 0.1434 data: 0.0582 max mem: 9377 +Train: [46] [5500/6250] eta: 0:01:45 lr: 0.000074 grad: 0.1529 (0.1619) loss: 0.7282 (0.7342) time: 0.1548 data: 0.0747 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:31 lr: 0.000074 grad: 0.1549 (0.1618) loss: 0.7270 (0.7341) time: 0.1476 data: 0.0691 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:17 lr: 0.000074 grad: 0.1494 (0.1616) loss: 0.7347 (0.7341) time: 0.1662 data: 0.0897 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:03 lr: 0.000074 grad: 0.1576 (0.1615) loss: 0.7176 (0.7341) time: 0.1525 data: 0.0740 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:49 lr: 0.000074 grad: 0.1479 (0.1614) loss: 0.7390 (0.7341) time: 0.1304 data: 0.0482 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:35 lr: 0.000074 grad: 0.1518 (0.1613) loss: 0.7278 (0.7340) time: 0.1360 data: 0.0556 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:21 lr: 0.000074 grad: 0.1595 (0.1612) loss: 0.7313 (0.7340) time: 0.1475 data: 0.0678 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:07 lr: 0.000074 grad: 0.1591 (0.1611) loss: 0.7317 (0.7340) time: 0.1408 data: 0.0556 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.1587 (0.1611) loss: 0.7316 (0.7340) time: 0.1328 data: 0.0529 max mem: 9377 +Train: [46] Total time: 0:14:52 (0.1428 s / it) +Averaged stats: lr: 0.000074 grad: 0.1587 (0.1611) loss: 0.7316 (0.7340) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:03:23 loss: 0.8566 (0.8566) time: 3.2839 data: 3.1960 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8605 (0.8622) time: 0.1292 data: 0.1024 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:12 (0.2061 s / it) +Averaged stats (hcp-train-subset): loss: 0.8605 (0.8622) +Eval (hcp-val): [46] [ 0/62] eta: 0:03:45 loss: 0.8576 (0.8576) time: 3.6318 data: 3.5448 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8578 (0.8592) time: 0.1245 data: 0.0978 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (hcp-val): loss: 0.8578 (0.8592) +Eval (nsd-val): [46] [ 0/62] eta: 0:03:52 loss: 0.8222 (0.8222) time: 3.7435 data: 3.6697 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8325 (0.8327) time: 0.1155 data: 0.0887 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:12 (0.2047 s / it) +Averaged stats (nsd-val): loss: 0.8325 (0.8327) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 10:39:27 lr: 0.000074 grad: 0.5897 (0.5897) loss: 0.6772 (0.6772) time: 6.1389 data: 6.0287 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:20:04 lr: 0.000074 grad: 0.2066 (0.2561) loss: 0.7533 (0.7631) time: 0.1600 data: 0.0604 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:17:29 lr: 0.000074 grad: 0.2036 (0.2546) loss: 0.7410 (0.7543) time: 0.1486 data: 0.0524 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:16:44 lr: 0.000074 grad: 0.1766 (0.2437) loss: 0.7386 (0.7490) time: 0.1566 data: 0.0647 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:15:34 lr: 0.000074 grad: 0.1917 (0.2300) loss: 0.7530 (0.7468) time: 0.1210 data: 0.0262 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:14:48 lr: 0.000074 grad: 0.1830 (0.2234) loss: 0.7296 (0.7441) time: 0.1345 data: 0.0427 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:14:14 lr: 0.000074 grad: 0.1888 (0.2161) loss: 0.7402 (0.7433) time: 0.1350 data: 0.0527 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:14:01 lr: 0.000074 grad: 0.1625 (0.2103) loss: 0.7368 (0.7428) time: 0.1779 data: 0.0935 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:13:46 lr: 0.000074 grad: 0.1531 (0.2046) loss: 0.7333 (0.7428) time: 0.1616 data: 0.0798 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:13:37 lr: 0.000074 grad: 0.1487 (0.1992) loss: 0.7527 (0.7429) time: 0.1818 data: 0.1002 max mem: 9377 +Train: [47] [1000/6250] eta: 0:13:15 lr: 0.000073 grad: 0.1619 (0.1947) loss: 0.7312 (0.7425) time: 0.1425 data: 0.0643 max mem: 9377 +Train: [47] [1100/6250] eta: 0:12:54 lr: 0.000073 grad: 0.1520 (0.1911) loss: 0.7460 (0.7427) time: 0.1406 data: 0.0610 max mem: 9377 +Train: [47] [1200/6250] eta: 0:12:34 lr: 0.000073 grad: 0.1544 (0.1881) loss: 0.7463 (0.7425) time: 0.1400 data: 0.0453 max mem: 9377 +Train: [47] [1300/6250] eta: 0:12:16 lr: 0.000073 grad: 0.1572 (0.1859) loss: 0.7345 (0.7422) time: 0.1257 data: 0.0496 max mem: 9377 +Train: [47] [1400/6250] eta: 0:12:06 lr: 0.000073 grad: 0.1556 (0.1838) loss: 0.7326 (0.7416) time: 0.1808 data: 0.1068 max mem: 9377 +Train: [47] [1500/6250] eta: 0:11:54 lr: 0.000073 grad: 0.1510 (0.1816) loss: 0.7351 (0.7417) time: 0.1728 data: 0.0989 max mem: 9377 +Train: [47] [1600/6250] eta: 0:11:43 lr: 0.000073 grad: 0.1543 (0.1801) loss: 0.7473 (0.7416) time: 0.1696 data: 0.0785 max mem: 9377 +Train: [47] [1700/6250] eta: 0:11:32 lr: 0.000073 grad: 0.1599 (0.1788) loss: 0.7389 (0.7418) time: 0.1512 data: 0.0664 max mem: 9377 +Train: [47] [1800/6250] eta: 0:11:25 lr: 0.000073 grad: 0.1568 (0.1776) loss: 0.7451 (0.7418) time: 0.1630 data: 0.0859 max mem: 9377 +Train: [47] [1900/6250] eta: 0:11:12 lr: 0.000073 grad: 0.1533 (0.1764) loss: 0.7327 (0.7419) time: 0.1620 data: 0.0842 max mem: 9377 +Train: [47] [2000/6250] eta: 0:10:55 lr: 0.000073 grad: 0.1502 (0.1752) loss: 0.7379 (0.7418) time: 0.1512 data: 0.0635 max mem: 9377 +Train: [47] [2100/6250] eta: 0:10:37 lr: 0.000073 grad: 0.1481 (0.1741) loss: 0.7363 (0.7418) time: 0.1331 data: 0.0471 max mem: 9377 +Train: [47] [2200/6250] eta: 0:10:20 lr: 0.000073 grad: 0.1593 (0.1733) loss: 0.7429 (0.7418) time: 0.1243 data: 0.0359 max mem: 9377 +Train: [47] [2300/6250] eta: 0:10:04 lr: 0.000073 grad: 0.1438 (0.1723) loss: 0.7555 (0.7420) time: 0.1472 data: 0.0623 max mem: 9377 +Train: [47] [2400/6250] eta: 0:09:46 lr: 0.000073 grad: 0.1515 (0.1716) loss: 0.7433 (0.7420) time: 0.1403 data: 0.0637 max mem: 9377 +Train: [47] [2500/6250] eta: 0:09:30 lr: 0.000073 grad: 0.1482 (0.1708) loss: 0.7479 (0.7422) time: 0.1636 data: 0.0873 max mem: 9377 +Train: [47] [2600/6250] eta: 0:09:13 lr: 0.000073 grad: 0.1464 (0.1700) loss: 0.7486 (0.7424) time: 0.1457 data: 0.0594 max mem: 9377 +Train: [47] [2700/6250] eta: 0:08:57 lr: 0.000073 grad: 0.1484 (0.1694) loss: 0.7456 (0.7425) time: 0.1497 data: 0.0708 max mem: 9377 +Train: [47] [2800/6250] eta: 0:08:41 lr: 0.000073 grad: 0.1503 (0.1687) loss: 0.7364 (0.7427) time: 0.1339 data: 0.0521 max mem: 9377 +Train: [47] [2900/6250] eta: 0:08:24 lr: 0.000073 grad: 0.1507 (0.1682) loss: 0.7364 (0.7427) time: 0.1378 data: 0.0549 max mem: 9377 +Train: [47] [3000/6250] eta: 0:08:08 lr: 0.000073 grad: 0.1513 (0.1677) loss: 0.7416 (0.7428) time: 0.1348 data: 0.0542 max mem: 9377 +Train: [47] [3100/6250] eta: 0:07:52 lr: 0.000073 grad: 0.1489 (0.1672) loss: 0.7466 (0.7429) time: 0.1535 data: 0.0703 max mem: 9377 +Train: [47] [3200/6250] eta: 0:07:36 lr: 0.000073 grad: 0.1488 (0.1668) loss: 0.7494 (0.7430) time: 0.1437 data: 0.0567 max mem: 9377 +Train: [47] [3300/6250] eta: 0:07:20 lr: 0.000073 grad: 0.1494 (0.1663) loss: 0.7364 (0.7430) time: 0.1412 data: 0.0626 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:04 lr: 0.000073 grad: 0.1544 (0.1659) loss: 0.7305 (0.7430) time: 0.1547 data: 0.0758 max mem: 9377 +Train: [47] [3500/6250] eta: 0:06:49 lr: 0.000073 grad: 0.1715 (0.1657) loss: 0.7254 (0.7427) time: 0.1340 data: 0.0516 max mem: 9377 +Train: [47] [3600/6250] eta: 0:06:33 lr: 0.000073 grad: 0.1598 (0.1656) loss: 0.7338 (0.7424) time: 0.1248 data: 0.0438 max mem: 9377 +Train: [47] [3700/6250] eta: 0:06:17 lr: 0.000073 grad: 0.1514 (0.1653) loss: 0.7304 (0.7420) time: 0.1340 data: 0.0503 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:01 lr: 0.000073 grad: 0.1564 (0.1650) loss: 0.7210 (0.7418) time: 0.1381 data: 0.0542 max mem: 9377 +Train: [47] [3900/6250] eta: 0:05:46 lr: 0.000073 grad: 0.1458 (0.1647) loss: 0.7349 (0.7416) time: 0.1389 data: 0.0567 max mem: 9377 +Train: [47] [4000/6250] eta: 0:05:30 lr: 0.000073 grad: 0.1522 (0.1644) loss: 0.7330 (0.7416) time: 0.1510 data: 0.0682 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:15 lr: 0.000072 grad: 0.1545 (0.1641) loss: 0.7324 (0.7415) time: 0.1569 data: 0.0736 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:00 lr: 0.000072 grad: 0.1483 (0.1638) loss: 0.7445 (0.7415) time: 0.1319 data: 0.0457 max mem: 9377 +Train: [47] [4300/6250] eta: 0:04:45 lr: 0.000072 grad: 0.1487 (0.1636) loss: 0.7329 (0.7415) time: 0.1763 data: 0.0981 max mem: 9377 +Train: [47] [4400/6250] eta: 0:04:31 lr: 0.000072 grad: 0.1501 (0.1634) loss: 0.7432 (0.7415) time: 0.1578 data: 0.0782 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:16 lr: 0.000072 grad: 0.1490 (0.1631) loss: 0.7402 (0.7415) time: 0.1300 data: 0.0511 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:02 lr: 0.000072 grad: 0.1531 (0.1629) loss: 0.7358 (0.7414) time: 0.1396 data: 0.0591 max mem: 9377 +Train: [47] [4700/6250] eta: 0:03:47 lr: 0.000072 grad: 0.1518 (0.1628) loss: 0.7460 (0.7412) time: 0.1711 data: 0.0901 max mem: 9377 +Train: [47] [4800/6250] eta: 0:03:33 lr: 0.000072 grad: 0.1517 (0.1627) loss: 0.7395 (0.7411) time: 0.1311 data: 0.0454 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:18 lr: 0.000072 grad: 0.1520 (0.1625) loss: 0.7453 (0.7412) time: 0.1535 data: 0.0738 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:03 lr: 0.000072 grad: 0.1537 (0.1623) loss: 0.7432 (0.7412) time: 0.1255 data: 0.0378 max mem: 9377 +Train: [47] [5100/6250] eta: 0:02:48 lr: 0.000072 grad: 0.1564 (0.1622) loss: 0.7364 (0.7413) time: 0.1255 data: 0.0396 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:33 lr: 0.000072 grad: 0.1473 (0.1620) loss: 0.7439 (0.7413) time: 0.1347 data: 0.0475 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:18 lr: 0.000072 grad: 0.1544 (0.1619) loss: 0.7345 (0.7412) time: 0.1375 data: 0.0538 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:03 lr: 0.000072 grad: 0.1538 (0.1618) loss: 0.7270 (0.7411) time: 0.1362 data: 0.0511 max mem: 9377 +Train: [47] [5500/6250] eta: 0:01:49 lr: 0.000072 grad: 0.1486 (0.1616) loss: 0.7362 (0.7410) time: 0.1325 data: 0.0500 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:34 lr: 0.000072 grad: 0.1506 (0.1615) loss: 0.7406 (0.7409) time: 0.1375 data: 0.0522 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:19 lr: 0.000072 grad: 0.1538 (0.1614) loss: 0.7391 (0.7408) time: 0.1343 data: 0.0541 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:05 lr: 0.000072 grad: 0.1654 (0.1614) loss: 0.7341 (0.7407) time: 0.1348 data: 0.0471 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:50 lr: 0.000072 grad: 0.1608 (0.1614) loss: 0.7282 (0.7405) time: 0.1278 data: 0.0444 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:36 lr: 0.000072 grad: 0.1597 (0.1613) loss: 0.7273 (0.7404) time: 0.1310 data: 0.0495 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:21 lr: 0.000072 grad: 0.1522 (0.1614) loss: 0.7252 (0.7403) time: 0.1303 data: 0.0453 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:07 lr: 0.000072 grad: 0.1557 (0.1613) loss: 0.7189 (0.7401) time: 0.1436 data: 0.0613 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.1567 (0.1612) loss: 0.7252 (0.7400) time: 0.1318 data: 0.0485 max mem: 9377 +Train: [47] Total time: 0:15:08 (0.1453 s / it) +Averaged stats: lr: 0.000072 grad: 0.1567 (0.1612) loss: 0.7252 (0.7400) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:04:56 loss: 0.8583 (0.8583) time: 4.7833 data: 4.7531 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8651 (0.8649) time: 0.1393 data: 0.1136 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:13 (0.2189 s / it) +Averaged stats (hcp-train-subset): loss: 0.8651 (0.8649) +Eval (hcp-val): [47] [ 0/62] eta: 0:05:49 loss: 0.8708 (0.8708) time: 5.6341 data: 5.5978 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8643 (0.8633) time: 0.1259 data: 0.1005 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-val): loss: 0.8643 (0.8633) +Eval (nsd-val): [47] [ 0/62] eta: 0:05:10 loss: 0.8265 (0.8265) time: 5.0016 data: 4.9368 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8356 (0.8369) time: 0.1099 data: 0.0847 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:13 (0.2131 s / it) +Averaged stats (nsd-val): loss: 0.8356 (0.8369) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 10:36:47 lr: 0.000072 grad: 0.4516 (0.4516) loss: 0.7248 (0.7248) time: 6.1131 data: 5.9748 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:19:39 lr: 0.000072 grad: 0.2188 (0.2811) loss: 0.7848 (0.7693) time: 0.1267 data: 0.0362 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:17:00 lr: 0.000072 grad: 0.2230 (0.2585) loss: 0.7501 (0.7607) time: 0.1725 data: 0.0854 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:15:33 lr: 0.000072 grad: 0.1816 (0.2425) loss: 0.7462 (0.7544) time: 0.1374 data: 0.0523 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:14:51 lr: 0.000072 grad: 0.1817 (0.2291) loss: 0.7262 (0.7499) time: 0.1196 data: 0.0261 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:14:16 lr: 0.000072 grad: 0.1884 (0.2207) loss: 0.7260 (0.7452) time: 0.1334 data: 0.0439 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:13:43 lr: 0.000072 grad: 0.1674 (0.2148) loss: 0.7373 (0.7426) time: 0.1207 data: 0.0300 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:13:22 lr: 0.000072 grad: 0.1590 (0.2072) loss: 0.7333 (0.7417) time: 0.1512 data: 0.0722 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:12:55 lr: 0.000072 grad: 0.1578 (0.2011) loss: 0.7330 (0.7414) time: 0.1288 data: 0.0408 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:12:34 lr: 0.000071 grad: 0.1552 (0.1957) loss: 0.7356 (0.7410) time: 0.1387 data: 0.0612 max mem: 9377 +Train: [48] [1000/6250] eta: 0:12:16 lr: 0.000071 grad: 0.1470 (0.1914) loss: 0.7418 (0.7407) time: 0.1317 data: 0.0494 max mem: 9377 +Train: [48] [1100/6250] eta: 0:11:58 lr: 0.000071 grad: 0.1406 (0.1874) loss: 0.7405 (0.7412) time: 0.1339 data: 0.0553 max mem: 9377 +Train: [48] [1200/6250] eta: 0:11:43 lr: 0.000071 grad: 0.1496 (0.1844) loss: 0.7414 (0.7411) time: 0.1420 data: 0.0600 max mem: 9377 +Train: [48] [1300/6250] eta: 0:11:30 lr: 0.000071 grad: 0.1531 (0.1821) loss: 0.7472 (0.7408) time: 0.1375 data: 0.0548 max mem: 9377 +Train: [48] [1400/6250] eta: 0:11:17 lr: 0.000071 grad: 0.1534 (0.1799) loss: 0.7335 (0.7408) time: 0.1932 data: 0.1189 max mem: 9377 +Train: [48] [1500/6250] eta: 0:11:07 lr: 0.000071 grad: 0.1530 (0.1781) loss: 0.7382 (0.7406) time: 0.1380 data: 0.0490 max mem: 9377 +Train: [48] [1600/6250] eta: 0:10:57 lr: 0.000071 grad: 0.1558 (0.1765) loss: 0.7384 (0.7405) time: 0.1593 data: 0.0737 max mem: 9377 +Train: [48] [1700/6250] eta: 0:10:45 lr: 0.000071 grad: 0.1559 (0.1755) loss: 0.7348 (0.7402) time: 0.1582 data: 0.0803 max mem: 9377 +Train: [48] [1800/6250] eta: 0:10:30 lr: 0.000071 grad: 0.1543 (0.1743) loss: 0.7299 (0.7399) time: 0.1346 data: 0.0545 max mem: 9377 +Train: [48] [1900/6250] eta: 0:10:15 lr: 0.000071 grad: 0.1568 (0.1735) loss: 0.7353 (0.7395) time: 0.1410 data: 0.0571 max mem: 9377 +Train: [48] [2000/6250] eta: 0:09:59 lr: 0.000071 grad: 0.1580 (0.1732) loss: 0.7358 (0.7391) time: 0.1438 data: 0.0611 max mem: 9377 +Train: [48] [2100/6250] eta: 0:09:45 lr: 0.000071 grad: 0.1561 (0.1723) loss: 0.7273 (0.7389) time: 0.1472 data: 0.0708 max mem: 9377 +Train: [48] [2200/6250] eta: 0:09:30 lr: 0.000071 grad: 0.1465 (0.1714) loss: 0.7360 (0.7388) time: 0.1202 data: 0.0355 max mem: 9377 +Train: [48] [2300/6250] eta: 0:09:13 lr: 0.000071 grad: 0.1496 (0.1707) loss: 0.7365 (0.7386) time: 0.1373 data: 0.0516 max mem: 9377 +Train: [48] [2400/6250] eta: 0:08:58 lr: 0.000071 grad: 0.1493 (0.1699) loss: 0.7300 (0.7386) time: 0.1447 data: 0.0598 max mem: 9377 +Train: [48] [2500/6250] eta: 0:08:44 lr: 0.000071 grad: 0.1493 (0.1694) loss: 0.7371 (0.7385) time: 0.1534 data: 0.0689 max mem: 9377 +Train: [48] [2600/6250] eta: 0:08:29 lr: 0.000071 grad: 0.1458 (0.1686) loss: 0.7448 (0.7384) time: 0.1528 data: 0.0650 max mem: 9377 +Train: [48] [2700/6250] eta: 0:08:14 lr: 0.000071 grad: 0.1568 (0.1682) loss: 0.7335 (0.7384) time: 0.1463 data: 0.0685 max mem: 9377 +Train: [48] [2800/6250] eta: 0:08:00 lr: 0.000071 grad: 0.1567 (0.1679) loss: 0.7443 (0.7384) time: 0.1360 data: 0.0494 max mem: 9377 +Train: [48] [2900/6250] eta: 0:07:46 lr: 0.000071 grad: 0.1608 (0.1676) loss: 0.7267 (0.7383) time: 0.1402 data: 0.0542 max mem: 9377 +Train: [48] [3000/6250] eta: 0:07:32 lr: 0.000071 grad: 0.1576 (0.1672) loss: 0.7361 (0.7383) time: 0.1281 data: 0.0477 max mem: 9377 +Train: [48] [3100/6250] eta: 0:07:17 lr: 0.000071 grad: 0.1473 (0.1668) loss: 0.7382 (0.7381) time: 0.1394 data: 0.0589 max mem: 9377 +Train: [48] [3200/6250] eta: 0:07:03 lr: 0.000071 grad: 0.1533 (0.1664) loss: 0.7395 (0.7382) time: 0.1202 data: 0.0335 max mem: 9377 +Train: [48] [3300/6250] eta: 0:06:49 lr: 0.000071 grad: 0.1476 (0.1660) loss: 0.7294 (0.7382) time: 0.1498 data: 0.0673 max mem: 9377 +Train: [48] [3400/6250] eta: 0:06:35 lr: 0.000071 grad: 0.1478 (0.1657) loss: 0.7337 (0.7381) time: 0.1175 data: 0.0411 max mem: 9377 +Train: [48] [3500/6250] eta: 0:06:21 lr: 0.000071 grad: 0.1568 (0.1653) loss: 0.7413 (0.7382) time: 0.1562 data: 0.0766 max mem: 9377 +Train: [48] [3600/6250] eta: 0:06:07 lr: 0.000071 grad: 0.1450 (0.1649) loss: 0.7509 (0.7383) time: 0.1417 data: 0.0649 max mem: 9377 +Train: [48] [3700/6250] eta: 0:05:53 lr: 0.000071 grad: 0.1597 (0.1646) loss: 0.7403 (0.7383) time: 0.1392 data: 0.0550 max mem: 9377 +Train: [48] [3800/6250] eta: 0:05:39 lr: 0.000071 grad: 0.1559 (0.1645) loss: 0.7373 (0.7383) time: 0.1397 data: 0.0601 max mem: 9377 +Train: [48] [3900/6250] eta: 0:05:25 lr: 0.000070 grad: 0.1546 (0.1644) loss: 0.7382 (0.7383) time: 0.1462 data: 0.0628 max mem: 9377 +Train: [48] [4000/6250] eta: 0:05:10 lr: 0.000070 grad: 0.1511 (0.1643) loss: 0.7327 (0.7382) time: 0.1426 data: 0.0648 max mem: 9377 +Train: [48] [4100/6250] eta: 0:04:56 lr: 0.000070 grad: 0.1610 (0.1641) loss: 0.7280 (0.7381) time: 0.1294 data: 0.0493 max mem: 9377 +Train: [48] [4200/6250] eta: 0:04:42 lr: 0.000070 grad: 0.1550 (0.1639) loss: 0.7402 (0.7381) time: 0.1266 data: 0.0427 max mem: 9377 +Train: [48] [4300/6250] eta: 0:04:28 lr: 0.000070 grad: 0.1550 (0.1637) loss: 0.7359 (0.7381) time: 0.1501 data: 0.0670 max mem: 9377 +Train: [48] [4400/6250] eta: 0:04:14 lr: 0.000070 grad: 0.1472 (0.1635) loss: 0.7427 (0.7381) time: 0.1333 data: 0.0542 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:00 lr: 0.000070 grad: 0.1597 (0.1633) loss: 0.7247 (0.7379) time: 0.1285 data: 0.0496 max mem: 9377 +Train: [48] [4600/6250] eta: 0:03:47 lr: 0.000070 grad: 0.1533 (0.1631) loss: 0.7304 (0.7379) time: 0.1331 data: 0.0540 max mem: 9377 +Train: [48] [4700/6250] eta: 0:03:33 lr: 0.000070 grad: 0.1549 (0.1630) loss: 0.7279 (0.7378) time: 0.1734 data: 0.0982 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:19 lr: 0.000070 grad: 0.1517 (0.1629) loss: 0.7331 (0.7376) time: 0.1197 data: 0.0350 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:06 lr: 0.000070 grad: 0.1564 (0.1628) loss: 0.7344 (0.7374) time: 0.1358 data: 0.0467 max mem: 9377 +Train: [48] [5000/6250] eta: 0:02:53 lr: 0.000070 grad: 0.1574 (0.1627) loss: 0.7289 (0.7372) time: 0.1489 data: 0.0627 max mem: 9377 +Train: [48] [5100/6250] eta: 0:02:39 lr: 0.000070 grad: 0.1513 (0.1626) loss: 0.7400 (0.7370) time: 0.1408 data: 0.0589 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:25 lr: 0.000070 grad: 0.1514 (0.1625) loss: 0.7332 (0.7369) time: 0.1296 data: 0.0468 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:11 lr: 0.000070 grad: 0.1490 (0.1623) loss: 0.7290 (0.7368) time: 0.1284 data: 0.0541 max mem: 9377 +Train: [48] [5400/6250] eta: 0:01:58 lr: 0.000070 grad: 0.1559 (0.1621) loss: 0.7237 (0.7368) time: 0.1217 data: 0.0415 max mem: 9377 +Train: [48] [5500/6250] eta: 0:01:44 lr: 0.000070 grad: 0.1504 (0.1620) loss: 0.7291 (0.7367) time: 0.1321 data: 0.0469 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:30 lr: 0.000070 grad: 0.1635 (0.1619) loss: 0.7241 (0.7365) time: 0.1451 data: 0.0652 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:16 lr: 0.000070 grad: 0.1521 (0.1618) loss: 0.7335 (0.7364) time: 0.1286 data: 0.0423 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:02 lr: 0.000070 grad: 0.1526 (0.1617) loss: 0.7371 (0.7362) time: 0.1293 data: 0.0438 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:48 lr: 0.000070 grad: 0.1597 (0.1617) loss: 0.7204 (0.7361) time: 0.1232 data: 0.0421 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:34 lr: 0.000070 grad: 0.1570 (0.1617) loss: 0.7193 (0.7359) time: 0.1186 data: 0.0397 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:20 lr: 0.000070 grad: 0.1529 (0.1616) loss: 0.7191 (0.7357) time: 0.1669 data: 0.0854 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:06 lr: 0.000070 grad: 0.1586 (0.1616) loss: 0.7297 (0.7356) time: 0.1552 data: 0.0808 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1510 (0.1616) loss: 0.7416 (0.7355) time: 0.1528 data: 0.0727 max mem: 9377 +Train: [48] Total time: 0:14:30 (0.1393 s / it) +Averaged stats: lr: 0.000070 grad: 0.1510 (0.1616) loss: 0.7416 (0.7355) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:04:52 loss: 0.8590 (0.8590) time: 4.7221 data: 4.6880 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8618 (0.8628) time: 0.1385 data: 0.1069 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (hcp-train-subset): loss: 0.8618 (0.8628) +Eval (hcp-val): [48] [ 0/62] eta: 0:05:37 loss: 0.8577 (0.8577) time: 5.4432 data: 5.3984 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8599 (0.8615) time: 0.1122 data: 0.0873 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:13 (0.2131 s / it) +Averaged stats (hcp-val): loss: 0.8599 (0.8615) +Eval (nsd-val): [48] [ 0/62] eta: 0:03:36 loss: 0.8225 (0.8225) time: 3.4905 data: 3.4110 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8328 (0.8337) time: 0.1182 data: 0.0932 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:12 (0.2078 s / it) +Averaged stats (nsd-val): loss: 0.8328 (0.8337) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 8:41:27 lr: 0.000070 grad: 0.1111 (0.1111) loss: 0.8563 (0.8563) time: 5.0059 data: 4.8480 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:19:16 lr: 0.000070 grad: 0.2371 (0.2743) loss: 0.7604 (0.7589) time: 0.1265 data: 0.0353 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:16:30 lr: 0.000070 grad: 0.2312 (0.2718) loss: 0.7127 (0.7471) time: 0.1192 data: 0.0276 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:15:42 lr: 0.000070 grad: 0.1853 (0.2479) loss: 0.7321 (0.7447) time: 0.1528 data: 0.0641 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:15:10 lr: 0.000070 grad: 0.2023 (0.2429) loss: 0.7251 (0.7414) time: 0.1509 data: 0.0634 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:14:40 lr: 0.000070 grad: 0.1859 (0.2330) loss: 0.7272 (0.7392) time: 0.1574 data: 0.0711 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:14:14 lr: 0.000070 grad: 0.1680 (0.2247) loss: 0.7387 (0.7379) time: 0.1344 data: 0.0433 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:13:51 lr: 0.000069 grad: 0.1667 (0.2175) loss: 0.7232 (0.7363) time: 0.1489 data: 0.0659 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:13:33 lr: 0.000069 grad: 0.1735 (0.2118) loss: 0.7171 (0.7353) time: 0.1300 data: 0.0494 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:13:17 lr: 0.000069 grad: 0.1630 (0.2076) loss: 0.7365 (0.7347) time: 0.1437 data: 0.0592 max mem: 9377 +Train: [49] [1000/6250] eta: 0:13:15 lr: 0.000069 grad: 0.1514 (0.2034) loss: 0.7370 (0.7344) time: 0.1637 data: 0.0838 max mem: 9377 +Train: [49] [1100/6250] eta: 0:13:04 lr: 0.000069 grad: 0.1578 (0.1998) loss: 0.7273 (0.7341) time: 0.1645 data: 0.0784 max mem: 9377 +Train: [49] [1200/6250] eta: 0:12:53 lr: 0.000069 grad: 0.1632 (0.1965) loss: 0.7316 (0.7340) time: 0.1775 data: 0.1007 max mem: 9377 +Train: [49] [1300/6250] eta: 0:12:37 lr: 0.000069 grad: 0.1580 (0.1940) loss: 0.7346 (0.7337) time: 0.1511 data: 0.0679 max mem: 9377 +Train: [49] [1400/6250] eta: 0:12:22 lr: 0.000069 grad: 0.1548 (0.1914) loss: 0.7339 (0.7337) time: 0.1552 data: 0.0731 max mem: 9377 +Train: [49] [1500/6250] eta: 0:12:07 lr: 0.000069 grad: 0.1555 (0.1892) loss: 0.7287 (0.7336) time: 0.1521 data: 0.0643 max mem: 9377 +Train: [49] [1600/6250] eta: 0:11:49 lr: 0.000069 grad: 0.1550 (0.1873) loss: 0.7350 (0.7336) time: 0.1511 data: 0.0709 max mem: 9377 +Train: [49] [1700/6250] eta: 0:11:31 lr: 0.000069 grad: 0.1671 (0.1858) loss: 0.7175 (0.7331) time: 0.1515 data: 0.0663 max mem: 9377 +Train: [49] [1800/6250] eta: 0:11:11 lr: 0.000069 grad: 0.1676 (0.1849) loss: 0.7253 (0.7328) time: 0.1338 data: 0.0453 max mem: 9377 +Train: [49] [1900/6250] eta: 0:10:54 lr: 0.000069 grad: 0.1521 (0.1834) loss: 0.7400 (0.7328) time: 0.1392 data: 0.0517 max mem: 9377 +Train: [49] [2000/6250] eta: 0:10:38 lr: 0.000069 grad: 0.1663 (0.1821) loss: 0.7226 (0.7328) time: 0.1702 data: 0.0884 max mem: 9377 +Train: [49] [2100/6250] eta: 0:10:21 lr: 0.000069 grad: 0.1528 (0.1809) loss: 0.7225 (0.7326) time: 0.1637 data: 0.0774 max mem: 9377 +Train: [49] [2200/6250] eta: 0:10:03 lr: 0.000069 grad: 0.1462 (0.1797) loss: 0.7387 (0.7324) time: 0.1388 data: 0.0532 max mem: 9377 +Train: [49] [2300/6250] eta: 0:09:46 lr: 0.000069 grad: 0.1555 (0.1786) loss: 0.7290 (0.7323) time: 0.1393 data: 0.0596 max mem: 9377 +Train: [49] [2400/6250] eta: 0:09:31 lr: 0.000069 grad: 0.1568 (0.1776) loss: 0.7151 (0.7321) time: 0.1745 data: 0.0922 max mem: 9377 +Train: [49] [2500/6250] eta: 0:09:17 lr: 0.000069 grad: 0.1528 (0.1767) loss: 0.7308 (0.7320) time: 0.1617 data: 0.0838 max mem: 9377 +Train: [49] [2600/6250] eta: 0:09:04 lr: 0.000069 grad: 0.1613 (0.1762) loss: 0.7267 (0.7317) time: 0.1588 data: 0.0780 max mem: 9377 +Train: [49] [2700/6250] eta: 0:08:51 lr: 0.000069 grad: 0.1513 (0.1756) loss: 0.7339 (0.7315) time: 0.1508 data: 0.0691 max mem: 9377 +Train: [49] [2800/6250] eta: 0:08:39 lr: 0.000069 grad: 0.1547 (0.1749) loss: 0.7182 (0.7313) time: 0.1696 data: 0.0845 max mem: 9377 +Train: [49] [2900/6250] eta: 0:08:26 lr: 0.000069 grad: 0.1524 (0.1742) loss: 0.7221 (0.7311) time: 0.1625 data: 0.0813 max mem: 9377 +Train: [49] [3000/6250] eta: 0:08:10 lr: 0.000069 grad: 0.1570 (0.1737) loss: 0.7187 (0.7308) time: 0.1392 data: 0.0532 max mem: 9377 +Train: [49] [3100/6250] eta: 0:07:53 lr: 0.000069 grad: 0.1551 (0.1732) loss: 0.7241 (0.7304) time: 0.1546 data: 0.0725 max mem: 9377 +Train: [49] [3200/6250] eta: 0:07:37 lr: 0.000069 grad: 0.1604 (0.1728) loss: 0.7278 (0.7302) time: 0.1538 data: 0.0714 max mem: 9377 +Train: [49] [3300/6250] eta: 0:07:20 lr: 0.000069 grad: 0.1650 (0.1725) loss: 0.7161 (0.7300) time: 0.1397 data: 0.0578 max mem: 9377 +Train: [49] [3400/6250] eta: 0:07:04 lr: 0.000069 grad: 0.1655 (0.1725) loss: 0.7198 (0.7298) time: 0.1206 data: 0.0389 max mem: 9377 +Train: [49] [3500/6250] eta: 0:06:48 lr: 0.000069 grad: 0.1552 (0.1721) loss: 0.7192 (0.7297) time: 0.1378 data: 0.0509 max mem: 9377 +Train: [49] [3600/6250] eta: 0:06:33 lr: 0.000069 grad: 0.1595 (0.1716) loss: 0.7342 (0.7296) time: 0.1421 data: 0.0564 max mem: 9377 +Train: [49] [3700/6250] eta: 0:06:17 lr: 0.000069 grad: 0.1545 (0.1713) loss: 0.7259 (0.7295) time: 0.1517 data: 0.0667 max mem: 9377 +Train: [49] [3800/6250] eta: 0:06:02 lr: 0.000068 grad: 0.1603 (0.1711) loss: 0.7300 (0.7294) time: 0.1551 data: 0.0718 max mem: 9377 +Train: [49] [3900/6250] eta: 0:05:46 lr: 0.000068 grad: 0.1578 (0.1708) loss: 0.7151 (0.7293) time: 0.1471 data: 0.0587 max mem: 9377 +Train: [49] [4000/6250] eta: 0:05:31 lr: 0.000068 grad: 0.1580 (0.1705) loss: 0.7284 (0.7292) time: 0.1231 data: 0.0346 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:15 lr: 0.000068 grad: 0.1558 (0.1702) loss: 0.7176 (0.7292) time: 0.1444 data: 0.0622 max mem: 9377 +Train: [49] [4200/6250] eta: 0:05:00 lr: 0.000068 grad: 0.1565 (0.1699) loss: 0.7250 (0.7292) time: 0.1352 data: 0.0511 max mem: 9377 +Train: [49] [4300/6250] eta: 0:04:45 lr: 0.000068 grad: 0.1562 (0.1695) loss: 0.7297 (0.7293) time: 0.1441 data: 0.0570 max mem: 9377 +Train: [49] [4400/6250] eta: 0:04:30 lr: 0.000068 grad: 0.1500 (0.1691) loss: 0.7309 (0.7294) time: 0.1516 data: 0.0699 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:15 lr: 0.000068 grad: 0.1572 (0.1688) loss: 0.7214 (0.7294) time: 0.1332 data: 0.0508 max mem: 9377 +Train: [49] [4600/6250] eta: 0:04:00 lr: 0.000068 grad: 0.1548 (0.1686) loss: 0.7311 (0.7294) time: 0.1291 data: 0.0433 max mem: 9377 +Train: [49] [4700/6250] eta: 0:03:45 lr: 0.000068 grad: 0.1520 (0.1683) loss: 0.7325 (0.7294) time: 0.1433 data: 0.0628 max mem: 9377 +Train: [49] [4800/6250] eta: 0:03:30 lr: 0.000068 grad: 0.1597 (0.1680) loss: 0.7323 (0.7295) time: 0.1537 data: 0.0696 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:15 lr: 0.000068 grad: 0.1510 (0.1677) loss: 0.7420 (0.7296) time: 0.1321 data: 0.0447 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:00 lr: 0.000068 grad: 0.1554 (0.1675) loss: 0.7292 (0.7296) time: 0.1235 data: 0.0372 max mem: 9377 +Train: [49] [5100/6250] eta: 0:02:46 lr: 0.000068 grad: 0.1585 (0.1673) loss: 0.7409 (0.7297) time: 0.1334 data: 0.0436 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:31 lr: 0.000068 grad: 0.1543 (0.1672) loss: 0.7342 (0.7298) time: 0.1420 data: 0.0608 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:17 lr: 0.000068 grad: 0.1468 (0.1670) loss: 0.7401 (0.7299) time: 0.1892 data: 0.1151 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:02 lr: 0.000068 grad: 0.1582 (0.1667) loss: 0.7272 (0.7299) time: 0.1470 data: 0.0579 max mem: 9377 +Train: [49] [5500/6250] eta: 0:01:48 lr: 0.000068 grad: 0.1569 (0.1665) loss: 0.7358 (0.7301) time: 0.1512 data: 0.0692 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:34 lr: 0.000068 grad: 0.1484 (0.1663) loss: 0.7337 (0.7302) time: 0.1457 data: 0.0622 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:19 lr: 0.000068 grad: 0.1457 (0.1661) loss: 0.7337 (0.7304) time: 0.1333 data: 0.0471 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:05 lr: 0.000068 grad: 0.1599 (0.1659) loss: 0.7318 (0.7304) time: 0.1470 data: 0.0619 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:50 lr: 0.000068 grad: 0.1609 (0.1658) loss: 0.7362 (0.7305) time: 0.1530 data: 0.0690 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:36 lr: 0.000068 grad: 0.1549 (0.1656) loss: 0.7457 (0.7307) time: 0.1314 data: 0.0473 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:21 lr: 0.000068 grad: 0.1468 (0.1655) loss: 0.7461 (0.7308) time: 0.1493 data: 0.0625 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:07 lr: 0.000068 grad: 0.1537 (0.1654) loss: 0.7432 (0.7309) time: 0.1117 data: 0.0256 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.1572 (0.1653) loss: 0.7322 (0.7310) time: 0.1215 data: 0.0387 max mem: 9377 +Train: [49] Total time: 0:15:04 (0.1447 s / it) +Averaged stats: lr: 0.000068 grad: 0.1572 (0.1653) loss: 0.7322 (0.7310) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:04:50 loss: 0.8606 (0.8606) time: 4.6839 data: 4.6537 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8621 (0.8646) time: 0.1058 data: 0.0811 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:12 (0.1989 s / it) +Averaged stats (hcp-train-subset): loss: 0.8621 (0.8646) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [49] [ 0/62] eta: 0:05:04 loss: 0.8635 (0.8635) time: 4.9047 data: 4.8736 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8601 (0.8621) time: 0.1158 data: 0.0907 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:12 (0.1991 s / it) +Averaged stats (hcp-val): loss: 0.8601 (0.8621) +Making plots (hcp-val): example=14 +Eval (nsd-val): [49] [ 0/62] eta: 0:04:37 loss: 0.8172 (0.8172) time: 4.4734 data: 4.4068 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8322 (0.8331) time: 0.1037 data: 0.0786 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:12 (0.1957 s / it) +Averaged stats (nsd-val): loss: 0.8322 (0.8331) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 11:01:18 lr: 0.000068 grad: 0.6397 (0.6397) loss: 0.6759 (0.6759) time: 6.3485 data: 6.2553 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:19:25 lr: 0.000068 grad: 0.2598 (0.2488) loss: 0.7658 (0.7866) time: 0.1367 data: 0.0492 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:16:29 lr: 0.000068 grad: 0.2177 (0.2500) loss: 0.7633 (0.7723) time: 0.1379 data: 0.0460 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:15:28 lr: 0.000068 grad: 0.2013 (0.2369) loss: 0.7290 (0.7631) time: 0.1567 data: 0.0710 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:14:41 lr: 0.000068 grad: 0.1981 (0.2275) loss: 0.7236 (0.7573) time: 0.1494 data: 0.0635 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:14:04 lr: 0.000067 grad: 0.1774 (0.2201) loss: 0.7273 (0.7518) time: 0.1350 data: 0.0439 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:13:45 lr: 0.000067 grad: 0.1646 (0.2148) loss: 0.7450 (0.7496) time: 0.1428 data: 0.0600 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:13:41 lr: 0.000067 grad: 0.1633 (0.2091) loss: 0.7389 (0.7480) time: 0.1858 data: 0.1036 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:13:35 lr: 0.000067 grad: 0.1543 (0.2035) loss: 0.7370 (0.7463) time: 0.1745 data: 0.0906 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:13:28 lr: 0.000067 grad: 0.1688 (0.1992) loss: 0.7156 (0.7447) time: 0.1676 data: 0.0786 max mem: 9377 +Train: [50] [1000/6250] eta: 0:13:17 lr: 0.000067 grad: 0.1620 (0.1954) loss: 0.7333 (0.7442) time: 0.1233 data: 0.0464 max mem: 9377 +Train: [50] [1100/6250] eta: 0:13:01 lr: 0.000067 grad: 0.1555 (0.1923) loss: 0.7224 (0.7431) time: 0.1261 data: 0.0368 max mem: 9377 +Train: [50] [1200/6250] eta: 0:12:42 lr: 0.000067 grad: 0.1691 (0.1900) loss: 0.7266 (0.7422) time: 0.1340 data: 0.0501 max mem: 9377 +Train: [50] [1300/6250] eta: 0:12:27 lr: 0.000067 grad: 0.1611 (0.1880) loss: 0.7209 (0.7415) time: 0.1346 data: 0.0505 max mem: 9377 +Train: [50] [1400/6250] eta: 0:12:12 lr: 0.000067 grad: 0.1571 (0.1857) loss: 0.7254 (0.7408) time: 0.1213 data: 0.0292 max mem: 9377 +Train: [50] [1500/6250] eta: 0:11:59 lr: 0.000067 grad: 0.1667 (0.1840) loss: 0.7120 (0.7399) time: 0.1625 data: 0.0809 max mem: 9377 +Train: [50] [1600/6250] eta: 0:11:44 lr: 0.000067 grad: 0.1611 (0.1826) loss: 0.7313 (0.7394) time: 0.1378 data: 0.0532 max mem: 9377 +Train: [50] [1700/6250] eta: 0:11:29 lr: 0.000067 grad: 0.1603 (0.1814) loss: 0.7320 (0.7388) time: 0.1646 data: 0.0735 max mem: 9377 +Train: [50] [1800/6250] eta: 0:11:12 lr: 0.000067 grad: 0.1636 (0.1801) loss: 0.7393 (0.7385) time: 0.1545 data: 0.0750 max mem: 9377 +Train: [50] [1900/6250] eta: 0:10:54 lr: 0.000067 grad: 0.1583 (0.1792) loss: 0.7304 (0.7381) time: 0.1301 data: 0.0496 max mem: 9377 +Train: [50] [2000/6250] eta: 0:10:36 lr: 0.000067 grad: 0.1581 (0.1784) loss: 0.7297 (0.7378) time: 0.1477 data: 0.0690 max mem: 9377 +Train: [50] [2100/6250] eta: 0:10:17 lr: 0.000067 grad: 0.1550 (0.1774) loss: 0.7306 (0.7374) time: 0.1282 data: 0.0415 max mem: 9377 +Train: [50] [2200/6250] eta: 0:10:00 lr: 0.000067 grad: 0.1585 (0.1765) loss: 0.7284 (0.7371) time: 0.1499 data: 0.0664 max mem: 9377 +Train: [50] [2300/6250] eta: 0:09:43 lr: 0.000067 grad: 0.1514 (0.1757) loss: 0.7274 (0.7367) time: 0.1427 data: 0.0577 max mem: 9377 +Train: [50] [2400/6250] eta: 0:09:27 lr: 0.000067 grad: 0.1612 (0.1751) loss: 0.7317 (0.7364) time: 0.1582 data: 0.0831 max mem: 9377 +Train: [50] [2500/6250] eta: 0:09:13 lr: 0.000067 grad: 0.1627 (0.1746) loss: 0.7170 (0.7362) time: 0.1735 data: 0.0996 max mem: 9377 +Train: [50] [2600/6250] eta: 0:08:58 lr: 0.000067 grad: 0.1514 (0.1739) loss: 0.7331 (0.7360) time: 0.1338 data: 0.0496 max mem: 9377 +Train: [50] [2700/6250] eta: 0:08:42 lr: 0.000067 grad: 0.1651 (0.1735) loss: 0.7322 (0.7359) time: 0.1266 data: 0.0486 max mem: 9377 +Train: [50] [2800/6250] eta: 0:08:25 lr: 0.000067 grad: 0.1514 (0.1730) loss: 0.7358 (0.7358) time: 0.1306 data: 0.0445 max mem: 9377 +Train: [50] [2900/6250] eta: 0:08:10 lr: 0.000067 grad: 0.1540 (0.1724) loss: 0.7321 (0.7357) time: 0.1492 data: 0.0688 max mem: 9377 +Train: [50] [3000/6250] eta: 0:07:54 lr: 0.000067 grad: 0.1534 (0.1720) loss: 0.7402 (0.7357) time: 0.1317 data: 0.0478 max mem: 9377 +Train: [50] [3100/6250] eta: 0:07:39 lr: 0.000067 grad: 0.1569 (0.1716) loss: 0.7301 (0.7357) time: 0.1503 data: 0.0695 max mem: 9377 +Train: [50] [3200/6250] eta: 0:07:24 lr: 0.000067 grad: 0.1544 (0.1711) loss: 0.7401 (0.7358) time: 0.1409 data: 0.0559 max mem: 9377 +Train: [50] [3300/6250] eta: 0:07:09 lr: 0.000067 grad: 0.1484 (0.1706) loss: 0.7370 (0.7359) time: 0.1372 data: 0.0504 max mem: 9377 +Train: [50] [3400/6250] eta: 0:06:54 lr: 0.000067 grad: 0.1533 (0.1702) loss: 0.7455 (0.7360) time: 0.1435 data: 0.0656 max mem: 9377 +Train: [50] [3500/6250] eta: 0:06:39 lr: 0.000067 grad: 0.1530 (0.1698) loss: 0.7434 (0.7362) time: 0.1319 data: 0.0507 max mem: 9377 +Train: [50] [3600/6250] eta: 0:06:24 lr: 0.000066 grad: 0.1568 (0.1694) loss: 0.7389 (0.7364) time: 0.1442 data: 0.0667 max mem: 9377 +Train: [50] [3700/6250] eta: 0:06:09 lr: 0.000066 grad: 0.1560 (0.1690) loss: 0.7413 (0.7364) time: 0.1343 data: 0.0556 max mem: 9377 +Train: [50] [3800/6250] eta: 0:05:54 lr: 0.000066 grad: 0.1510 (0.1687) loss: 0.7358 (0.7365) time: 0.1356 data: 0.0554 max mem: 9377 +Train: [50] [3900/6250] eta: 0:05:39 lr: 0.000066 grad: 0.1571 (0.1684) loss: 0.7421 (0.7366) time: 0.1104 data: 0.0255 max mem: 9377 +Train: [50] [4000/6250] eta: 0:05:25 lr: 0.000066 grad: 0.1462 (0.1681) loss: 0.7490 (0.7368) time: 0.1320 data: 0.0465 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:10 lr: 0.000066 grad: 0.1573 (0.1678) loss: 0.7345 (0.7368) time: 0.1224 data: 0.0398 max mem: 9377 +Train: [50] [4200/6250] eta: 0:04:55 lr: 0.000066 grad: 0.1609 (0.1676) loss: 0.7263 (0.7367) time: 0.1537 data: 0.0699 max mem: 9377 +Train: [50] [4300/6250] eta: 0:04:41 lr: 0.000066 grad: 0.1600 (0.1674) loss: 0.7362 (0.7365) time: 0.1388 data: 0.0624 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:26 lr: 0.000066 grad: 0.1552 (0.1672) loss: 0.7390 (0.7365) time: 0.1377 data: 0.0530 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:12 lr: 0.000066 grad: 0.1614 (0.1670) loss: 0.7322 (0.7364) time: 0.1327 data: 0.0541 max mem: 9377 +Train: [50] [4600/6250] eta: 0:03:57 lr: 0.000066 grad: 0.1591 (0.1669) loss: 0.7247 (0.7362) time: 0.1418 data: 0.0639 max mem: 9377 +Train: [50] [4700/6250] eta: 0:03:42 lr: 0.000066 grad: 0.1648 (0.1668) loss: 0.7248 (0.7361) time: 0.1199 data: 0.0367 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:28 lr: 0.000066 grad: 0.1650 (0.1667) loss: 0.7335 (0.7360) time: 0.1341 data: 0.0547 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:13 lr: 0.000066 grad: 0.1674 (0.1667) loss: 0.7166 (0.7358) time: 0.1201 data: 0.0347 max mem: 9377 +Train: [50] [5000/6250] eta: 0:02:59 lr: 0.000066 grad: 0.1718 (0.1668) loss: 0.7292 (0.7357) time: 0.1507 data: 0.0690 max mem: 9377 +Train: [50] [5100/6250] eta: 0:02:44 lr: 0.000066 grad: 0.1615 (0.1668) loss: 0.7218 (0.7355) time: 0.1221 data: 0.0464 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:30 lr: 0.000066 grad: 0.1601 (0.1668) loss: 0.7319 (0.7353) time: 0.1441 data: 0.0622 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:15 lr: 0.000066 grad: 0.1609 (0.1667) loss: 0.7299 (0.7352) time: 0.1420 data: 0.0635 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:01 lr: 0.000066 grad: 0.1591 (0.1667) loss: 0.7310 (0.7351) time: 0.1748 data: 0.0885 max mem: 9377 +Train: [50] [5500/6250] eta: 0:01:47 lr: 0.000066 grad: 0.1567 (0.1666) loss: 0.7320 (0.7350) time: 0.1771 data: 0.0905 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:33 lr: 0.000066 grad: 0.1564 (0.1665) loss: 0.7277 (0.7348) time: 0.1617 data: 0.0826 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:19 lr: 0.000066 grad: 0.1618 (0.1665) loss: 0.7347 (0.7347) time: 0.1605 data: 0.0748 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:04 lr: 0.000066 grad: 0.1632 (0.1664) loss: 0.7263 (0.7347) time: 0.1442 data: 0.0661 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:50 lr: 0.000066 grad: 0.1646 (0.1663) loss: 0.7324 (0.7346) time: 0.1368 data: 0.0529 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:35 lr: 0.000066 grad: 0.1544 (0.1663) loss: 0.7335 (0.7345) time: 0.1375 data: 0.0567 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:21 lr: 0.000066 grad: 0.1589 (0.1662) loss: 0.7303 (0.7344) time: 0.1376 data: 0.0554 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:07 lr: 0.000066 grad: 0.1673 (0.1663) loss: 0.7289 (0.7343) time: 0.1416 data: 0.0615 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.1626 (0.1663) loss: 0.7304 (0.7343) time: 0.1486 data: 0.0620 max mem: 9377 +Train: [50] Total time: 0:15:02 (0.1445 s / it) +Averaged stats: lr: 0.000066 grad: 0.1626 (0.1663) loss: 0.7304 (0.7343) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:05:07 loss: 0.8552 (0.8552) time: 4.9525 data: 4.9206 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8637 (0.8639) time: 0.1144 data: 0.0896 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:13 (0.2125 s / it) +Averaged stats (hcp-train-subset): loss: 0.8637 (0.8639) +Eval (hcp-val): [50] [ 0/62] eta: 0:04:40 loss: 0.8604 (0.8604) time: 4.5295 data: 4.4469 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8609 (0.8621) time: 0.1225 data: 0.0974 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:13 (0.2125 s / it) +Averaged stats (hcp-val): loss: 0.8609 (0.8621) +Eval (nsd-val): [50] [ 0/62] eta: 0:03:32 loss: 0.8192 (0.8192) time: 3.4273 data: 3.3412 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8324 (0.8327) time: 0.0860 data: 0.0611 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:13 (0.2118 s / it) +Averaged stats (nsd-val): loss: 0.8324 (0.8327) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 9:49:05 lr: 0.000066 grad: 0.2348 (0.2348) loss: 0.7946 (0.7946) time: 5.6552 data: 5.4746 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:20:24 lr: 0.000066 grad: 0.3146 (0.3229) loss: 0.7308 (0.7342) time: 0.1440 data: 0.0405 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:17:09 lr: 0.000066 grad: 0.2832 (0.3124) loss: 0.7143 (0.7244) time: 0.1349 data: 0.0425 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:16:01 lr: 0.000065 grad: 0.2050 (0.2880) loss: 0.7159 (0.7242) time: 0.1399 data: 0.0479 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:15:18 lr: 0.000065 grad: 0.1777 (0.2648) loss: 0.7366 (0.7242) time: 0.1727 data: 0.0825 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:14:50 lr: 0.000065 grad: 0.1732 (0.2477) loss: 0.7473 (0.7269) time: 0.1542 data: 0.0670 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:14:37 lr: 0.000065 grad: 0.1698 (0.2344) loss: 0.7258 (0.7285) time: 0.1499 data: 0.0656 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:14:33 lr: 0.000065 grad: 0.1648 (0.2250) loss: 0.7460 (0.7302) time: 0.1905 data: 0.1117 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:14:31 lr: 0.000065 grad: 0.1681 (0.2174) loss: 0.7443 (0.7313) time: 0.1749 data: 0.0942 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:14:23 lr: 0.000065 grad: 0.1608 (0.2115) loss: 0.7355 (0.7318) time: 0.1555 data: 0.0719 max mem: 9377 +Train: [51] [1000/6250] eta: 0:13:57 lr: 0.000065 grad: 0.1556 (0.2065) loss: 0.7418 (0.7320) time: 0.1318 data: 0.0512 max mem: 9377 +Train: [51] [1100/6250] eta: 0:13:33 lr: 0.000065 grad: 0.1656 (0.2028) loss: 0.7223 (0.7318) time: 0.1518 data: 0.0715 max mem: 9377 +Train: [51] [1200/6250] eta: 0:13:05 lr: 0.000065 grad: 0.1579 (0.1995) loss: 0.7264 (0.7319) time: 0.1320 data: 0.0539 max mem: 9377 +Train: [51] [1300/6250] eta: 0:12:50 lr: 0.000065 grad: 0.1616 (0.1964) loss: 0.7311 (0.7322) time: 0.1631 data: 0.0843 max mem: 9377 +Train: [51] [1400/6250] eta: 0:12:36 lr: 0.000065 grad: 0.1571 (0.1938) loss: 0.7296 (0.7321) time: 0.1599 data: 0.0638 max mem: 9377 +Train: [51] [1500/6250] eta: 0:12:18 lr: 0.000065 grad: 0.1575 (0.1914) loss: 0.7261 (0.7321) time: 0.1425 data: 0.0563 max mem: 9377 +Train: [51] [1600/6250] eta: 0:11:59 lr: 0.000065 grad: 0.1498 (0.1893) loss: 0.7345 (0.7319) time: 0.1530 data: 0.0637 max mem: 9377 +Train: [51] [1700/6250] eta: 0:11:38 lr: 0.000065 grad: 0.1554 (0.1876) loss: 0.7365 (0.7319) time: 0.1406 data: 0.0582 max mem: 9377 +Train: [51] [1800/6250] eta: 0:11:20 lr: 0.000065 grad: 0.1584 (0.1860) loss: 0.7200 (0.7318) time: 0.1370 data: 0.0504 max mem: 9377 +Train: [51] [1900/6250] eta: 0:11:00 lr: 0.000065 grad: 0.1539 (0.1844) loss: 0.7326 (0.7318) time: 0.1309 data: 0.0446 max mem: 9377 +Train: [51] [2000/6250] eta: 0:10:40 lr: 0.000065 grad: 0.1521 (0.1830) loss: 0.7222 (0.7316) time: 0.1231 data: 0.0377 max mem: 9377 +Train: [51] [2100/6250] eta: 0:10:21 lr: 0.000065 grad: 0.1550 (0.1819) loss: 0.7239 (0.7313) time: 0.1359 data: 0.0517 max mem: 9377 +Train: [51] [2200/6250] eta: 0:10:03 lr: 0.000065 grad: 0.1587 (0.1810) loss: 0.7269 (0.7311) time: 0.1354 data: 0.0492 max mem: 9377 +Train: [51] [2300/6250] eta: 0:09:45 lr: 0.000065 grad: 0.1593 (0.1800) loss: 0.7286 (0.7309) time: 0.1276 data: 0.0430 max mem: 9377 +Train: [51] [2400/6250] eta: 0:09:29 lr: 0.000065 grad: 0.1615 (0.1791) loss: 0.7186 (0.7308) time: 0.1366 data: 0.0555 max mem: 9377 +Train: [51] [2500/6250] eta: 0:09:12 lr: 0.000065 grad: 0.1614 (0.1784) loss: 0.7228 (0.7308) time: 0.1376 data: 0.0603 max mem: 9377 +Train: [51] [2600/6250] eta: 0:08:57 lr: 0.000065 grad: 0.1660 (0.1778) loss: 0.7332 (0.7307) time: 0.1425 data: 0.0641 max mem: 9377 +Train: [51] [2700/6250] eta: 0:08:41 lr: 0.000065 grad: 0.1638 (0.1774) loss: 0.7194 (0.7305) time: 0.1238 data: 0.0375 max mem: 9377 +Train: [51] [2800/6250] eta: 0:08:24 lr: 0.000065 grad: 0.1586 (0.1769) loss: 0.7330 (0.7303) time: 0.1166 data: 0.0336 max mem: 9377 +Train: [51] [2900/6250] eta: 0:08:08 lr: 0.000065 grad: 0.1615 (0.1765) loss: 0.7190 (0.7301) time: 0.1187 data: 0.0308 max mem: 9377 +Train: [51] [3000/6250] eta: 0:07:52 lr: 0.000065 grad: 0.1655 (0.1761) loss: 0.7156 (0.7298) time: 0.1281 data: 0.0427 max mem: 9377 +Train: [51] [3100/6250] eta: 0:07:38 lr: 0.000065 grad: 0.1579 (0.1758) loss: 0.7178 (0.7297) time: 0.1447 data: 0.0718 max mem: 9377 +Train: [51] [3200/6250] eta: 0:07:23 lr: 0.000065 grad: 0.1657 (0.1754) loss: 0.7267 (0.7297) time: 0.1486 data: 0.0669 max mem: 9377 +Train: [51] [3300/6250] eta: 0:07:09 lr: 0.000065 grad: 0.1634 (0.1751) loss: 0.7329 (0.7298) time: 0.1615 data: 0.0854 max mem: 9377 +Train: [51] [3400/6250] eta: 0:06:53 lr: 0.000064 grad: 0.1600 (0.1747) loss: 0.7311 (0.7298) time: 0.1371 data: 0.0571 max mem: 9377 +Train: [51] [3500/6250] eta: 0:06:39 lr: 0.000064 grad: 0.1642 (0.1743) loss: 0.7403 (0.7299) time: 0.1410 data: 0.0600 max mem: 9377 +Train: [51] [3600/6250] eta: 0:06:25 lr: 0.000064 grad: 0.1597 (0.1740) loss: 0.7247 (0.7299) time: 0.1635 data: 0.0777 max mem: 9377 +Train: [51] [3700/6250] eta: 0:06:09 lr: 0.000064 grad: 0.1683 (0.1736) loss: 0.7332 (0.7298) time: 0.1413 data: 0.0615 max mem: 9377 +Train: [51] [3800/6250] eta: 0:05:54 lr: 0.000064 grad: 0.1558 (0.1734) loss: 0.7334 (0.7299) time: 0.1426 data: 0.0615 max mem: 9377 +Train: [51] [3900/6250] eta: 0:05:40 lr: 0.000064 grad: 0.1603 (0.1730) loss: 0.7357 (0.7300) time: 0.1207 data: 0.0356 max mem: 9377 +Train: [51] [4000/6250] eta: 0:05:25 lr: 0.000064 grad: 0.1622 (0.1727) loss: 0.7231 (0.7301) time: 0.1300 data: 0.0443 max mem: 9377 +Train: [51] [4100/6250] eta: 0:05:09 lr: 0.000064 grad: 0.1601 (0.1723) loss: 0.7305 (0.7302) time: 0.1236 data: 0.0429 max mem: 9377 +Train: [51] [4200/6250] eta: 0:04:55 lr: 0.000064 grad: 0.1527 (0.1720) loss: 0.7404 (0.7303) time: 0.1374 data: 0.0555 max mem: 9377 +Train: [51] [4300/6250] eta: 0:04:40 lr: 0.000064 grad: 0.1482 (0.1716) loss: 0.7429 (0.7304) time: 0.1349 data: 0.0519 max mem: 9377 +Train: [51] [4400/6250] eta: 0:04:25 lr: 0.000064 grad: 0.1530 (0.1713) loss: 0.7417 (0.7305) time: 0.1370 data: 0.0538 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:10 lr: 0.000064 grad: 0.1464 (0.1710) loss: 0.7394 (0.7307) time: 0.1345 data: 0.0521 max mem: 9377 +Train: [51] [4600/6250] eta: 0:03:56 lr: 0.000064 grad: 0.1629 (0.1707) loss: 0.7377 (0.7309) time: 0.1305 data: 0.0526 max mem: 9377 +Train: [51] [4700/6250] eta: 0:03:41 lr: 0.000064 grad: 0.1450 (0.1704) loss: 0.7495 (0.7312) time: 0.1289 data: 0.0394 max mem: 9377 +Train: [51] [4800/6250] eta: 0:03:27 lr: 0.000064 grad: 0.1517 (0.1701) loss: 0.7356 (0.7313) time: 0.1396 data: 0.0556 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:13 lr: 0.000064 grad: 0.1563 (0.1698) loss: 0.7434 (0.7314) time: 0.1473 data: 0.0689 max mem: 9377 +Train: [51] [5000/6250] eta: 0:02:58 lr: 0.000064 grad: 0.1603 (0.1696) loss: 0.7317 (0.7313) time: 0.1289 data: 0.0482 max mem: 9377 +Train: [51] [5100/6250] eta: 0:02:44 lr: 0.000064 grad: 0.1521 (0.1695) loss: 0.7426 (0.7313) time: 0.1419 data: 0.0623 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:30 lr: 0.000064 grad: 0.1595 (0.1694) loss: 0.7253 (0.7313) time: 0.1377 data: 0.0520 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:15 lr: 0.000064 grad: 0.1626 (0.1692) loss: 0.7346 (0.7312) time: 0.1712 data: 0.0905 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:01 lr: 0.000064 grad: 0.1663 (0.1691) loss: 0.7240 (0.7312) time: 0.1465 data: 0.0695 max mem: 9377 +Train: [51] [5500/6250] eta: 0:01:47 lr: 0.000064 grad: 0.1563 (0.1690) loss: 0.7261 (0.7311) time: 0.1362 data: 0.0601 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:32 lr: 0.000064 grad: 0.1501 (0.1688) loss: 0.7297 (0.7310) time: 0.1382 data: 0.0619 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:18 lr: 0.000064 grad: 0.1538 (0.1687) loss: 0.7324 (0.7311) time: 0.2190 data: 0.1449 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:04 lr: 0.000064 grad: 0.1607 (0.1685) loss: 0.7367 (0.7311) time: 0.1607 data: 0.0781 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:50 lr: 0.000064 grad: 0.1623 (0.1684) loss: 0.7309 (0.7311) time: 0.1672 data: 0.0843 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:36 lr: 0.000064 grad: 0.1592 (0.1683) loss: 0.7232 (0.7310) time: 0.1706 data: 0.0849 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:21 lr: 0.000064 grad: 0.1658 (0.1683) loss: 0.7172 (0.7309) time: 0.1470 data: 0.0673 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:07 lr: 0.000064 grad: 0.1599 (0.1683) loss: 0.7329 (0.7309) time: 0.1586 data: 0.0796 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1538 (0.1683) loss: 0.7222 (0.7309) time: 0.1617 data: 0.0784 max mem: 9377 +Train: [51] Total time: 0:15:12 (0.1459 s / it) +Averaged stats: lr: 0.000064 grad: 0.1538 (0.1683) loss: 0.7222 (0.7309) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:05:20 loss: 0.8573 (0.8573) time: 5.1757 data: 5.1454 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8666 (0.8655) time: 0.1300 data: 0.1050 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:13 (0.2215 s / it) +Averaged stats (hcp-train-subset): loss: 0.8666 (0.8655) +Eval (hcp-val): [51] [ 0/62] eta: 0:05:00 loss: 0.8735 (0.8735) time: 4.8465 data: 4.7649 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8605 (0.8624) time: 0.1442 data: 0.1189 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (hcp-val): loss: 0.8605 (0.8624) +Eval (nsd-val): [51] [ 0/62] eta: 0:05:47 loss: 0.8207 (0.8207) time: 5.6004 data: 5.5687 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8345 (0.8346) time: 0.1104 data: 0.0838 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:13 (0.2240 s / it) +Averaged stats (nsd-val): loss: 0.8345 (0.8346) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 9:40:26 lr: 0.000064 grad: 0.2021 (0.2021) loss: 0.7865 (0.7865) time: 5.5723 data: 5.3186 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:21:23 lr: 0.000063 grad: 0.2969 (0.3296) loss: 0.7229 (0.7316) time: 0.1289 data: 0.0305 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:18:08 lr: 0.000063 grad: 0.2485 (0.3101) loss: 0.7306 (0.7319) time: 0.1700 data: 0.0825 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:17:00 lr: 0.000063 grad: 0.2114 (0.2840) loss: 0.7325 (0.7323) time: 0.1258 data: 0.0343 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:16:13 lr: 0.000063 grad: 0.2031 (0.2679) loss: 0.7222 (0.7315) time: 0.1424 data: 0.0479 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:15:38 lr: 0.000063 grad: 0.1740 (0.2497) loss: 0.7382 (0.7320) time: 0.1469 data: 0.0585 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:14:57 lr: 0.000063 grad: 0.1699 (0.2365) loss: 0.7483 (0.7327) time: 0.1299 data: 0.0410 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:14:30 lr: 0.000063 grad: 0.1703 (0.2280) loss: 0.7478 (0.7332) time: 0.1795 data: 0.0978 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:14:05 lr: 0.000063 grad: 0.1706 (0.2203) loss: 0.7328 (0.7331) time: 0.1591 data: 0.0774 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:13:42 lr: 0.000063 grad: 0.1532 (0.2138) loss: 0.7413 (0.7328) time: 0.1534 data: 0.0678 max mem: 9377 +Train: [52] [1000/6250] eta: 0:13:20 lr: 0.000063 grad: 0.1579 (0.2085) loss: 0.7447 (0.7329) time: 0.1382 data: 0.0574 max mem: 9377 +Train: [52] [1100/6250] eta: 0:12:58 lr: 0.000063 grad: 0.1637 (0.2046) loss: 0.7522 (0.7333) time: 0.1337 data: 0.0480 max mem: 9377 +Train: [52] [1200/6250] eta: 0:12:38 lr: 0.000063 grad: 0.1673 (0.2013) loss: 0.7308 (0.7336) time: 0.1381 data: 0.0536 max mem: 9377 +Train: [52] [1300/6250] eta: 0:12:23 lr: 0.000063 grad: 0.1578 (0.1985) loss: 0.7367 (0.7339) time: 0.1524 data: 0.0669 max mem: 9377 +Train: [52] [1400/6250] eta: 0:12:11 lr: 0.000063 grad: 0.1661 (0.1957) loss: 0.7435 (0.7345) time: 0.1526 data: 0.0673 max mem: 9377 +Train: [52] [1500/6250] eta: 0:11:57 lr: 0.000063 grad: 0.1665 (0.1940) loss: 0.7441 (0.7347) time: 0.1564 data: 0.0744 max mem: 9377 +Train: [52] [1600/6250] eta: 0:11:38 lr: 0.000063 grad: 0.1578 (0.1921) loss: 0.7269 (0.7348) time: 0.1500 data: 0.0693 max mem: 9377 +Train: [52] [1700/6250] eta: 0:11:20 lr: 0.000063 grad: 0.1500 (0.1902) loss: 0.7443 (0.7348) time: 0.1172 data: 0.0313 max mem: 9377 +Train: [52] [1800/6250] eta: 0:11:05 lr: 0.000063 grad: 0.1545 (0.1885) loss: 0.7371 (0.7347) time: 0.1642 data: 0.0811 max mem: 9377 +Train: [52] [1900/6250] eta: 0:10:45 lr: 0.000063 grad: 0.1526 (0.1870) loss: 0.7300 (0.7344) time: 0.1353 data: 0.0567 max mem: 9377 +Train: [52] [2000/6250] eta: 0:10:28 lr: 0.000063 grad: 0.1617 (0.1857) loss: 0.7297 (0.7343) time: 0.1285 data: 0.0477 max mem: 9377 +Train: [52] [2100/6250] eta: 0:10:10 lr: 0.000063 grad: 0.1558 (0.1845) loss: 0.7285 (0.7338) time: 0.1354 data: 0.0469 max mem: 9377 +Train: [52] [2200/6250] eta: 0:09:52 lr: 0.000063 grad: 0.1631 (0.1835) loss: 0.7236 (0.7334) time: 0.1384 data: 0.0520 max mem: 9377 +Train: [52] [2300/6250] eta: 0:09:37 lr: 0.000063 grad: 0.1620 (0.1826) loss: 0.7352 (0.7334) time: 0.1263 data: 0.0455 max mem: 9377 +Train: [52] [2400/6250] eta: 0:09:23 lr: 0.000063 grad: 0.1500 (0.1818) loss: 0.7291 (0.7335) time: 0.1590 data: 0.0787 max mem: 9377 +Train: [52] [2500/6250] eta: 0:09:09 lr: 0.000063 grad: 0.1543 (0.1808) loss: 0.7378 (0.7335) time: 0.1778 data: 0.0976 max mem: 9377 +Train: [52] [2600/6250] eta: 0:08:56 lr: 0.000063 grad: 0.1618 (0.1799) loss: 0.7124 (0.7335) time: 0.1622 data: 0.0823 max mem: 9377 +Train: [52] [2700/6250] eta: 0:08:42 lr: 0.000063 grad: 0.1519 (0.1791) loss: 0.7343 (0.7335) time: 0.1546 data: 0.0733 max mem: 9377 +Train: [52] [2800/6250] eta: 0:08:29 lr: 0.000063 grad: 0.1626 (0.1783) loss: 0.7224 (0.7335) time: 0.1635 data: 0.0839 max mem: 9377 +Train: [52] [2900/6250] eta: 0:08:14 lr: 0.000063 grad: 0.1614 (0.1776) loss: 0.7151 (0.7334) time: 0.1356 data: 0.0527 max mem: 9377 +Train: [52] [3000/6250] eta: 0:07:58 lr: 0.000063 grad: 0.1594 (0.1770) loss: 0.7359 (0.7333) time: 0.1119 data: 0.0312 max mem: 9377 +Train: [52] [3100/6250] eta: 0:07:43 lr: 0.000063 grad: 0.1522 (0.1763) loss: 0.7326 (0.7332) time: 0.1253 data: 0.0498 max mem: 9377 +Train: [52] [3200/6250] eta: 0:07:27 lr: 0.000062 grad: 0.1544 (0.1758) loss: 0.7222 (0.7331) time: 0.1289 data: 0.0435 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:11 lr: 0.000062 grad: 0.1644 (0.1755) loss: 0.7232 (0.7330) time: 0.1211 data: 0.0463 max mem: 9377 +Train: [52] [3400/6250] eta: 0:06:56 lr: 0.000062 grad: 0.1570 (0.1751) loss: 0.7301 (0.7328) time: 0.1457 data: 0.0661 max mem: 9377 +Train: [52] [3500/6250] eta: 0:06:41 lr: 0.000062 grad: 0.1561 (0.1747) loss: 0.7383 (0.7327) time: 0.1366 data: 0.0517 max mem: 9377 +Train: [52] [3600/6250] eta: 0:06:26 lr: 0.000062 grad: 0.1622 (0.1743) loss: 0.7247 (0.7327) time: 0.1468 data: 0.0585 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:11 lr: 0.000062 grad: 0.1587 (0.1740) loss: 0.7167 (0.7325) time: 0.1208 data: 0.0420 max mem: 9377 +Train: [52] [3800/6250] eta: 0:05:55 lr: 0.000062 grad: 0.1542 (0.1737) loss: 0.7403 (0.7324) time: 0.1390 data: 0.0520 max mem: 9377 +Train: [52] [3900/6250] eta: 0:05:40 lr: 0.000062 grad: 0.1508 (0.1733) loss: 0.7329 (0.7324) time: 0.1385 data: 0.0602 max mem: 9377 +Train: [52] [4000/6250] eta: 0:05:25 lr: 0.000062 grad: 0.1683 (0.1730) loss: 0.7175 (0.7322) time: 0.1211 data: 0.0390 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:11 lr: 0.000062 grad: 0.1559 (0.1726) loss: 0.7386 (0.7322) time: 0.1502 data: 0.0677 max mem: 9377 +Train: [52] [4200/6250] eta: 0:04:56 lr: 0.000062 grad: 0.1611 (0.1724) loss: 0.7301 (0.7322) time: 0.1196 data: 0.0376 max mem: 9377 +Train: [52] [4300/6250] eta: 0:04:41 lr: 0.000062 grad: 0.1600 (0.1721) loss: 0.7274 (0.7321) time: 0.1582 data: 0.0722 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:26 lr: 0.000062 grad: 0.1597 (0.1719) loss: 0.7182 (0.7320) time: 0.1218 data: 0.0332 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:12 lr: 0.000062 grad: 0.1558 (0.1716) loss: 0.7378 (0.7320) time: 0.1503 data: 0.0714 max mem: 9377 +Train: [52] [4600/6250] eta: 0:03:57 lr: 0.000062 grad: 0.1571 (0.1713) loss: 0.7381 (0.7320) time: 0.1312 data: 0.0441 max mem: 9377 +Train: [52] [4700/6250] eta: 0:03:42 lr: 0.000062 grad: 0.1629 (0.1711) loss: 0.7238 (0.7320) time: 0.1292 data: 0.0426 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:28 lr: 0.000062 grad: 0.1563 (0.1708) loss: 0.7264 (0.7320) time: 0.1499 data: 0.0661 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:13 lr: 0.000062 grad: 0.1563 (0.1706) loss: 0.7253 (0.7320) time: 0.1175 data: 0.0409 max mem: 9377 +Train: [52] [5000/6250] eta: 0:02:59 lr: 0.000062 grad: 0.1562 (0.1705) loss: 0.7311 (0.7320) time: 0.1373 data: 0.0544 max mem: 9377 +Train: [52] [5100/6250] eta: 0:02:44 lr: 0.000062 grad: 0.1610 (0.1704) loss: 0.7114 (0.7318) time: 0.1522 data: 0.0736 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:30 lr: 0.000062 grad: 0.1578 (0.1702) loss: 0.7324 (0.7318) time: 0.1472 data: 0.0562 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:15 lr: 0.000062 grad: 0.1558 (0.1700) loss: 0.7202 (0.7317) time: 0.1378 data: 0.0523 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:01 lr: 0.000062 grad: 0.1561 (0.1697) loss: 0.7386 (0.7318) time: 0.1454 data: 0.0706 max mem: 9377 +Train: [52] [5500/6250] eta: 0:01:47 lr: 0.000062 grad: 0.1578 (0.1695) loss: 0.7279 (0.7318) time: 0.1511 data: 0.0718 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:32 lr: 0.000062 grad: 0.1560 (0.1693) loss: 0.7372 (0.7319) time: 0.1423 data: 0.0548 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:18 lr: 0.000062 grad: 0.1640 (0.1691) loss: 0.7336 (0.7319) time: 0.1364 data: 0.0499 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:04 lr: 0.000062 grad: 0.1648 (0.1690) loss: 0.7362 (0.7320) time: 0.1653 data: 0.0891 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:50 lr: 0.000062 grad: 0.1585 (0.1689) loss: 0.7279 (0.7320) time: 0.1599 data: 0.0753 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:35 lr: 0.000062 grad: 0.1607 (0.1687) loss: 0.7295 (0.7320) time: 0.1708 data: 0.0825 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:21 lr: 0.000062 grad: 0.1561 (0.1686) loss: 0.7262 (0.7319) time: 0.1404 data: 0.0617 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.1639 (0.1685) loss: 0.7198 (0.7318) time: 0.1328 data: 0.0536 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1646 (0.1685) loss: 0.7147 (0.7318) time: 0.1344 data: 0.0488 max mem: 9377 +Train: [52] Total time: 0:15:04 (0.1448 s / it) +Averaged stats: lr: 0.000061 grad: 0.1646 (0.1685) loss: 0.7147 (0.7318) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:03:30 loss: 0.8627 (0.8627) time: 3.3911 data: 3.2808 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8641 (0.8652) time: 0.1376 data: 0.1123 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-train-subset): loss: 0.8641 (0.8652) +Eval (hcp-val): [52] [ 0/62] eta: 0:03:57 loss: 0.8633 (0.8633) time: 3.8283 data: 3.7496 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8605 (0.8621) time: 0.1341 data: 0.1071 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:13 (0.2255 s / it) +Averaged stats (hcp-val): loss: 0.8605 (0.8621) +Eval (nsd-val): [52] [ 0/62] eta: 0:05:54 loss: 0.8191 (0.8191) time: 5.7236 data: 5.6865 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8318 (0.8335) time: 0.1257 data: 0.0986 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:14 (0.2340 s / it) +Averaged stats (nsd-val): loss: 0.8318 (0.8335) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 10:38:07 lr: 0.000061 grad: 0.3774 (0.3774) loss: 0.5541 (0.5541) time: 6.1260 data: 5.8981 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:22:25 lr: 0.000061 grad: 0.2333 (0.3071) loss: 0.7360 (0.7429) time: 0.1664 data: 0.0535 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:19:15 lr: 0.000061 grad: 0.1760 (0.2694) loss: 0.7518 (0.7411) time: 0.1445 data: 0.0330 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:18:08 lr: 0.000061 grad: 0.2130 (0.2485) loss: 0.7242 (0.7366) time: 0.1704 data: 0.0746 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:17:06 lr: 0.000061 grad: 0.2086 (0.2417) loss: 0.7249 (0.7347) time: 0.1515 data: 0.0563 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:16:13 lr: 0.000061 grad: 0.1755 (0.2311) loss: 0.7428 (0.7347) time: 0.1464 data: 0.0527 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:15:41 lr: 0.000061 grad: 0.1653 (0.2217) loss: 0.7425 (0.7353) time: 0.1557 data: 0.0686 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:15:06 lr: 0.000061 grad: 0.1689 (0.2140) loss: 0.7412 (0.7359) time: 0.1377 data: 0.0504 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:14:41 lr: 0.000061 grad: 0.1673 (0.2081) loss: 0.7348 (0.7364) time: 0.1388 data: 0.0489 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:14:14 lr: 0.000061 grad: 0.1623 (0.2037) loss: 0.7474 (0.7366) time: 0.1545 data: 0.0690 max mem: 9377 +Train: [53] [1000/6250] eta: 0:13:47 lr: 0.000061 grad: 0.1598 (0.1997) loss: 0.7427 (0.7371) time: 0.1193 data: 0.0372 max mem: 9377 +Train: [53] [1100/6250] eta: 0:13:23 lr: 0.000061 grad: 0.1538 (0.1959) loss: 0.7399 (0.7377) time: 0.1284 data: 0.0401 max mem: 9377 +Train: [53] [1200/6250] eta: 0:13:01 lr: 0.000061 grad: 0.1579 (0.1928) loss: 0.7278 (0.7380) time: 0.1432 data: 0.0534 max mem: 9377 +Train: [53] [1300/6250] eta: 0:12:45 lr: 0.000061 grad: 0.1572 (0.1901) loss: 0.7383 (0.7381) time: 0.2034 data: 0.1287 max mem: 9377 +Train: [53] [1400/6250] eta: 0:12:26 lr: 0.000061 grad: 0.1648 (0.1882) loss: 0.7396 (0.7380) time: 0.1289 data: 0.0388 max mem: 9377 +Train: [53] [1500/6250] eta: 0:12:12 lr: 0.000061 grad: 0.1676 (0.1867) loss: 0.7245 (0.7376) time: 0.1451 data: 0.0638 max mem: 9377 +Train: [53] [1600/6250] eta: 0:11:55 lr: 0.000061 grad: 0.1756 (0.1856) loss: 0.7252 (0.7373) time: 0.1382 data: 0.0489 max mem: 9377 +Train: [53] [1700/6250] eta: 0:11:38 lr: 0.000061 grad: 0.1616 (0.1847) loss: 0.7233 (0.7370) time: 0.1541 data: 0.0720 max mem: 9377 +Train: [53] [1800/6250] eta: 0:11:20 lr: 0.000061 grad: 0.1636 (0.1837) loss: 0.7245 (0.7364) time: 0.1325 data: 0.0478 max mem: 9377 +Train: [53] [1900/6250] eta: 0:11:03 lr: 0.000061 grad: 0.1594 (0.1827) loss: 0.7383 (0.7361) time: 0.1471 data: 0.0654 max mem: 9377 +Train: [53] [2000/6250] eta: 0:10:46 lr: 0.000061 grad: 0.1649 (0.1817) loss: 0.7251 (0.7359) time: 0.1496 data: 0.0634 max mem: 9377 +Train: [53] [2100/6250] eta: 0:10:29 lr: 0.000061 grad: 0.1595 (0.1807) loss: 0.7291 (0.7356) time: 0.1449 data: 0.0669 max mem: 9377 +Train: [53] [2200/6250] eta: 0:10:09 lr: 0.000061 grad: 0.1673 (0.1799) loss: 0.7272 (0.7353) time: 0.1347 data: 0.0506 max mem: 9377 +Train: [53] [2300/6250] eta: 0:09:50 lr: 0.000061 grad: 0.1549 (0.1792) loss: 0.7301 (0.7351) time: 0.1145 data: 0.0252 max mem: 9377 +Train: [53] [2400/6250] eta: 0:09:32 lr: 0.000061 grad: 0.1617 (0.1788) loss: 0.7435 (0.7349) time: 0.1312 data: 0.0454 max mem: 9377 +Train: [53] [2500/6250] eta: 0:09:14 lr: 0.000061 grad: 0.1638 (0.1782) loss: 0.7366 (0.7347) time: 0.1325 data: 0.0492 max mem: 9377 +Train: [53] [2600/6250] eta: 0:08:58 lr: 0.000061 grad: 0.1620 (0.1777) loss: 0.7330 (0.7345) time: 0.1200 data: 0.0396 max mem: 9377 +Train: [53] [2700/6250] eta: 0:08:44 lr: 0.000061 grad: 0.1602 (0.1773) loss: 0.7234 (0.7343) time: 0.1430 data: 0.0684 max mem: 9377 +Train: [53] [2800/6250] eta: 0:08:28 lr: 0.000061 grad: 0.1620 (0.1768) loss: 0.7353 (0.7341) time: 0.1333 data: 0.0539 max mem: 9377 +Train: [53] [2900/6250] eta: 0:08:13 lr: 0.000061 grad: 0.1573 (0.1763) loss: 0.7289 (0.7340) time: 0.1314 data: 0.0465 max mem: 9377 +Train: [53] [3000/6250] eta: 0:07:57 lr: 0.000060 grad: 0.1654 (0.1759) loss: 0.7302 (0.7338) time: 0.1481 data: 0.0661 max mem: 9377 +Train: [53] [3100/6250] eta: 0:07:42 lr: 0.000060 grad: 0.1619 (0.1754) loss: 0.7325 (0.7337) time: 0.1480 data: 0.0695 max mem: 9377 +Train: [53] [3200/6250] eta: 0:07:26 lr: 0.000060 grad: 0.1632 (0.1750) loss: 0.7288 (0.7335) time: 0.1333 data: 0.0483 max mem: 9377 +Train: [53] [3300/6250] eta: 0:07:11 lr: 0.000060 grad: 0.1677 (0.1748) loss: 0.7123 (0.7330) time: 0.1186 data: 0.0348 max mem: 9377 +Train: [53] [3400/6250] eta: 0:06:55 lr: 0.000060 grad: 0.1583 (0.1746) loss: 0.7246 (0.7327) time: 0.1429 data: 0.0597 max mem: 9377 +Train: [53] [3500/6250] eta: 0:06:40 lr: 0.000060 grad: 0.1703 (0.1743) loss: 0.7251 (0.7325) time: 0.1163 data: 0.0289 max mem: 9377 +Train: [53] [3600/6250] eta: 0:06:25 lr: 0.000060 grad: 0.1733 (0.1742) loss: 0.7283 (0.7324) time: 0.1668 data: 0.0878 max mem: 9377 +Train: [53] [3700/6250] eta: 0:06:10 lr: 0.000060 grad: 0.1747 (0.1742) loss: 0.7156 (0.7322) time: 0.1524 data: 0.0729 max mem: 9377 +Train: [53] [3800/6250] eta: 0:05:55 lr: 0.000060 grad: 0.1645 (0.1741) loss: 0.7324 (0.7321) time: 0.1252 data: 0.0352 max mem: 9377 +Train: [53] [3900/6250] eta: 0:05:41 lr: 0.000060 grad: 0.1600 (0.1739) loss: 0.7273 (0.7320) time: 0.1536 data: 0.0738 max mem: 9377 +Train: [53] [4000/6250] eta: 0:05:25 lr: 0.000060 grad: 0.1621 (0.1738) loss: 0.7295 (0.7319) time: 0.1124 data: 0.0289 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:11 lr: 0.000060 grad: 0.1634 (0.1736) loss: 0.7238 (0.7318) time: 0.1280 data: 0.0434 max mem: 9377 +Train: [53] [4200/6250] eta: 0:04:56 lr: 0.000060 grad: 0.1683 (0.1734) loss: 0.7359 (0.7316) time: 0.1541 data: 0.0747 max mem: 9377 +Train: [53] [4300/6250] eta: 0:04:41 lr: 0.000060 grad: 0.1694 (0.1734) loss: 0.7312 (0.7316) time: 0.1170 data: 0.0292 max mem: 9377 +Train: [53] [4400/6250] eta: 0:04:26 lr: 0.000060 grad: 0.1692 (0.1733) loss: 0.7299 (0.7316) time: 0.1259 data: 0.0437 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:11 lr: 0.000060 grad: 0.1603 (0.1732) loss: 0.7478 (0.7316) time: 0.1450 data: 0.0650 max mem: 9377 +Train: [53] [4600/6250] eta: 0:03:57 lr: 0.000060 grad: 0.1655 (0.1731) loss: 0.7302 (0.7316) time: 0.1246 data: 0.0434 max mem: 9377 +Train: [53] [4700/6250] eta: 0:03:42 lr: 0.000060 grad: 0.1713 (0.1730) loss: 0.7185 (0.7316) time: 0.1312 data: 0.0505 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:28 lr: 0.000060 grad: 0.1616 (0.1728) loss: 0.7385 (0.7317) time: 0.1451 data: 0.0616 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:13 lr: 0.000060 grad: 0.1603 (0.1726) loss: 0.7316 (0.7317) time: 0.1519 data: 0.0692 max mem: 9377 +Train: [53] [5000/6250] eta: 0:02:59 lr: 0.000060 grad: 0.1582 (0.1725) loss: 0.7323 (0.7317) time: 0.1437 data: 0.0601 max mem: 9377 +Train: [53] [5100/6250] eta: 0:02:44 lr: 0.000060 grad: 0.1629 (0.1724) loss: 0.7241 (0.7316) time: 0.1542 data: 0.0681 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:30 lr: 0.000060 grad: 0.1614 (0.1723) loss: 0.7385 (0.7316) time: 0.1450 data: 0.0582 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:15 lr: 0.000060 grad: 0.1772 (0.1722) loss: 0.7328 (0.7315) time: 0.1231 data: 0.0444 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:01 lr: 0.000060 grad: 0.1710 (0.1722) loss: 0.7271 (0.7315) time: 0.1324 data: 0.0499 max mem: 9377 +Train: [53] [5500/6250] eta: 0:01:47 lr: 0.000060 grad: 0.1683 (0.1721) loss: 0.7123 (0.7314) time: 0.1373 data: 0.0566 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:32 lr: 0.000060 grad: 0.1639 (0.1720) loss: 0.7260 (0.7313) time: 0.1267 data: 0.0461 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:18 lr: 0.000060 grad: 0.1661 (0.1719) loss: 0.7333 (0.7313) time: 0.1231 data: 0.0369 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:04 lr: 0.000060 grad: 0.1684 (0.1719) loss: 0.7265 (0.7313) time: 0.1356 data: 0.0575 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:49 lr: 0.000060 grad: 0.1641 (0.1719) loss: 0.7323 (0.7312) time: 0.1310 data: 0.0412 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:35 lr: 0.000059 grad: 0.1607 (0.1717) loss: 0.7290 (0.7312) time: 0.1295 data: 0.0453 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:21 lr: 0.000059 grad: 0.1691 (0.1717) loss: 0.7161 (0.7312) time: 0.1370 data: 0.0579 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:07 lr: 0.000059 grad: 0.1652 (0.1716) loss: 0.7159 (0.7311) time: 0.1366 data: 0.0517 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1660 (0.1715) loss: 0.7223 (0.7311) time: 0.2242 data: 0.1523 max mem: 9377 +Train: [53] Total time: 0:14:56 (0.1435 s / it) +Averaged stats: lr: 0.000059 grad: 0.1660 (0.1715) loss: 0.7223 (0.7311) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:03:22 loss: 0.8601 (0.8601) time: 3.2629 data: 3.1987 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8645 (0.8672) time: 0.1329 data: 0.1082 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:14 (0.2285 s / it) +Averaged stats (hcp-train-subset): loss: 0.8645 (0.8672) +Eval (hcp-val): [53] [ 0/62] eta: 0:04:25 loss: 0.8615 (0.8615) time: 4.2839 data: 4.2004 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8645 (0.8655) time: 0.1334 data: 0.1084 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (hcp-val): loss: 0.8645 (0.8655) +Eval (nsd-val): [53] [ 0/62] eta: 0:03:25 loss: 0.8325 (0.8325) time: 3.3141 data: 3.2579 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8384 (0.8399) time: 0.1208 data: 0.0960 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:13 (0.2105 s / it) +Averaged stats (nsd-val): loss: 0.8384 (0.8399) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 9:23:14 lr: 0.000059 grad: 0.1634 (0.1634) loss: 0.8170 (0.8170) time: 5.4072 data: 5.2641 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:20:26 lr: 0.000059 grad: 0.2924 (0.3570) loss: 0.7303 (0.7423) time: 0.1586 data: 0.0604 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:17:30 lr: 0.000059 grad: 0.2356 (0.3169) loss: 0.7263 (0.7338) time: 0.1509 data: 0.0576 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:16:14 lr: 0.000059 grad: 0.2152 (0.2893) loss: 0.7350 (0.7305) time: 0.1378 data: 0.0475 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:15:22 lr: 0.000059 grad: 0.1846 (0.2668) loss: 0.7370 (0.7322) time: 0.1583 data: 0.0627 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:14:37 lr: 0.000059 grad: 0.1718 (0.2506) loss: 0.7373 (0.7325) time: 0.1277 data: 0.0404 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:14:02 lr: 0.000059 grad: 0.1702 (0.2384) loss: 0.7282 (0.7324) time: 0.1298 data: 0.0353 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:13:38 lr: 0.000059 grad: 0.1702 (0.2297) loss: 0.7253 (0.7323) time: 0.1357 data: 0.0374 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:13:13 lr: 0.000059 grad: 0.1699 (0.2224) loss: 0.7317 (0.7319) time: 0.1277 data: 0.0304 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:12:59 lr: 0.000059 grad: 0.1604 (0.2164) loss: 0.7354 (0.7319) time: 0.1419 data: 0.0573 max mem: 9377 +Train: [54] [1000/6250] eta: 0:12:41 lr: 0.000059 grad: 0.1661 (0.2116) loss: 0.7238 (0.7317) time: 0.1463 data: 0.0634 max mem: 9377 +Train: [54] [1100/6250] eta: 0:12:22 lr: 0.000059 grad: 0.1634 (0.2077) loss: 0.7262 (0.7316) time: 0.1383 data: 0.0552 max mem: 9377 +Train: [54] [1200/6250] eta: 0:12:03 lr: 0.000059 grad: 0.1699 (0.2041) loss: 0.7281 (0.7317) time: 0.1195 data: 0.0363 max mem: 9377 +Train: [54] [1300/6250] eta: 0:11:47 lr: 0.000059 grad: 0.1666 (0.2016) loss: 0.7332 (0.7319) time: 0.1410 data: 0.0530 max mem: 9377 +Train: [54] [1400/6250] eta: 0:11:37 lr: 0.000059 grad: 0.1661 (0.1992) loss: 0.7312 (0.7320) time: 0.1349 data: 0.0511 max mem: 9377 +Train: [54] [1500/6250] eta: 0:11:28 lr: 0.000059 grad: 0.1687 (0.1970) loss: 0.7295 (0.7319) time: 0.1816 data: 0.1028 max mem: 9377 +Train: [54] [1600/6250] eta: 0:11:15 lr: 0.000059 grad: 0.1704 (0.1953) loss: 0.7332 (0.7318) time: 0.1515 data: 0.0661 max mem: 9377 +Train: [54] [1700/6250] eta: 0:11:01 lr: 0.000059 grad: 0.1625 (0.1935) loss: 0.7397 (0.7317) time: 0.1365 data: 0.0574 max mem: 9377 +Train: [54] [1800/6250] eta: 0:10:46 lr: 0.000059 grad: 0.1667 (0.1920) loss: 0.7316 (0.7316) time: 0.1294 data: 0.0427 max mem: 9377 +Train: [54] [1900/6250] eta: 0:10:32 lr: 0.000059 grad: 0.1599 (0.1905) loss: 0.7325 (0.7319) time: 0.1365 data: 0.0526 max mem: 9377 +Train: [54] [2000/6250] eta: 0:10:16 lr: 0.000059 grad: 0.1643 (0.1892) loss: 0.7241 (0.7319) time: 0.1246 data: 0.0324 max mem: 9377 +Train: [54] [2100/6250] eta: 0:10:01 lr: 0.000059 grad: 0.1678 (0.1881) loss: 0.7264 (0.7318) time: 0.1424 data: 0.0487 max mem: 9377 +Train: [54] [2200/6250] eta: 0:09:45 lr: 0.000059 grad: 0.1639 (0.1869) loss: 0.7354 (0.7320) time: 0.1321 data: 0.0448 max mem: 9377 +Train: [54] [2300/6250] eta: 0:09:29 lr: 0.000059 grad: 0.1632 (0.1859) loss: 0.7261 (0.7320) time: 0.1243 data: 0.0378 max mem: 9377 +Train: [54] [2400/6250] eta: 0:09:14 lr: 0.000059 grad: 0.1634 (0.1850) loss: 0.7198 (0.7320) time: 0.1317 data: 0.0424 max mem: 9377 +Train: [54] [2500/6250] eta: 0:08:59 lr: 0.000059 grad: 0.1554 (0.1840) loss: 0.7343 (0.7322) time: 0.1417 data: 0.0553 max mem: 9377 +Train: [54] [2600/6250] eta: 0:08:43 lr: 0.000059 grad: 0.1700 (0.1834) loss: 0.7299 (0.7322) time: 0.1451 data: 0.0642 max mem: 9377 +Train: [54] [2700/6250] eta: 0:08:27 lr: 0.000059 grad: 0.1669 (0.1828) loss: 0.7132 (0.7320) time: 0.1283 data: 0.0395 max mem: 9377 +Train: [54] [2800/6250] eta: 0:08:12 lr: 0.000058 grad: 0.1593 (0.1821) loss: 0.7271 (0.7319) time: 0.1354 data: 0.0552 max mem: 9377 +Train: [54] [2900/6250] eta: 0:07:56 lr: 0.000058 grad: 0.1562 (0.1816) loss: 0.7291 (0.7317) time: 0.1393 data: 0.0527 max mem: 9377 +Train: [54] [3000/6250] eta: 0:07:41 lr: 0.000058 grad: 0.1609 (0.1809) loss: 0.7267 (0.7316) time: 0.1329 data: 0.0519 max mem: 9377 +Train: [54] [3100/6250] eta: 0:07:27 lr: 0.000058 grad: 0.1666 (0.1803) loss: 0.7349 (0.7315) time: 0.1371 data: 0.0556 max mem: 9377 +Train: [54] [3200/6250] eta: 0:07:13 lr: 0.000058 grad: 0.1607 (0.1798) loss: 0.7221 (0.7313) time: 0.1203 data: 0.0383 max mem: 9377 +Train: [54] [3300/6250] eta: 0:06:58 lr: 0.000058 grad: 0.1666 (0.1794) loss: 0.7272 (0.7311) time: 0.1221 data: 0.0401 max mem: 9377 +Train: [54] [3400/6250] eta: 0:06:44 lr: 0.000058 grad: 0.1663 (0.1791) loss: 0.7388 (0.7310) time: 0.1445 data: 0.0639 max mem: 9377 +Train: [54] [3500/6250] eta: 0:06:30 lr: 0.000058 grad: 0.1668 (0.1787) loss: 0.7168 (0.7309) time: 0.1564 data: 0.0757 max mem: 9377 +Train: [54] [3600/6250] eta: 0:06:16 lr: 0.000058 grad: 0.1626 (0.1784) loss: 0.7365 (0.7309) time: 0.1455 data: 0.0645 max mem: 9377 +Train: [54] [3700/6250] eta: 0:06:02 lr: 0.000058 grad: 0.1591 (0.1781) loss: 0.7350 (0.7309) time: 0.1286 data: 0.0487 max mem: 9377 +Train: [54] [3800/6250] eta: 0:05:48 lr: 0.000058 grad: 0.1651 (0.1778) loss: 0.7195 (0.7308) time: 0.1555 data: 0.0750 max mem: 9377 +Train: [54] [3900/6250] eta: 0:05:34 lr: 0.000058 grad: 0.1658 (0.1774) loss: 0.7288 (0.7308) time: 0.1567 data: 0.0768 max mem: 9377 +Train: [54] [4000/6250] eta: 0:05:19 lr: 0.000058 grad: 0.1618 (0.1771) loss: 0.7168 (0.7307) time: 0.1688 data: 0.0863 max mem: 9377 +Train: [54] [4100/6250] eta: 0:05:04 lr: 0.000058 grad: 0.1661 (0.1769) loss: 0.7286 (0.7307) time: 0.1491 data: 0.0652 max mem: 9377 +Train: [54] [4200/6250] eta: 0:04:50 lr: 0.000058 grad: 0.1678 (0.1766) loss: 0.7253 (0.7307) time: 0.1485 data: 0.0691 max mem: 9377 +Train: [54] [4300/6250] eta: 0:04:36 lr: 0.000058 grad: 0.1637 (0.1763) loss: 0.7237 (0.7307) time: 0.1275 data: 0.0454 max mem: 9377 +Train: [54] [4400/6250] eta: 0:04:22 lr: 0.000058 grad: 0.1542 (0.1761) loss: 0.7359 (0.7305) time: 0.1624 data: 0.0847 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:08 lr: 0.000058 grad: 0.1585 (0.1758) loss: 0.7259 (0.7305) time: 0.1572 data: 0.0752 max mem: 9377 +Train: [54] [4600/6250] eta: 0:03:53 lr: 0.000058 grad: 0.1666 (0.1755) loss: 0.7335 (0.7304) time: 0.1387 data: 0.0629 max mem: 9377 +Train: [54] [4700/6250] eta: 0:03:39 lr: 0.000058 grad: 0.1637 (0.1753) loss: 0.7260 (0.7303) time: 0.1434 data: 0.0676 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:25 lr: 0.000058 grad: 0.1638 (0.1750) loss: 0.7297 (0.7303) time: 0.1498 data: 0.0666 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:11 lr: 0.000058 grad: 0.1578 (0.1748) loss: 0.7312 (0.7303) time: 0.1455 data: 0.0673 max mem: 9377 +Train: [54] [5000/6250] eta: 0:02:57 lr: 0.000058 grad: 0.1649 (0.1746) loss: 0.7189 (0.7301) time: 0.1474 data: 0.0687 max mem: 9377 +Train: [54] [5100/6250] eta: 0:02:43 lr: 0.000058 grad: 0.1616 (0.1745) loss: 0.7413 (0.7301) time: 0.1622 data: 0.0804 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:28 lr: 0.000058 grad: 0.1611 (0.1743) loss: 0.7246 (0.7300) time: 0.1409 data: 0.0564 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:14 lr: 0.000058 grad: 0.1723 (0.1742) loss: 0.7216 (0.7298) time: 0.1392 data: 0.0566 max mem: 9377 +Train: [54] [5400/6250] eta: 0:02:00 lr: 0.000058 grad: 0.1613 (0.1739) loss: 0.7228 (0.7297) time: 0.1399 data: 0.0566 max mem: 9377 +Train: [54] [5500/6250] eta: 0:01:46 lr: 0.000058 grad: 0.1741 (0.1738) loss: 0.7204 (0.7296) time: 0.1399 data: 0.0559 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:31 lr: 0.000058 grad: 0.1600 (0.1737) loss: 0.7194 (0.7295) time: 0.1525 data: 0.0712 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:17 lr: 0.000058 grad: 0.1686 (0.1736) loss: 0.7193 (0.7293) time: 0.1268 data: 0.0425 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:03 lr: 0.000057 grad: 0.1625 (0.1735) loss: 0.7304 (0.7292) time: 0.1585 data: 0.0763 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:49 lr: 0.000057 grad: 0.1597 (0.1734) loss: 0.7291 (0.7292) time: 0.1305 data: 0.0487 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:35 lr: 0.000057 grad: 0.1653 (0.1732) loss: 0.7220 (0.7290) time: 0.1493 data: 0.0709 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:21 lr: 0.000057 grad: 0.1683 (0.1731) loss: 0.7123 (0.7289) time: 0.1459 data: 0.0647 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:07 lr: 0.000057 grad: 0.1595 (0.1730) loss: 0.7193 (0.7288) time: 0.1527 data: 0.0708 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1580 (0.1729) loss: 0.7316 (0.7288) time: 0.1333 data: 0.0529 max mem: 9377 +Train: [54] Total time: 0:14:47 (0.1420 s / it) +Averaged stats: lr: 0.000057 grad: 0.1580 (0.1729) loss: 0.7316 (0.7288) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:05:05 loss: 0.8593 (0.8593) time: 4.9228 data: 4.8928 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8647 (0.8662) time: 0.1260 data: 0.1010 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:13 (0.2149 s / it) +Averaged stats (hcp-train-subset): loss: 0.8647 (0.8662) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [54] [ 0/62] eta: 0:03:59 loss: 0.8615 (0.8615) time: 3.8642 data: 3.7969 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8617 (0.8639) time: 0.1283 data: 0.1031 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:13 (0.2172 s / it) +Averaged stats (hcp-val): loss: 0.8617 (0.8639) +Making plots (hcp-val): example=7 +Eval (nsd-val): [54] [ 0/62] eta: 0:04:04 loss: 0.8291 (0.8291) time: 3.9359 data: 3.8700 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8365 (0.8375) time: 0.1069 data: 0.0805 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:13 (0.2254 s / it) +Averaged stats (nsd-val): loss: 0.8365 (0.8375) +Making plots (nsd-val): example=45 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 8:09:09 lr: 0.000057 grad: 0.1262 (0.1262) loss: 0.8401 (0.8401) time: 4.6959 data: 4.5141 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:20:19 lr: 0.000057 grad: 0.3133 (0.3337) loss: 0.7262 (0.7506) time: 0.1356 data: 0.0423 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:17:47 lr: 0.000057 grad: 0.2341 (0.2957) loss: 0.7229 (0.7418) time: 0.1685 data: 0.0734 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:17:10 lr: 0.000057 grad: 0.2085 (0.2770) loss: 0.7313 (0.7411) time: 0.1798 data: 0.0920 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:16:30 lr: 0.000057 grad: 0.1771 (0.2568) loss: 0.7347 (0.7404) time: 0.1338 data: 0.0407 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:15:39 lr: 0.000057 grad: 0.1755 (0.2433) loss: 0.7466 (0.7406) time: 0.1323 data: 0.0359 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:15:03 lr: 0.000057 grad: 0.1877 (0.2328) loss: 0.7305 (0.7402) time: 0.1304 data: 0.0356 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:14:28 lr: 0.000057 grad: 0.1818 (0.2260) loss: 0.7308 (0.7391) time: 0.1400 data: 0.0499 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:13:59 lr: 0.000057 grad: 0.1797 (0.2203) loss: 0.7328 (0.7382) time: 0.1284 data: 0.0403 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:13:32 lr: 0.000057 grad: 0.1678 (0.2146) loss: 0.7321 (0.7381) time: 0.1174 data: 0.0207 max mem: 9377 +Train: [55] [1000/6250] eta: 0:13:09 lr: 0.000057 grad: 0.1688 (0.2103) loss: 0.7285 (0.7377) time: 0.1470 data: 0.0603 max mem: 9377 +Train: [55] [1100/6250] eta: 0:12:52 lr: 0.000057 grad: 0.1748 (0.2065) loss: 0.7261 (0.7374) time: 0.1466 data: 0.0627 max mem: 9377 +Train: [55] [1200/6250] eta: 0:12:36 lr: 0.000057 grad: 0.1533 (0.2031) loss: 0.7485 (0.7374) time: 0.1418 data: 0.0587 max mem: 9377 +Train: [55] [1300/6250] eta: 0:12:24 lr: 0.000057 grad: 0.1631 (0.2005) loss: 0.7313 (0.7368) time: 0.1538 data: 0.0674 max mem: 9377 +Train: [55] [1400/6250] eta: 0:12:13 lr: 0.000057 grad: 0.1621 (0.1981) loss: 0.7308 (0.7362) time: 0.1594 data: 0.0707 max mem: 9377 +Train: [55] [1500/6250] eta: 0:12:00 lr: 0.000057 grad: 0.1645 (0.1960) loss: 0.7130 (0.7355) time: 0.1635 data: 0.0775 max mem: 9377 +Train: [55] [1600/6250] eta: 0:11:45 lr: 0.000057 grad: 0.1664 (0.1942) loss: 0.7330 (0.7349) time: 0.1451 data: 0.0651 max mem: 9377 +Train: [55] [1700/6250] eta: 0:11:34 lr: 0.000057 grad: 0.1667 (0.1927) loss: 0.7248 (0.7349) time: 0.1712 data: 0.0827 max mem: 9377 +Train: [55] [1800/6250] eta: 0:11:23 lr: 0.000057 grad: 0.1656 (0.1914) loss: 0.7250 (0.7346) time: 0.1527 data: 0.0705 max mem: 9377 +Train: [55] [1900/6250] eta: 0:11:09 lr: 0.000057 grad: 0.1573 (0.1899) loss: 0.7505 (0.7347) time: 0.1519 data: 0.0670 max mem: 9377 +Train: [55] [2000/6250] eta: 0:10:54 lr: 0.000057 grad: 0.1667 (0.1887) loss: 0.7349 (0.7346) time: 0.1538 data: 0.0669 max mem: 9377 +Train: [55] [2100/6250] eta: 0:10:36 lr: 0.000057 grad: 0.1720 (0.1876) loss: 0.7303 (0.7346) time: 0.1439 data: 0.0538 max mem: 9377 +Train: [55] [2200/6250] eta: 0:10:19 lr: 0.000057 grad: 0.1493 (0.1862) loss: 0.7500 (0.7347) time: 0.1468 data: 0.0641 max mem: 9377 +Train: [55] [2300/6250] eta: 0:10:02 lr: 0.000057 grad: 0.1611 (0.1852) loss: 0.7313 (0.7346) time: 0.1561 data: 0.0731 max mem: 9377 +Train: [55] [2400/6250] eta: 0:09:45 lr: 0.000057 grad: 0.1643 (0.1843) loss: 0.7371 (0.7345) time: 0.1579 data: 0.0837 max mem: 9377 +Train: [55] [2500/6250] eta: 0:09:28 lr: 0.000057 grad: 0.1639 (0.1836) loss: 0.7250 (0.7342) time: 0.1405 data: 0.0590 max mem: 9377 +Train: [55] [2600/6250] eta: 0:09:13 lr: 0.000056 grad: 0.1644 (0.1829) loss: 0.7229 (0.7339) time: 0.1586 data: 0.0751 max mem: 9377 +Train: [55] [2700/6250] eta: 0:08:56 lr: 0.000056 grad: 0.1587 (0.1822) loss: 0.7178 (0.7336) time: 0.1453 data: 0.0660 max mem: 9377 +Train: [55] [2800/6250] eta: 0:08:39 lr: 0.000056 grad: 0.1631 (0.1817) loss: 0.7339 (0.7334) time: 0.1487 data: 0.0616 max mem: 9377 +Train: [55] [2900/6250] eta: 0:08:23 lr: 0.000056 grad: 0.1590 (0.1812) loss: 0.7345 (0.7334) time: 0.1653 data: 0.0867 max mem: 9377 +Train: [55] [3000/6250] eta: 0:08:07 lr: 0.000056 grad: 0.1605 (0.1808) loss: 0.7196 (0.7331) time: 0.1480 data: 0.0643 max mem: 9377 +Train: [55] [3100/6250] eta: 0:07:50 lr: 0.000056 grad: 0.1775 (0.1805) loss: 0.7257 (0.7329) time: 0.1290 data: 0.0434 max mem: 9377 +Train: [55] [3200/6250] eta: 0:07:34 lr: 0.000056 grad: 0.1702 (0.1802) loss: 0.7302 (0.7330) time: 0.1471 data: 0.0667 max mem: 9377 +Train: [55] [3300/6250] eta: 0:07:18 lr: 0.000056 grad: 0.1577 (0.1797) loss: 0.7334 (0.7329) time: 0.1493 data: 0.0688 max mem: 9377 +Train: [55] [3400/6250] eta: 0:07:02 lr: 0.000056 grad: 0.1618 (0.1792) loss: 0.7320 (0.7330) time: 0.1365 data: 0.0491 max mem: 9377 +Train: [55] [3500/6250] eta: 0:06:47 lr: 0.000056 grad: 0.1583 (0.1788) loss: 0.7364 (0.7329) time: 0.1328 data: 0.0477 max mem: 9377 +Train: [55] [3600/6250] eta: 0:06:31 lr: 0.000056 grad: 0.1624 (0.1783) loss: 0.7276 (0.7328) time: 0.1320 data: 0.0518 max mem: 9377 +Train: [55] [3700/6250] eta: 0:06:15 lr: 0.000056 grad: 0.1580 (0.1780) loss: 0.7288 (0.7329) time: 0.1341 data: 0.0504 max mem: 9377 +Train: [55] [3800/6250] eta: 0:06:00 lr: 0.000056 grad: 0.1692 (0.1777) loss: 0.7164 (0.7327) time: 0.1518 data: 0.0743 max mem: 9377 +Train: [55] [3900/6250] eta: 0:05:45 lr: 0.000056 grad: 0.1706 (0.1775) loss: 0.7156 (0.7324) time: 0.1462 data: 0.0574 max mem: 9377 +Train: [55] [4000/6250] eta: 0:05:30 lr: 0.000056 grad: 0.1632 (0.1773) loss: 0.7234 (0.7321) time: 0.1335 data: 0.0437 max mem: 9377 +Train: [55] [4100/6250] eta: 0:05:14 lr: 0.000056 grad: 0.1688 (0.1770) loss: 0.7207 (0.7319) time: 0.1415 data: 0.0603 max mem: 9377 +Train: [55] [4200/6250] eta: 0:04:59 lr: 0.000056 grad: 0.1652 (0.1768) loss: 0.7240 (0.7316) time: 0.1378 data: 0.0625 max mem: 9377 +Train: [55] [4300/6250] eta: 0:04:44 lr: 0.000056 grad: 0.1597 (0.1766) loss: 0.7235 (0.7315) time: 0.1315 data: 0.0550 max mem: 9377 +Train: [55] [4400/6250] eta: 0:04:29 lr: 0.000056 grad: 0.1641 (0.1763) loss: 0.7222 (0.7312) time: 0.1540 data: 0.0724 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:15 lr: 0.000056 grad: 0.1625 (0.1761) loss: 0.7264 (0.7311) time: 0.1714 data: 0.0909 max mem: 9377 +Train: [55] [4600/6250] eta: 0:04:00 lr: 0.000056 grad: 0.1666 (0.1759) loss: 0.7164 (0.7309) time: 0.1421 data: 0.0609 max mem: 9377 +Train: [55] [4700/6250] eta: 0:03:45 lr: 0.000056 grad: 0.1654 (0.1757) loss: 0.7126 (0.7307) time: 0.1438 data: 0.0627 max mem: 9377 +Train: [55] [4800/6250] eta: 0:03:31 lr: 0.000056 grad: 0.1651 (0.1756) loss: 0.7343 (0.7305) time: 0.1359 data: 0.0562 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:16 lr: 0.000056 grad: 0.1685 (0.1755) loss: 0.7259 (0.7304) time: 0.1404 data: 0.0514 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:01 lr: 0.000056 grad: 0.1622 (0.1753) loss: 0.7234 (0.7303) time: 0.1495 data: 0.0714 max mem: 9377 +Train: [55] [5100/6250] eta: 0:02:47 lr: 0.000056 grad: 0.1743 (0.1751) loss: 0.7281 (0.7302) time: 0.1537 data: 0.0741 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:32 lr: 0.000056 grad: 0.1667 (0.1749) loss: 0.7299 (0.7302) time: 0.1588 data: 0.0861 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:18 lr: 0.000056 grad: 0.1693 (0.1748) loss: 0.7198 (0.7301) time: 0.1362 data: 0.0591 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:03 lr: 0.000056 grad: 0.1631 (0.1747) loss: 0.7302 (0.7300) time: 0.1234 data: 0.0389 max mem: 9377 +Train: [55] [5500/6250] eta: 0:01:49 lr: 0.000056 grad: 0.1663 (0.1745) loss: 0.7289 (0.7299) time: 0.1553 data: 0.0766 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:34 lr: 0.000055 grad: 0.1710 (0.1744) loss: 0.7200 (0.7297) time: 0.1521 data: 0.0765 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:20 lr: 0.000055 grad: 0.1626 (0.1743) loss: 0.7230 (0.7297) time: 0.1321 data: 0.0502 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:05 lr: 0.000055 grad: 0.1685 (0.1741) loss: 0.7248 (0.7297) time: 0.1585 data: 0.0823 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:50 lr: 0.000055 grad: 0.1641 (0.1739) loss: 0.7380 (0.7298) time: 0.1419 data: 0.0593 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:36 lr: 0.000055 grad: 0.1706 (0.1738) loss: 0.7301 (0.7298) time: 0.1486 data: 0.0675 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:21 lr: 0.000055 grad: 0.1679 (0.1738) loss: 0.7182 (0.7297) time: 0.1101 data: 0.0267 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:07 lr: 0.000055 grad: 0.1629 (0.1737) loss: 0.7228 (0.7297) time: 0.1464 data: 0.0617 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.1621 (0.1736) loss: 0.7316 (0.7297) time: 0.1428 data: 0.0578 max mem: 9377 +Train: [55] Total time: 0:15:12 (0.1460 s / it) +Averaged stats: lr: 0.000055 grad: 0.1621 (0.1736) loss: 0.7316 (0.7297) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:03:49 loss: 0.8604 (0.8604) time: 3.6973 data: 3.6048 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8628 (0.8652) time: 0.1243 data: 0.0976 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:13 (0.2119 s / it) +Averaged stats (hcp-train-subset): loss: 0.8628 (0.8652) +Eval (hcp-val): [55] [ 0/62] eta: 0:05:55 loss: 0.8596 (0.8596) time: 5.7339 data: 5.7036 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8615 (0.8637) time: 0.1099 data: 0.0850 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:13 (0.2155 s / it) +Averaged stats (hcp-val): loss: 0.8615 (0.8637) +Eval (nsd-val): [55] [ 0/62] eta: 0:05:34 loss: 0.8171 (0.8171) time: 5.3913 data: 5.3608 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8335 (0.8347) time: 0.1156 data: 0.0909 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:12 (0.2060 s / it) +Averaged stats (nsd-val): loss: 0.8335 (0.8347) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 10:50:44 lr: 0.000055 grad: 0.1208 (0.1208) loss: 0.8557 (0.8557) time: 6.2471 data: 6.1131 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:22:11 lr: 0.000055 grad: 0.3102 (0.3245) loss: 0.7477 (0.7502) time: 0.1442 data: 0.0442 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:18:38 lr: 0.000055 grad: 0.2019 (0.2864) loss: 0.7439 (0.7481) time: 0.1311 data: 0.0399 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:16:49 lr: 0.000055 grad: 0.2143 (0.2716) loss: 0.7392 (0.7439) time: 0.1351 data: 0.0535 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:15:49 lr: 0.000055 grad: 0.1761 (0.2546) loss: 0.7196 (0.7418) time: 0.1610 data: 0.0717 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:15:06 lr: 0.000055 grad: 0.1660 (0.2377) loss: 0.7455 (0.7410) time: 0.1244 data: 0.0291 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:14:35 lr: 0.000055 grad: 0.1811 (0.2277) loss: 0.7323 (0.7395) time: 0.1436 data: 0.0516 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:14:09 lr: 0.000055 grad: 0.1733 (0.2202) loss: 0.7260 (0.7384) time: 0.1427 data: 0.0563 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:13:37 lr: 0.000055 grad: 0.1670 (0.2139) loss: 0.7399 (0.7382) time: 0.1402 data: 0.0533 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:13:13 lr: 0.000055 grad: 0.1683 (0.2089) loss: 0.7308 (0.7382) time: 0.1383 data: 0.0361 max mem: 9377 +Train: [56] [1000/6250] eta: 0:12:50 lr: 0.000055 grad: 0.1646 (0.2051) loss: 0.7299 (0.7381) time: 0.1431 data: 0.0583 max mem: 9377 +Train: [56] [1100/6250] eta: 0:12:28 lr: 0.000055 grad: 0.1622 (0.2017) loss: 0.7275 (0.7377) time: 0.1240 data: 0.0387 max mem: 9377 +Train: [56] [1200/6250] eta: 0:12:07 lr: 0.000055 grad: 0.1713 (0.1987) loss: 0.7338 (0.7374) time: 0.1322 data: 0.0449 max mem: 9377 +Train: [56] [1300/6250] eta: 0:11:54 lr: 0.000055 grad: 0.1670 (0.1965) loss: 0.7209 (0.7368) time: 0.1910 data: 0.1159 max mem: 9377 +Train: [56] [1400/6250] eta: 0:11:39 lr: 0.000055 grad: 0.1639 (0.1945) loss: 0.7261 (0.7364) time: 0.1482 data: 0.0674 max mem: 9377 +Train: [56] [1500/6250] eta: 0:11:27 lr: 0.000055 grad: 0.1659 (0.1926) loss: 0.7248 (0.7363) time: 0.1609 data: 0.0786 max mem: 9377 +Train: [56] [1600/6250] eta: 0:11:13 lr: 0.000055 grad: 0.1716 (0.1911) loss: 0.7323 (0.7359) time: 0.1380 data: 0.0598 max mem: 9377 +Train: [56] [1700/6250] eta: 0:10:59 lr: 0.000055 grad: 0.1659 (0.1901) loss: 0.7253 (0.7354) time: 0.1296 data: 0.0460 max mem: 9377 +Train: [56] [1800/6250] eta: 0:10:43 lr: 0.000055 grad: 0.1713 (0.1889) loss: 0.7319 (0.7350) time: 0.1432 data: 0.0610 max mem: 9377 +Train: [56] [1900/6250] eta: 0:10:27 lr: 0.000055 grad: 0.1644 (0.1878) loss: 0.7338 (0.7348) time: 0.1432 data: 0.0537 max mem: 9377 +Train: [56] [2000/6250] eta: 0:10:09 lr: 0.000055 grad: 0.1528 (0.1867) loss: 0.7380 (0.7346) time: 0.1379 data: 0.0510 max mem: 9377 +Train: [56] [2100/6250] eta: 0:09:53 lr: 0.000055 grad: 0.1623 (0.1856) loss: 0.7268 (0.7343) time: 0.1249 data: 0.0330 max mem: 9377 +Train: [56] [2200/6250] eta: 0:09:36 lr: 0.000055 grad: 0.1601 (0.1848) loss: 0.7275 (0.7342) time: 0.1231 data: 0.0308 max mem: 9377 +Train: [56] [2300/6250] eta: 0:09:20 lr: 0.000055 grad: 0.1696 (0.1842) loss: 0.7267 (0.7339) time: 0.1304 data: 0.0450 max mem: 9377 +Train: [56] [2400/6250] eta: 0:09:04 lr: 0.000054 grad: 0.1616 (0.1835) loss: 0.7240 (0.7338) time: 0.1361 data: 0.0466 max mem: 9377 +Train: [56] [2500/6250] eta: 0:08:48 lr: 0.000054 grad: 0.1689 (0.1829) loss: 0.7314 (0.7337) time: 0.1341 data: 0.0443 max mem: 9377 +Train: [56] [2600/6250] eta: 0:08:33 lr: 0.000054 grad: 0.1749 (0.1825) loss: 0.7242 (0.7335) time: 0.1285 data: 0.0419 max mem: 9377 +Train: [56] [2700/6250] eta: 0:08:20 lr: 0.000054 grad: 0.1724 (0.1822) loss: 0.7255 (0.7333) time: 0.1851 data: 0.1014 max mem: 9377 +Train: [56] [2800/6250] eta: 0:08:04 lr: 0.000054 grad: 0.1729 (0.1819) loss: 0.7264 (0.7330) time: 0.1371 data: 0.0542 max mem: 9377 +Train: [56] [2900/6250] eta: 0:07:50 lr: 0.000054 grad: 0.1672 (0.1815) loss: 0.7302 (0.7328) time: 0.1206 data: 0.0336 max mem: 9377 +Train: [56] [3000/6250] eta: 0:07:35 lr: 0.000054 grad: 0.1741 (0.1811) loss: 0.7243 (0.7325) time: 0.1399 data: 0.0618 max mem: 9377 +Train: [56] [3100/6250] eta: 0:07:20 lr: 0.000054 grad: 0.1767 (0.1808) loss: 0.7237 (0.7323) time: 0.1245 data: 0.0422 max mem: 9377 +Train: [56] [3200/6250] eta: 0:07:06 lr: 0.000054 grad: 0.1767 (0.1806) loss: 0.7209 (0.7321) time: 0.1316 data: 0.0416 max mem: 9377 +Train: [56] [3300/6250] eta: 0:06:52 lr: 0.000054 grad: 0.1683 (0.1803) loss: 0.7310 (0.7319) time: 0.1392 data: 0.0565 max mem: 9377 +Train: [56] [3400/6250] eta: 0:06:38 lr: 0.000054 grad: 0.1732 (0.1801) loss: 0.7248 (0.7318) time: 0.1382 data: 0.0587 max mem: 9377 +Train: [56] [3500/6250] eta: 0:06:24 lr: 0.000054 grad: 0.1633 (0.1797) loss: 0.7234 (0.7317) time: 0.1251 data: 0.0425 max mem: 9377 +Train: [56] [3600/6250] eta: 0:06:10 lr: 0.000054 grad: 0.1588 (0.1793) loss: 0.7300 (0.7317) time: 0.1404 data: 0.0602 max mem: 9377 +Train: [56] [3700/6250] eta: 0:05:55 lr: 0.000054 grad: 0.1676 (0.1789) loss: 0.7346 (0.7317) time: 0.1165 data: 0.0247 max mem: 9377 +Train: [56] [3800/6250] eta: 0:05:41 lr: 0.000054 grad: 0.1634 (0.1784) loss: 0.7296 (0.7318) time: 0.1336 data: 0.0559 max mem: 9377 +Train: [56] [3900/6250] eta: 0:05:27 lr: 0.000054 grad: 0.1539 (0.1780) loss: 0.7280 (0.7319) time: 0.1360 data: 0.0498 max mem: 9377 +Train: [56] [4000/6250] eta: 0:05:13 lr: 0.000054 grad: 0.1579 (0.1776) loss: 0.7271 (0.7320) time: 0.1694 data: 0.0867 max mem: 9377 +Train: [56] [4100/6250] eta: 0:04:59 lr: 0.000054 grad: 0.1554 (0.1772) loss: 0.7379 (0.7320) time: 0.1406 data: 0.0616 max mem: 9377 +Train: [56] [4200/6250] eta: 0:04:45 lr: 0.000054 grad: 0.1676 (0.1770) loss: 0.7272 (0.7320) time: 0.1351 data: 0.0533 max mem: 9377 +Train: [56] [4300/6250] eta: 0:04:31 lr: 0.000054 grad: 0.1606 (0.1768) loss: 0.7327 (0.7321) time: 0.1456 data: 0.0640 max mem: 9377 +Train: [56] [4400/6250] eta: 0:04:17 lr: 0.000054 grad: 0.1635 (0.1765) loss: 0.7201 (0.7321) time: 0.1316 data: 0.0482 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:03 lr: 0.000054 grad: 0.1688 (0.1763) loss: 0.7359 (0.7320) time: 0.1474 data: 0.0644 max mem: 9377 +Train: [56] [4600/6250] eta: 0:03:49 lr: 0.000054 grad: 0.1655 (0.1761) loss: 0.7253 (0.7320) time: 0.1356 data: 0.0465 max mem: 9377 +Train: [56] [4700/6250] eta: 0:03:35 lr: 0.000054 grad: 0.1665 (0.1760) loss: 0.7237 (0.7319) time: 0.1355 data: 0.0579 max mem: 9377 +Train: [56] [4800/6250] eta: 0:03:21 lr: 0.000054 grad: 0.1803 (0.1759) loss: 0.7355 (0.7319) time: 0.1403 data: 0.0577 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:07 lr: 0.000054 grad: 0.1714 (0.1758) loss: 0.7358 (0.7318) time: 0.1269 data: 0.0397 max mem: 9377 +Train: [56] [5000/6250] eta: 0:02:53 lr: 0.000054 grad: 0.1712 (0.1757) loss: 0.7282 (0.7317) time: 0.1760 data: 0.0931 max mem: 9377 +Train: [56] [5100/6250] eta: 0:02:39 lr: 0.000054 grad: 0.1699 (0.1755) loss: 0.7225 (0.7316) time: 0.1417 data: 0.0556 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:25 lr: 0.000054 grad: 0.1706 (0.1755) loss: 0.7335 (0.7315) time: 0.1396 data: 0.0548 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:11 lr: 0.000054 grad: 0.1717 (0.1755) loss: 0.7285 (0.7314) time: 0.1412 data: 0.0577 max mem: 9377 +Train: [56] [5400/6250] eta: 0:01:57 lr: 0.000054 grad: 0.1679 (0.1755) loss: 0.7241 (0.7313) time: 0.1291 data: 0.0468 max mem: 9377 +Train: [56] [5500/6250] eta: 0:01:43 lr: 0.000053 grad: 0.1640 (0.1753) loss: 0.7375 (0.7313) time: 0.1462 data: 0.0717 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:30 lr: 0.000053 grad: 0.1811 (0.1753) loss: 0.7190 (0.7312) time: 0.1403 data: 0.0598 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:16 lr: 0.000053 grad: 0.1671 (0.1753) loss: 0.7223 (0.7311) time: 0.1728 data: 0.0925 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:02 lr: 0.000053 grad: 0.1698 (0.1752) loss: 0.7228 (0.7310) time: 0.1102 data: 0.0280 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:48 lr: 0.000053 grad: 0.1729 (0.1752) loss: 0.7184 (0.7309) time: 0.1418 data: 0.0610 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:34 lr: 0.000053 grad: 0.1747 (0.1751) loss: 0.7132 (0.7307) time: 0.1439 data: 0.0688 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:20 lr: 0.000053 grad: 0.1680 (0.1750) loss: 0.7193 (0.7307) time: 0.1336 data: 0.0525 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:06 lr: 0.000053 grad: 0.1675 (0.1749) loss: 0.7238 (0.7305) time: 0.1098 data: 0.0190 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1778 (0.1749) loss: 0.7190 (0.7304) time: 0.1336 data: 0.0561 max mem: 9377 +Train: [56] Total time: 0:14:34 (0.1399 s / it) +Averaged stats: lr: 0.000053 grad: 0.1778 (0.1749) loss: 0.7190 (0.7304) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:05:11 loss: 0.8638 (0.8638) time: 5.0246 data: 4.9941 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8676 (0.8685) time: 0.1418 data: 0.1149 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (hcp-train-subset): loss: 0.8676 (0.8685) +Eval (hcp-val): [56] [ 0/62] eta: 0:05:49 loss: 0.8692 (0.8692) time: 5.6441 data: 5.6100 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8637 (0.8662) time: 0.1256 data: 0.1006 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:14 (0.2258 s / it) +Averaged stats (hcp-val): loss: 0.8637 (0.8662) +Eval (nsd-val): [56] [ 0/62] eta: 0:03:56 loss: 0.8269 (0.8269) time: 3.8090 data: 3.7464 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8381 (0.8388) time: 0.1257 data: 0.1005 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (nsd-val): loss: 0.8381 (0.8388) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 11:18:56 lr: 0.000053 grad: 0.2693 (0.2693) loss: 0.7385 (0.7385) time: 6.5179 data: 6.4230 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:20:18 lr: 0.000053 grad: 0.2647 (0.3106) loss: 0.7247 (0.7506) time: 0.1638 data: 0.0678 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:18:06 lr: 0.000053 grad: 0.2458 (0.2946) loss: 0.7278 (0.7343) time: 0.1396 data: 0.0469 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:17:16 lr: 0.000053 grad: 0.2350 (0.2810) loss: 0.7093 (0.7306) time: 0.1562 data: 0.0750 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:16:48 lr: 0.000053 grad: 0.2235 (0.2642) loss: 0.7277 (0.7285) time: 0.1530 data: 0.0591 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:16:03 lr: 0.000053 grad: 0.1854 (0.2535) loss: 0.7128 (0.7280) time: 0.1526 data: 0.0752 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:15:45 lr: 0.000053 grad: 0.1766 (0.2417) loss: 0.7322 (0.7287) time: 0.1554 data: 0.0630 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:15:07 lr: 0.000053 grad: 0.1810 (0.2332) loss: 0.7329 (0.7297) time: 0.1396 data: 0.0470 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:14:30 lr: 0.000053 grad: 0.1754 (0.2261) loss: 0.7379 (0.7304) time: 0.1076 data: 0.0021 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:14:01 lr: 0.000053 grad: 0.1914 (0.2213) loss: 0.7226 (0.7300) time: 0.1427 data: 0.0509 max mem: 9377 +Train: [57] [1000/6250] eta: 0:13:30 lr: 0.000053 grad: 0.1725 (0.2176) loss: 0.7298 (0.7297) time: 0.1316 data: 0.0533 max mem: 9377 +Train: [57] [1100/6250] eta: 0:13:12 lr: 0.000053 grad: 0.1707 (0.2137) loss: 0.7238 (0.7291) time: 0.1477 data: 0.0700 max mem: 9377 +Train: [57] [1200/6250] eta: 0:12:55 lr: 0.000053 grad: 0.1728 (0.2110) loss: 0.7356 (0.7286) time: 0.1509 data: 0.0635 max mem: 9377 +Train: [57] [1300/6250] eta: 0:12:38 lr: 0.000053 grad: 0.1754 (0.2085) loss: 0.7174 (0.7281) time: 0.1408 data: 0.0606 max mem: 9377 +Train: [57] [1400/6250] eta: 0:12:23 lr: 0.000053 grad: 0.1695 (0.2059) loss: 0.7299 (0.7279) time: 0.1590 data: 0.0735 max mem: 9377 +Train: [57] [1500/6250] eta: 0:12:15 lr: 0.000053 grad: 0.1748 (0.2035) loss: 0.7184 (0.7276) time: 0.1736 data: 0.0919 max mem: 9377 +Train: [57] [1600/6250] eta: 0:11:57 lr: 0.000053 grad: 0.1802 (0.2019) loss: 0.7305 (0.7272) time: 0.1733 data: 0.0856 max mem: 9377 +Train: [57] [1700/6250] eta: 0:11:45 lr: 0.000053 grad: 0.1693 (0.2001) loss: 0.7157 (0.7270) time: 0.1629 data: 0.0820 max mem: 9377 +Train: [57] [1800/6250] eta: 0:11:29 lr: 0.000053 grad: 0.1697 (0.1983) loss: 0.7207 (0.7268) time: 0.1395 data: 0.0522 max mem: 9377 +Train: [57] [1900/6250] eta: 0:11:11 lr: 0.000053 grad: 0.1690 (0.1968) loss: 0.7242 (0.7268) time: 0.1495 data: 0.0611 max mem: 9377 +Train: [57] [2000/6250] eta: 0:10:51 lr: 0.000053 grad: 0.1685 (0.1953) loss: 0.7333 (0.7268) time: 0.1539 data: 0.0765 max mem: 9377 +Train: [57] [2100/6250] eta: 0:10:33 lr: 0.000053 grad: 0.1662 (0.1940) loss: 0.7381 (0.7270) time: 0.1459 data: 0.0599 max mem: 9377 +Train: [57] [2200/6250] eta: 0:10:17 lr: 0.000053 grad: 0.1730 (0.1929) loss: 0.7072 (0.7269) time: 0.1595 data: 0.0805 max mem: 9377 +Train: [57] [2300/6250] eta: 0:10:02 lr: 0.000052 grad: 0.1569 (0.1917) loss: 0.7382 (0.7271) time: 0.1616 data: 0.0758 max mem: 9377 +Train: [57] [2400/6250] eta: 0:09:46 lr: 0.000052 grad: 0.1603 (0.1906) loss: 0.7384 (0.7275) time: 0.1559 data: 0.0738 max mem: 9377 +Train: [57] [2500/6250] eta: 0:09:32 lr: 0.000052 grad: 0.1678 (0.1897) loss: 0.7294 (0.7277) time: 0.1644 data: 0.0883 max mem: 9377 +Train: [57] [2600/6250] eta: 0:09:19 lr: 0.000052 grad: 0.1671 (0.1888) loss: 0.7348 (0.7278) time: 0.1786 data: 0.1041 max mem: 9377 +Train: [57] [2700/6250] eta: 0:09:04 lr: 0.000052 grad: 0.1640 (0.1879) loss: 0.7335 (0.7282) time: 0.1645 data: 0.0842 max mem: 9377 +Train: [57] [2800/6250] eta: 0:08:50 lr: 0.000052 grad: 0.1713 (0.1873) loss: 0.7264 (0.7283) time: 0.1621 data: 0.0828 max mem: 9377 +Train: [57] [2900/6250] eta: 0:08:35 lr: 0.000052 grad: 0.1684 (0.1866) loss: 0.7379 (0.7284) time: 0.1679 data: 0.0917 max mem: 9377 +Train: [57] [3000/6250] eta: 0:08:19 lr: 0.000052 grad: 0.1676 (0.1861) loss: 0.7255 (0.7283) time: 0.1429 data: 0.0658 max mem: 9377 +Train: [57] [3100/6250] eta: 0:08:02 lr: 0.000052 grad: 0.1602 (0.1858) loss: 0.7340 (0.7283) time: 0.1553 data: 0.0739 max mem: 9377 +Train: [57] [3200/6250] eta: 0:07:44 lr: 0.000052 grad: 0.1624 (0.1853) loss: 0.7350 (0.7283) time: 0.1304 data: 0.0418 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:28 lr: 0.000052 grad: 0.1724 (0.1848) loss: 0.7186 (0.7283) time: 0.1337 data: 0.0495 max mem: 9377 +Train: [57] [3400/6250] eta: 0:07:11 lr: 0.000052 grad: 0.1683 (0.1844) loss: 0.7259 (0.7281) time: 0.1334 data: 0.0550 max mem: 9377 +Train: [57] [3500/6250] eta: 0:06:55 lr: 0.000052 grad: 0.1652 (0.1840) loss: 0.7492 (0.7281) time: 0.1387 data: 0.0536 max mem: 9377 +Train: [57] [3600/6250] eta: 0:06:39 lr: 0.000052 grad: 0.1697 (0.1836) loss: 0.7279 (0.7281) time: 0.1436 data: 0.0598 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:23 lr: 0.000052 grad: 0.1667 (0.1832) loss: 0.7354 (0.7282) time: 0.1104 data: 0.0149 max mem: 9377 +Train: [57] [3800/6250] eta: 0:06:08 lr: 0.000052 grad: 0.1706 (0.1828) loss: 0.7224 (0.7282) time: 0.1501 data: 0.0721 max mem: 9377 +Train: [57] [3900/6250] eta: 0:05:51 lr: 0.000052 grad: 0.1725 (0.1826) loss: 0.7356 (0.7281) time: 0.1244 data: 0.0388 max mem: 9377 +Train: [57] [4000/6250] eta: 0:05:36 lr: 0.000052 grad: 0.1653 (0.1823) loss: 0.7356 (0.7281) time: 0.1468 data: 0.0617 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:20 lr: 0.000052 grad: 0.1635 (0.1820) loss: 0.7250 (0.7281) time: 0.1399 data: 0.0587 max mem: 9377 +Train: [57] [4200/6250] eta: 0:05:05 lr: 0.000052 grad: 0.1757 (0.1817) loss: 0.7145 (0.7280) time: 0.1298 data: 0.0443 max mem: 9377 +Train: [57] [4300/6250] eta: 0:04:49 lr: 0.000052 grad: 0.1654 (0.1815) loss: 0.7249 (0.7279) time: 0.1528 data: 0.0708 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:33 lr: 0.000052 grad: 0.1592 (0.1812) loss: 0.7248 (0.7279) time: 0.1199 data: 0.0269 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:18 lr: 0.000052 grad: 0.1693 (0.1809) loss: 0.7297 (0.7279) time: 0.1473 data: 0.0619 max mem: 9377 +Train: [57] [4600/6250] eta: 0:04:03 lr: 0.000052 grad: 0.1712 (0.1808) loss: 0.7375 (0.7279) time: 0.1321 data: 0.0512 max mem: 9377 +Train: [57] [4700/6250] eta: 0:03:48 lr: 0.000052 grad: 0.1690 (0.1806) loss: 0.7247 (0.7278) time: 0.1359 data: 0.0535 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:33 lr: 0.000052 grad: 0.1653 (0.1803) loss: 0.7304 (0.7278) time: 0.1319 data: 0.0496 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:18 lr: 0.000052 grad: 0.1679 (0.1802) loss: 0.7297 (0.7277) time: 0.1315 data: 0.0479 max mem: 9377 +Train: [57] [5000/6250] eta: 0:03:03 lr: 0.000052 grad: 0.1787 (0.1801) loss: 0.7020 (0.7276) time: 0.1519 data: 0.0685 max mem: 9377 +Train: [57] [5100/6250] eta: 0:02:48 lr: 0.000052 grad: 0.1742 (0.1800) loss: 0.7219 (0.7276) time: 0.1268 data: 0.0417 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:33 lr: 0.000052 grad: 0.1715 (0.1798) loss: 0.7222 (0.7276) time: 0.1317 data: 0.0458 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:18 lr: 0.000052 grad: 0.1689 (0.1796) loss: 0.7271 (0.7276) time: 0.1359 data: 0.0522 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:04 lr: 0.000051 grad: 0.1721 (0.1795) loss: 0.7223 (0.7275) time: 0.1450 data: 0.0647 max mem: 9377 +Train: [57] [5500/6250] eta: 0:01:49 lr: 0.000051 grad: 0.1717 (0.1794) loss: 0.7163 (0.7275) time: 0.1300 data: 0.0510 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:34 lr: 0.000051 grad: 0.1708 (0.1793) loss: 0.7203 (0.7274) time: 0.1364 data: 0.0508 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:20 lr: 0.000051 grad: 0.1762 (0.1792) loss: 0.7117 (0.7273) time: 0.1482 data: 0.0652 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:05 lr: 0.000051 grad: 0.1751 (0.1791) loss: 0.7153 (0.7273) time: 0.1437 data: 0.0570 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:51 lr: 0.000051 grad: 0.1723 (0.1791) loss: 0.7242 (0.7272) time: 0.1583 data: 0.0771 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:36 lr: 0.000051 grad: 0.1703 (0.1791) loss: 0.7207 (0.7270) time: 0.1450 data: 0.0598 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:21 lr: 0.000051 grad: 0.1713 (0.1790) loss: 0.7221 (0.7270) time: 0.1563 data: 0.0814 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1713 (0.1790) loss: 0.7220 (0.7269) time: 0.1716 data: 0.0941 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1710 (0.1789) loss: 0.7126 (0.7269) time: 0.1692 data: 0.0924 max mem: 9377 +Train: [57] Total time: 0:15:17 (0.1468 s / it) +Averaged stats: lr: 0.000051 grad: 0.1710 (0.1789) loss: 0.7126 (0.7269) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:05:36 loss: 0.8627 (0.8627) time: 5.4277 data: 5.3984 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8635 (0.8664) time: 0.1424 data: 0.1169 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:13 (0.2242 s / it) +Averaged stats (hcp-train-subset): loss: 0.8635 (0.8664) +Eval (hcp-val): [57] [ 0/62] eta: 0:06:00 loss: 0.8610 (0.8610) time: 5.8214 data: 5.7891 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8624 (0.8642) time: 0.1242 data: 0.0993 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-val): loss: 0.8624 (0.8642) +Eval (nsd-val): [57] [ 0/62] eta: 0:04:03 loss: 0.8219 (0.8219) time: 3.9334 data: 3.8602 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8320 (0.8342) time: 0.1195 data: 0.0942 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:13 (0.2102 s / it) +Averaged stats (nsd-val): loss: 0.8320 (0.8342) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 7:32:59 lr: 0.000051 grad: 0.3148 (0.3148) loss: 0.8153 (0.8153) time: 4.3488 data: 3.9788 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:19:44 lr: 0.000051 grad: 0.3012 (0.2915) loss: 0.7146 (0.7485) time: 0.1370 data: 0.0363 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:17:11 lr: 0.000051 grad: 0.2845 (0.2991) loss: 0.7211 (0.7359) time: 0.1657 data: 0.0705 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:15:47 lr: 0.000051 grad: 0.2457 (0.2782) loss: 0.7168 (0.7307) time: 0.1415 data: 0.0503 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:15:11 lr: 0.000051 grad: 0.2229 (0.2624) loss: 0.7279 (0.7295) time: 0.1752 data: 0.0955 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:14:40 lr: 0.000051 grad: 0.2072 (0.2550) loss: 0.7161 (0.7278) time: 0.1463 data: 0.0577 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:14:19 lr: 0.000051 grad: 0.1867 (0.2458) loss: 0.7210 (0.7270) time: 0.1532 data: 0.0716 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:13:47 lr: 0.000051 grad: 0.1985 (0.2376) loss: 0.7258 (0.7268) time: 0.1404 data: 0.0512 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:13:25 lr: 0.000051 grad: 0.1791 (0.2319) loss: 0.7367 (0.7270) time: 0.1243 data: 0.0324 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:13:02 lr: 0.000051 grad: 0.1848 (0.2266) loss: 0.7250 (0.7269) time: 0.1401 data: 0.0504 max mem: 9377 +Train: [58] [1000/6250] eta: 0:12:39 lr: 0.000051 grad: 0.1753 (0.2222) loss: 0.7246 (0.7268) time: 0.1224 data: 0.0307 max mem: 9377 +Train: [58] [1100/6250] eta: 0:12:16 lr: 0.000051 grad: 0.1772 (0.2184) loss: 0.7277 (0.7267) time: 0.1359 data: 0.0491 max mem: 9377 +Train: [58] [1200/6250] eta: 0:11:54 lr: 0.000051 grad: 0.1779 (0.2154) loss: 0.7172 (0.7264) time: 0.1175 data: 0.0241 max mem: 9377 +Train: [58] [1300/6250] eta: 0:11:37 lr: 0.000051 grad: 0.1781 (0.2125) loss: 0.7252 (0.7262) time: 0.1290 data: 0.0385 max mem: 9377 +Train: [58] [1400/6250] eta: 0:11:31 lr: 0.000051 grad: 0.1677 (0.2094) loss: 0.7230 (0.7263) time: 0.1824 data: 0.0953 max mem: 9377 +Train: [58] [1500/6250] eta: 0:11:19 lr: 0.000051 grad: 0.1728 (0.2071) loss: 0.7243 (0.7261) time: 0.1382 data: 0.0556 max mem: 9377 +Train: [58] [1600/6250] eta: 0:11:07 lr: 0.000051 grad: 0.1709 (0.2048) loss: 0.7303 (0.7261) time: 0.1457 data: 0.0636 max mem: 9377 +Train: [58] [1700/6250] eta: 0:10:48 lr: 0.000051 grad: 0.1715 (0.2030) loss: 0.7120 (0.7259) time: 0.1395 data: 0.0534 max mem: 9377 +Train: [58] [1800/6250] eta: 0:10:34 lr: 0.000051 grad: 0.1717 (0.2013) loss: 0.7267 (0.7257) time: 0.1627 data: 0.0769 max mem: 9377 +Train: [58] [1900/6250] eta: 0:10:17 lr: 0.000051 grad: 0.1717 (0.1999) loss: 0.7303 (0.7257) time: 0.1392 data: 0.0576 max mem: 9377 +Train: [58] [2000/6250] eta: 0:10:02 lr: 0.000051 grad: 0.1635 (0.1986) loss: 0.7281 (0.7256) time: 0.1226 data: 0.0354 max mem: 9377 +Train: [58] [2100/6250] eta: 0:09:46 lr: 0.000051 grad: 0.1805 (0.1976) loss: 0.7119 (0.7254) time: 0.1255 data: 0.0408 max mem: 9377 +Train: [58] [2200/6250] eta: 0:09:31 lr: 0.000050 grad: 0.1825 (0.1970) loss: 0.7179 (0.7251) time: 0.1450 data: 0.0651 max mem: 9377 +Train: [58] [2300/6250] eta: 0:09:19 lr: 0.000050 grad: 0.1854 (0.1963) loss: 0.7165 (0.7247) time: 0.1358 data: 0.0525 max mem: 9377 +Train: [58] [2400/6250] eta: 0:09:04 lr: 0.000050 grad: 0.1777 (0.1956) loss: 0.7217 (0.7245) time: 0.1442 data: 0.0622 max mem: 9377 +Train: [58] [2500/6250] eta: 0:08:49 lr: 0.000050 grad: 0.1692 (0.1947) loss: 0.7158 (0.7245) time: 0.1472 data: 0.0621 max mem: 9377 +Train: [58] [2600/6250] eta: 0:08:35 lr: 0.000050 grad: 0.1713 (0.1938) loss: 0.7313 (0.7244) time: 0.1367 data: 0.0490 max mem: 9377 +Train: [58] [2700/6250] eta: 0:08:20 lr: 0.000050 grad: 0.1705 (0.1930) loss: 0.7145 (0.7243) time: 0.1428 data: 0.0606 max mem: 9377 +Train: [58] [2800/6250] eta: 0:08:07 lr: 0.000050 grad: 0.1773 (0.1924) loss: 0.7256 (0.7243) time: 0.1357 data: 0.0583 max mem: 9377 +Train: [58] [2900/6250] eta: 0:07:53 lr: 0.000050 grad: 0.1700 (0.1917) loss: 0.7261 (0.7244) time: 0.1377 data: 0.0434 max mem: 9377 +Train: [58] [3000/6250] eta: 0:07:38 lr: 0.000050 grad: 0.1726 (0.1910) loss: 0.7172 (0.7243) time: 0.1392 data: 0.0591 max mem: 9377 +Train: [58] [3100/6250] eta: 0:07:24 lr: 0.000050 grad: 0.1723 (0.1905) loss: 0.7138 (0.7241) time: 0.1358 data: 0.0521 max mem: 9377 +Train: [58] [3200/6250] eta: 0:07:10 lr: 0.000050 grad: 0.1704 (0.1900) loss: 0.7210 (0.7239) time: 0.1495 data: 0.0674 max mem: 9377 +Train: [58] [3300/6250] eta: 0:06:56 lr: 0.000050 grad: 0.1752 (0.1895) loss: 0.7179 (0.7238) time: 0.1417 data: 0.0579 max mem: 9377 +Train: [58] [3400/6250] eta: 0:06:41 lr: 0.000050 grad: 0.1703 (0.1890) loss: 0.7186 (0.7237) time: 0.1257 data: 0.0398 max mem: 9377 +Train: [58] [3500/6250] eta: 0:06:27 lr: 0.000050 grad: 0.1718 (0.1886) loss: 0.7241 (0.7237) time: 0.1399 data: 0.0527 max mem: 9377 +Train: [58] [3600/6250] eta: 0:06:13 lr: 0.000050 grad: 0.1706 (0.1881) loss: 0.7167 (0.7236) time: 0.1583 data: 0.0774 max mem: 9377 +Train: [58] [3700/6250] eta: 0:05:58 lr: 0.000050 grad: 0.1726 (0.1876) loss: 0.7235 (0.7236) time: 0.1313 data: 0.0437 max mem: 9377 +Train: [58] [3800/6250] eta: 0:05:44 lr: 0.000050 grad: 0.1629 (0.1872) loss: 0.7285 (0.7238) time: 0.1290 data: 0.0427 max mem: 9377 +Train: [58] [3900/6250] eta: 0:05:30 lr: 0.000050 grad: 0.1664 (0.1867) loss: 0.7249 (0.7239) time: 0.1340 data: 0.0512 max mem: 9377 +Train: [58] [4000/6250] eta: 0:05:16 lr: 0.000050 grad: 0.1715 (0.1862) loss: 0.7298 (0.7241) time: 0.1529 data: 0.0687 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:02 lr: 0.000050 grad: 0.1724 (0.1859) loss: 0.7142 (0.7241) time: 0.1427 data: 0.0602 max mem: 9377 +Train: [58] [4200/6250] eta: 0:04:47 lr: 0.000050 grad: 0.1671 (0.1855) loss: 0.7250 (0.7242) time: 0.1325 data: 0.0467 max mem: 9377 +Train: [58] [4300/6250] eta: 0:04:33 lr: 0.000050 grad: 0.1695 (0.1852) loss: 0.7321 (0.7241) time: 0.1242 data: 0.0418 max mem: 9377 +Train: [58] [4400/6250] eta: 0:04:19 lr: 0.000050 grad: 0.1718 (0.1850) loss: 0.7211 (0.7240) time: 0.1528 data: 0.0753 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:05 lr: 0.000050 grad: 0.1664 (0.1847) loss: 0.7190 (0.7240) time: 0.1422 data: 0.0611 max mem: 9377 +Train: [58] [4600/6250] eta: 0:03:51 lr: 0.000050 grad: 0.1677 (0.1844) loss: 0.7288 (0.7241) time: 0.1414 data: 0.0578 max mem: 9377 +Train: [58] [4700/6250] eta: 0:03:37 lr: 0.000050 grad: 0.1787 (0.1843) loss: 0.7231 (0.7240) time: 0.1595 data: 0.0752 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:23 lr: 0.000050 grad: 0.1709 (0.1840) loss: 0.7291 (0.7240) time: 0.1483 data: 0.0691 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:09 lr: 0.000050 grad: 0.1743 (0.1838) loss: 0.7216 (0.7240) time: 0.1310 data: 0.0469 max mem: 9377 +Train: [58] [5000/6250] eta: 0:02:55 lr: 0.000050 grad: 0.1721 (0.1835) loss: 0.7251 (0.7241) time: 0.1530 data: 0.0661 max mem: 9377 +Train: [58] [5100/6250] eta: 0:02:41 lr: 0.000050 grad: 0.1774 (0.1834) loss: 0.7227 (0.7242) time: 0.1449 data: 0.0640 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:27 lr: 0.000050 grad: 0.1682 (0.1833) loss: 0.7320 (0.7243) time: 0.1197 data: 0.0344 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:12 lr: 0.000049 grad: 0.1791 (0.1832) loss: 0.7327 (0.7244) time: 0.1337 data: 0.0533 max mem: 9377 +Train: [58] [5400/6250] eta: 0:01:58 lr: 0.000049 grad: 0.1737 (0.1830) loss: 0.7331 (0.7244) time: 0.1392 data: 0.0532 max mem: 9377 +Train: [58] [5500/6250] eta: 0:01:44 lr: 0.000049 grad: 0.1700 (0.1828) loss: 0.7298 (0.7244) time: 0.1302 data: 0.0478 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:30 lr: 0.000049 grad: 0.1720 (0.1827) loss: 0.7222 (0.7245) time: 0.1291 data: 0.0448 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:16 lr: 0.000049 grad: 0.1707 (0.1825) loss: 0.7262 (0.7245) time: 0.1160 data: 0.0264 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:02 lr: 0.000049 grad: 0.1754 (0.1824) loss: 0.7345 (0.7246) time: 0.1405 data: 0.0612 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:48 lr: 0.000049 grad: 0.1781 (0.1823) loss: 0.7380 (0.7247) time: 0.1366 data: 0.0566 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:34 lr: 0.000049 grad: 0.1771 (0.1823) loss: 0.7365 (0.7248) time: 0.1394 data: 0.0579 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:20 lr: 0.000049 grad: 0.1800 (0.1822) loss: 0.7236 (0.7249) time: 0.1595 data: 0.0775 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:06 lr: 0.000049 grad: 0.1788 (0.1821) loss: 0.7207 (0.7250) time: 0.1537 data: 0.0578 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1700 (0.1820) loss: 0.7375 (0.7250) time: 0.1319 data: 0.0478 max mem: 9377 +Train: [58] Total time: 0:14:38 (0.1405 s / it) +Averaged stats: lr: 0.000049 grad: 0.1700 (0.1820) loss: 0.7375 (0.7250) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:05:39 loss: 0.8576 (0.8576) time: 5.4786 data: 5.4439 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8660 (0.8663) time: 0.1405 data: 0.1144 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:13 (0.2228 s / it) +Averaged stats (hcp-train-subset): loss: 0.8660 (0.8663) +Eval (hcp-val): [58] [ 0/62] eta: 0:04:44 loss: 0.8657 (0.8657) time: 4.5937 data: 4.5139 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8615 (0.8643) time: 0.1436 data: 0.1164 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:14 (0.2326 s / it) +Averaged stats (hcp-val): loss: 0.8615 (0.8643) +Eval (nsd-val): [58] [ 0/62] eta: 0:04:13 loss: 0.8239 (0.8239) time: 4.0862 data: 4.0094 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8351 (0.8362) time: 0.0999 data: 0.0748 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (nsd-val): loss: 0.8351 (0.8362) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 8:54:35 lr: 0.000049 grad: 0.1227 (0.1227) loss: 0.8688 (0.8688) time: 5.1321 data: 4.8509 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:19:34 lr: 0.000049 grad: 0.3091 (0.3294) loss: 0.7202 (0.7373) time: 0.1297 data: 0.0272 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:16:55 lr: 0.000049 grad: 0.2502 (0.3139) loss: 0.7346 (0.7313) time: 0.1321 data: 0.0335 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:15:39 lr: 0.000049 grad: 0.2164 (0.2804) loss: 0.7325 (0.7328) time: 0.1334 data: 0.0436 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:15:02 lr: 0.000049 grad: 0.2246 (0.2672) loss: 0.7217 (0.7332) time: 0.1404 data: 0.0549 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:14:28 lr: 0.000049 grad: 0.1960 (0.2549) loss: 0.7078 (0.7307) time: 0.1392 data: 0.0476 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:14:04 lr: 0.000049 grad: 0.1812 (0.2459) loss: 0.7285 (0.7286) time: 0.1508 data: 0.0647 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:13:43 lr: 0.000049 grad: 0.1805 (0.2373) loss: 0.7191 (0.7274) time: 0.1384 data: 0.0516 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:13:27 lr: 0.000049 grad: 0.1743 (0.2299) loss: 0.7156 (0.7267) time: 0.1450 data: 0.0571 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:13:05 lr: 0.000049 grad: 0.1826 (0.2247) loss: 0.7238 (0.7263) time: 0.1571 data: 0.0693 max mem: 9377 +Train: [59] [1000/6250] eta: 0:12:46 lr: 0.000049 grad: 0.1681 (0.2195) loss: 0.7283 (0.7264) time: 0.1455 data: 0.0658 max mem: 9377 +Train: [59] [1100/6250] eta: 0:12:27 lr: 0.000049 grad: 0.1739 (0.2153) loss: 0.7248 (0.7262) time: 0.1371 data: 0.0616 max mem: 9377 +Train: [59] [1200/6250] eta: 0:12:04 lr: 0.000049 grad: 0.1699 (0.2115) loss: 0.7223 (0.7263) time: 0.1229 data: 0.0375 max mem: 9377 +Train: [59] [1300/6250] eta: 0:11:44 lr: 0.000049 grad: 0.1651 (0.2084) loss: 0.7267 (0.7261) time: 0.1375 data: 0.0549 max mem: 9377 +Train: [59] [1400/6250] eta: 0:11:32 lr: 0.000049 grad: 0.1732 (0.2059) loss: 0.7172 (0.7262) time: 0.1694 data: 0.0896 max mem: 9377 +Train: [59] [1500/6250] eta: 0:11:17 lr: 0.000049 grad: 0.1657 (0.2034) loss: 0.7335 (0.7264) time: 0.1452 data: 0.0601 max mem: 9377 +Train: [59] [1600/6250] eta: 0:10:59 lr: 0.000049 grad: 0.1737 (0.2016) loss: 0.7308 (0.7264) time: 0.1374 data: 0.0544 max mem: 9377 +Train: [59] [1700/6250] eta: 0:10:43 lr: 0.000049 grad: 0.1750 (0.2002) loss: 0.7239 (0.7262) time: 0.1423 data: 0.0604 max mem: 9377 +Train: [59] [1800/6250] eta: 0:10:27 lr: 0.000049 grad: 0.1737 (0.1987) loss: 0.7323 (0.7264) time: 0.1171 data: 0.0317 max mem: 9377 +Train: [59] [1900/6250] eta: 0:10:12 lr: 0.000049 grad: 0.1742 (0.1973) loss: 0.7339 (0.7266) time: 0.1229 data: 0.0451 max mem: 9377 +Train: [59] [2000/6250] eta: 0:09:55 lr: 0.000049 grad: 0.1788 (0.1961) loss: 0.7262 (0.7266) time: 0.1251 data: 0.0395 max mem: 9377 +Train: [59] [2100/6250] eta: 0:09:40 lr: 0.000048 grad: 0.1763 (0.1952) loss: 0.7284 (0.7267) time: 0.1221 data: 0.0363 max mem: 9377 +Train: [59] [2200/6250] eta: 0:09:25 lr: 0.000048 grad: 0.1628 (0.1941) loss: 0.7289 (0.7266) time: 0.1395 data: 0.0579 max mem: 9377 +Train: [59] [2300/6250] eta: 0:09:10 lr: 0.000048 grad: 0.1721 (0.1932) loss: 0.7108 (0.7263) time: 0.1537 data: 0.0685 max mem: 9377 +Train: [59] [2400/6250] eta: 0:08:56 lr: 0.000048 grad: 0.1718 (0.1924) loss: 0.7106 (0.7260) time: 0.1399 data: 0.0602 max mem: 9377 +Train: [59] [2500/6250] eta: 0:08:42 lr: 0.000048 grad: 0.1736 (0.1917) loss: 0.7174 (0.7256) time: 0.1288 data: 0.0533 max mem: 9377 +Train: [59] [2600/6250] eta: 0:08:28 lr: 0.000048 grad: 0.1758 (0.1911) loss: 0.7198 (0.7253) time: 0.1404 data: 0.0584 max mem: 9377 +Train: [59] [2700/6250] eta: 0:08:15 lr: 0.000048 grad: 0.1743 (0.1906) loss: 0.7124 (0.7251) time: 0.1511 data: 0.0713 max mem: 9377 +Train: [59] [2800/6250] eta: 0:08:03 lr: 0.000048 grad: 0.1783 (0.1901) loss: 0.7268 (0.7250) time: 0.1470 data: 0.0706 max mem: 9377 +Train: [59] [2900/6250] eta: 0:07:50 lr: 0.000048 grad: 0.1674 (0.1898) loss: 0.7289 (0.7248) time: 0.1551 data: 0.0734 max mem: 9377 +Train: [59] [3000/6250] eta: 0:07:37 lr: 0.000048 grad: 0.1752 (0.1892) loss: 0.7182 (0.7248) time: 0.1597 data: 0.0788 max mem: 9377 +Train: [59] [3100/6250] eta: 0:07:24 lr: 0.000048 grad: 0.1653 (0.1886) loss: 0.7366 (0.7249) time: 0.1463 data: 0.0687 max mem: 9377 +Train: [59] [3200/6250] eta: 0:07:11 lr: 0.000048 grad: 0.1823 (0.1883) loss: 0.7221 (0.7248) time: 0.1285 data: 0.0475 max mem: 9377 +Train: [59] [3300/6250] eta: 0:06:58 lr: 0.000048 grad: 0.1751 (0.1879) loss: 0.7251 (0.7248) time: 0.1501 data: 0.0679 max mem: 9377 +Train: [59] [3400/6250] eta: 0:06:45 lr: 0.000048 grad: 0.1739 (0.1875) loss: 0.7192 (0.7248) time: 0.1395 data: 0.0564 max mem: 9377 +Train: [59] [3500/6250] eta: 0:06:31 lr: 0.000048 grad: 0.1695 (0.1871) loss: 0.7249 (0.7248) time: 0.1326 data: 0.0484 max mem: 9377 +Train: [59] [3600/6250] eta: 0:06:16 lr: 0.000048 grad: 0.1659 (0.1866) loss: 0.7359 (0.7249) time: 0.1408 data: 0.0594 max mem: 9377 +Train: [59] [3700/6250] eta: 0:06:02 lr: 0.000048 grad: 0.1670 (0.1863) loss: 0.7146 (0.7248) time: 0.1284 data: 0.0493 max mem: 9377 +Train: [59] [3800/6250] eta: 0:05:47 lr: 0.000048 grad: 0.1727 (0.1859) loss: 0.7392 (0.7250) time: 0.1417 data: 0.0623 max mem: 9377 +Train: [59] [3900/6250] eta: 0:05:33 lr: 0.000048 grad: 0.1674 (0.1856) loss: 0.7306 (0.7251) time: 0.1374 data: 0.0560 max mem: 9377 +Train: [59] [4000/6250] eta: 0:05:18 lr: 0.000048 grad: 0.1713 (0.1853) loss: 0.7316 (0.7251) time: 0.1119 data: 0.0298 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:04 lr: 0.000048 grad: 0.1749 (0.1850) loss: 0.7167 (0.7251) time: 0.1182 data: 0.0329 max mem: 9377 +Train: [59] [4200/6250] eta: 0:04:49 lr: 0.000048 grad: 0.1744 (0.1848) loss: 0.7140 (0.7250) time: 0.1287 data: 0.0398 max mem: 9377 +Train: [59] [4300/6250] eta: 0:04:34 lr: 0.000048 grad: 0.1784 (0.1845) loss: 0.7208 (0.7250) time: 0.1296 data: 0.0433 max mem: 9377 +Train: [59] [4400/6250] eta: 0:04:20 lr: 0.000048 grad: 0.1725 (0.1842) loss: 0.7170 (0.7250) time: 0.1340 data: 0.0451 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:06 lr: 0.000048 grad: 0.1722 (0.1840) loss: 0.7246 (0.7250) time: 0.1440 data: 0.0616 max mem: 9377 +Train: [59] [4600/6250] eta: 0:03:52 lr: 0.000048 grad: 0.1675 (0.1837) loss: 0.7255 (0.7251) time: 0.1340 data: 0.0547 max mem: 9377 +Train: [59] [4700/6250] eta: 0:03:38 lr: 0.000048 grad: 0.1710 (0.1834) loss: 0.7263 (0.7252) time: 0.1304 data: 0.0447 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:24 lr: 0.000048 grad: 0.1737 (0.1832) loss: 0.7153 (0.7251) time: 0.1542 data: 0.0754 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:10 lr: 0.000048 grad: 0.1739 (0.1830) loss: 0.7273 (0.7251) time: 0.1585 data: 0.0772 max mem: 9377 +Train: [59] [5000/6250] eta: 0:02:56 lr: 0.000048 grad: 0.1708 (0.1829) loss: 0.7332 (0.7252) time: 0.1596 data: 0.0770 max mem: 9377 +Train: [59] [5100/6250] eta: 0:02:42 lr: 0.000048 grad: 0.1711 (0.1827) loss: 0.7289 (0.7252) time: 0.1467 data: 0.0619 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:27 lr: 0.000047 grad: 0.1665 (0.1824) loss: 0.7278 (0.7254) time: 0.1188 data: 0.0375 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:13 lr: 0.000047 grad: 0.1707 (0.1822) loss: 0.7236 (0.7254) time: 0.1480 data: 0.0682 max mem: 9377 +Train: [59] [5400/6250] eta: 0:01:59 lr: 0.000047 grad: 0.1714 (0.1819) loss: 0.7226 (0.7255) time: 0.1389 data: 0.0499 max mem: 9377 +Train: [59] [5500/6250] eta: 0:01:45 lr: 0.000047 grad: 0.1689 (0.1817) loss: 0.7326 (0.7257) time: 0.1284 data: 0.0399 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:31 lr: 0.000047 grad: 0.1692 (0.1815) loss: 0.7290 (0.7258) time: 0.1297 data: 0.0432 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:17 lr: 0.000047 grad: 0.1639 (0.1813) loss: 0.7341 (0.7259) time: 0.1621 data: 0.0821 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:03 lr: 0.000047 grad: 0.1652 (0.1811) loss: 0.7366 (0.7260) time: 0.1433 data: 0.0625 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:49 lr: 0.000047 grad: 0.1695 (0.1809) loss: 0.7363 (0.7261) time: 0.1474 data: 0.0665 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:35 lr: 0.000047 grad: 0.1728 (0.1808) loss: 0.7241 (0.7261) time: 0.1262 data: 0.0396 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:21 lr: 0.000047 grad: 0.1657 (0.1807) loss: 0.7234 (0.7260) time: 0.1404 data: 0.0630 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:07 lr: 0.000047 grad: 0.1771 (0.1806) loss: 0.7177 (0.7260) time: 0.1344 data: 0.0516 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1715 (0.1806) loss: 0.7197 (0.7259) time: 0.1375 data: 0.0514 max mem: 9377 +Train: [59] Total time: 0:14:41 (0.1411 s / it) +Averaged stats: lr: 0.000047 grad: 0.1715 (0.1806) loss: 0.7197 (0.7259) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:04:56 loss: 0.8624 (0.8624) time: 4.7829 data: 4.7531 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8662 (0.8676) time: 0.1378 data: 0.1109 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (hcp-train-subset): loss: 0.8662 (0.8676) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [59] [ 0/62] eta: 0:05:39 loss: 0.8667 (0.8667) time: 5.4713 data: 5.4409 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8643 (0.8645) time: 0.1351 data: 0.1096 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (hcp-val): loss: 0.8643 (0.8645) +Making plots (hcp-val): example=2 +Eval (nsd-val): [59] [ 0/62] eta: 0:05:41 loss: 0.8285 (0.8285) time: 5.5032 data: 5.4688 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8356 (0.8378) time: 0.1235 data: 0.0982 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (nsd-val): loss: 0.8356 (0.8378) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 7:37:55 lr: 0.000047 grad: 0.2516 (0.2516) loss: 0.7835 (0.7835) time: 4.3960 data: 4.2207 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:20:37 lr: 0.000047 grad: 0.2831 (0.3205) loss: 0.7259 (0.7499) time: 0.1611 data: 0.0627 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:17:58 lr: 0.000047 grad: 0.2427 (0.2922) loss: 0.7242 (0.7424) time: 0.1559 data: 0.0599 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:16:52 lr: 0.000047 grad: 0.2133 (0.2698) loss: 0.7222 (0.7384) time: 0.1647 data: 0.0845 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:15:59 lr: 0.000047 grad: 0.2232 (0.2572) loss: 0.7257 (0.7348) time: 0.1514 data: 0.0649 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:15:25 lr: 0.000047 grad: 0.2466 (0.2502) loss: 0.7253 (0.7323) time: 0.1486 data: 0.0520 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:15:11 lr: 0.000047 grad: 0.1865 (0.2418) loss: 0.7183 (0.7316) time: 0.1736 data: 0.0824 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:14:48 lr: 0.000047 grad: 0.1789 (0.2335) loss: 0.7371 (0.7314) time: 0.1601 data: 0.0731 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:14:24 lr: 0.000047 grad: 0.1764 (0.2273) loss: 0.7391 (0.7310) time: 0.1362 data: 0.0463 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:13:59 lr: 0.000047 grad: 0.1807 (0.2221) loss: 0.7278 (0.7310) time: 0.1668 data: 0.0881 max mem: 9377 +Train: [60] [1000/6250] eta: 0:13:33 lr: 0.000047 grad: 0.1861 (0.2176) loss: 0.7309 (0.7309) time: 0.1444 data: 0.0607 max mem: 9377 +Train: [60] [1100/6250] eta: 0:13:11 lr: 0.000047 grad: 0.1694 (0.2140) loss: 0.7366 (0.7306) time: 0.1446 data: 0.0642 max mem: 9377 +Train: [60] [1200/6250] eta: 0:12:47 lr: 0.000047 grad: 0.1805 (0.2110) loss: 0.7249 (0.7302) time: 0.1282 data: 0.0486 max mem: 9377 +Train: [60] [1300/6250] eta: 0:12:26 lr: 0.000047 grad: 0.1890 (0.2084) loss: 0.7059 (0.7299) time: 0.1367 data: 0.0477 max mem: 9377 +Train: [60] [1400/6250] eta: 0:12:06 lr: 0.000047 grad: 0.1751 (0.2064) loss: 0.7227 (0.7293) time: 0.1399 data: 0.0591 max mem: 9377 +Train: [60] [1500/6250] eta: 0:11:48 lr: 0.000047 grad: 0.1723 (0.2043) loss: 0.7168 (0.7291) time: 0.1473 data: 0.0640 max mem: 9377 +Train: [60] [1600/6250] eta: 0:11:28 lr: 0.000047 grad: 0.1773 (0.2024) loss: 0.7270 (0.7287) time: 0.1441 data: 0.0590 max mem: 9377 +Train: [60] [1700/6250] eta: 0:11:08 lr: 0.000047 grad: 0.1675 (0.2008) loss: 0.7281 (0.7285) time: 0.1244 data: 0.0476 max mem: 9377 +Train: [60] [1800/6250] eta: 0:10:51 lr: 0.000047 grad: 0.1706 (0.1995) loss: 0.7309 (0.7282) time: 0.1592 data: 0.0739 max mem: 9377 +Train: [60] [1900/6250] eta: 0:10:37 lr: 0.000047 grad: 0.1752 (0.1983) loss: 0.7279 (0.7280) time: 0.1756 data: 0.0938 max mem: 9377 +Train: [60] [2000/6250] eta: 0:10:18 lr: 0.000047 grad: 0.1778 (0.1973) loss: 0.7230 (0.7278) time: 0.1354 data: 0.0530 max mem: 9377 +Train: [60] [2100/6250] eta: 0:10:01 lr: 0.000046 grad: 0.1746 (0.1962) loss: 0.7192 (0.7277) time: 0.1268 data: 0.0417 max mem: 9377 +Train: [60] [2200/6250] eta: 0:09:44 lr: 0.000046 grad: 0.1684 (0.1954) loss: 0.7244 (0.7276) time: 0.1228 data: 0.0352 max mem: 9377 +Train: [60] [2300/6250] eta: 0:09:27 lr: 0.000046 grad: 0.1703 (0.1948) loss: 0.7265 (0.7273) time: 0.1272 data: 0.0323 max mem: 9377 +Train: [60] [2400/6250] eta: 0:09:11 lr: 0.000046 grad: 0.1728 (0.1941) loss: 0.7257 (0.7272) time: 0.1567 data: 0.0737 max mem: 9377 +Train: [60] [2500/6250] eta: 0:08:57 lr: 0.000046 grad: 0.1747 (0.1933) loss: 0.7190 (0.7271) time: 0.1529 data: 0.0764 max mem: 9377 +Train: [60] [2600/6250] eta: 0:08:41 lr: 0.000046 grad: 0.1739 (0.1925) loss: 0.7103 (0.7270) time: 0.1260 data: 0.0401 max mem: 9377 +Train: [60] [2700/6250] eta: 0:08:27 lr: 0.000046 grad: 0.1700 (0.1919) loss: 0.7234 (0.7269) time: 0.1412 data: 0.0599 max mem: 9377 +Train: [60] [2800/6250] eta: 0:08:12 lr: 0.000046 grad: 0.1745 (0.1913) loss: 0.7287 (0.7269) time: 0.1428 data: 0.0700 max mem: 9377 +Train: [60] [2900/6250] eta: 0:07:57 lr: 0.000046 grad: 0.1753 (0.1908) loss: 0.7191 (0.7269) time: 0.1446 data: 0.0627 max mem: 9377 +Train: [60] [3000/6250] eta: 0:07:43 lr: 0.000046 grad: 0.1818 (0.1903) loss: 0.7202 (0.7268) time: 0.1466 data: 0.0661 max mem: 9377 +Train: [60] [3100/6250] eta: 0:07:28 lr: 0.000046 grad: 0.1738 (0.1898) loss: 0.7269 (0.7269) time: 0.1558 data: 0.0702 max mem: 9377 +Train: [60] [3200/6250] eta: 0:07:14 lr: 0.000046 grad: 0.1739 (0.1894) loss: 0.7228 (0.7270) time: 0.1373 data: 0.0529 max mem: 9377 +Train: [60] [3300/6250] eta: 0:06:59 lr: 0.000046 grad: 0.1738 (0.1889) loss: 0.7238 (0.7271) time: 0.1333 data: 0.0462 max mem: 9377 +Train: [60] [3400/6250] eta: 0:06:45 lr: 0.000046 grad: 0.1739 (0.1884) loss: 0.7210 (0.7272) time: 0.1392 data: 0.0585 max mem: 9377 +Train: [60] [3500/6250] eta: 0:06:30 lr: 0.000046 grad: 0.1655 (0.1879) loss: 0.7392 (0.7272) time: 0.1303 data: 0.0393 max mem: 9377 +Train: [60] [3600/6250] eta: 0:06:16 lr: 0.000046 grad: 0.1711 (0.1875) loss: 0.7296 (0.7272) time: 0.1467 data: 0.0714 max mem: 9377 +Train: [60] [3700/6250] eta: 0:06:01 lr: 0.000046 grad: 0.1754 (0.1872) loss: 0.7297 (0.7272) time: 0.1599 data: 0.0732 max mem: 9377 +Train: [60] [3800/6250] eta: 0:05:46 lr: 0.000046 grad: 0.1691 (0.1870) loss: 0.7186 (0.7271) time: 0.1255 data: 0.0492 max mem: 9377 +Train: [60] [3900/6250] eta: 0:05:32 lr: 0.000046 grad: 0.1780 (0.1867) loss: 0.7250 (0.7271) time: 0.1342 data: 0.0562 max mem: 9377 +Train: [60] [4000/6250] eta: 0:05:17 lr: 0.000046 grad: 0.1868 (0.1865) loss: 0.7125 (0.7270) time: 0.1463 data: 0.0590 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:03 lr: 0.000046 grad: 0.1801 (0.1862) loss: 0.7110 (0.7268) time: 0.1328 data: 0.0487 max mem: 9377 +Train: [60] [4200/6250] eta: 0:04:49 lr: 0.000046 grad: 0.1771 (0.1860) loss: 0.7271 (0.7266) time: 0.1353 data: 0.0514 max mem: 9377 +Train: [60] [4300/6250] eta: 0:04:35 lr: 0.000046 grad: 0.1742 (0.1859) loss: 0.7212 (0.7264) time: 0.1355 data: 0.0480 max mem: 9377 +Train: [60] [4400/6250] eta: 0:04:20 lr: 0.000046 grad: 0.1729 (0.1856) loss: 0.7284 (0.7263) time: 0.1291 data: 0.0411 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:06 lr: 0.000046 grad: 0.1702 (0.1854) loss: 0.7040 (0.7261) time: 0.1308 data: 0.0436 max mem: 9377 +Train: [60] [4600/6250] eta: 0:03:52 lr: 0.000046 grad: 0.1761 (0.1852) loss: 0.7167 (0.7260) time: 0.1345 data: 0.0534 max mem: 9377 +Train: [60] [4700/6250] eta: 0:03:38 lr: 0.000046 grad: 0.1744 (0.1850) loss: 0.7172 (0.7258) time: 0.1623 data: 0.0820 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:23 lr: 0.000046 grad: 0.1745 (0.1848) loss: 0.7256 (0.7257) time: 0.1437 data: 0.0541 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:09 lr: 0.000046 grad: 0.1715 (0.1847) loss: 0.7188 (0.7256) time: 0.1195 data: 0.0358 max mem: 9377 +Train: [60] [5000/6250] eta: 0:02:55 lr: 0.000046 grad: 0.1745 (0.1845) loss: 0.7198 (0.7254) time: 0.1373 data: 0.0600 max mem: 9377 +Train: [60] [5100/6250] eta: 0:02:41 lr: 0.000046 grad: 0.1698 (0.1844) loss: 0.7138 (0.7253) time: 0.1315 data: 0.0495 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:27 lr: 0.000045 grad: 0.1821 (0.1842) loss: 0.7095 (0.7251) time: 0.1327 data: 0.0550 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:13 lr: 0.000045 grad: 0.1812 (0.1841) loss: 0.7137 (0.7250) time: 0.1426 data: 0.0618 max mem: 9377 +Train: [60] [5400/6250] eta: 0:01:59 lr: 0.000045 grad: 0.1702 (0.1840) loss: 0.7201 (0.7249) time: 0.1449 data: 0.0634 max mem: 9377 +Train: [60] [5500/6250] eta: 0:01:45 lr: 0.000045 grad: 0.1797 (0.1841) loss: 0.7130 (0.7248) time: 0.1187 data: 0.0372 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:31 lr: 0.000045 grad: 0.1787 (0.1840) loss: 0.7058 (0.7247) time: 0.1407 data: 0.0609 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:17 lr: 0.000045 grad: 0.1765 (0.1839) loss: 0.7275 (0.7245) time: 0.1369 data: 0.0550 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:03 lr: 0.000045 grad: 0.1840 (0.1839) loss: 0.7066 (0.7244) time: 0.1368 data: 0.0594 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:49 lr: 0.000045 grad: 0.1775 (0.1839) loss: 0.7193 (0.7244) time: 0.1217 data: 0.0456 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:35 lr: 0.000045 grad: 0.1856 (0.1838) loss: 0.7097 (0.7243) time: 0.1406 data: 0.0571 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:21 lr: 0.000045 grad: 0.1761 (0.1838) loss: 0.7195 (0.7242) time: 0.1268 data: 0.0416 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:07 lr: 0.000045 grad: 0.1819 (0.1838) loss: 0.7179 (0.7242) time: 0.1351 data: 0.0514 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1756 (0.1837) loss: 0.7289 (0.7242) time: 0.1253 data: 0.0377 max mem: 9377 +Train: [60] Total time: 0:14:42 (0.1412 s / it) +Averaged stats: lr: 0.000045 grad: 0.1756 (0.1837) loss: 0.7289 (0.7242) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:04:15 loss: 0.8586 (0.8586) time: 4.1255 data: 4.0489 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8642 (0.8669) time: 0.1429 data: 0.1164 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:14 (0.2383 s / it) +Averaged stats (hcp-train-subset): loss: 0.8642 (0.8669) +Eval (hcp-val): [60] [ 0/62] eta: 0:05:11 loss: 0.8621 (0.8621) time: 5.0301 data: 5.0003 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8616 (0.8644) time: 0.1509 data: 0.1250 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:15 (0.2552 s / it) +Averaged stats (hcp-val): loss: 0.8616 (0.8644) +Eval (nsd-val): [60] [ 0/62] eta: 0:03:53 loss: 0.8277 (0.8277) time: 3.7721 data: 3.7030 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8336 (0.8359) time: 0.1413 data: 0.1156 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (nsd-val): loss: 0.8336 (0.8359) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 8:04:35 lr: 0.000045 grad: 0.2044 (0.2044) loss: 0.8180 (0.8180) time: 4.6520 data: 4.2894 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:23:02 lr: 0.000045 grad: 0.2968 (0.3383) loss: 0.7131 (0.7319) time: 0.1871 data: 0.0716 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:19:33 lr: 0.000045 grad: 0.2830 (0.3189) loss: 0.7279 (0.7318) time: 0.1383 data: 0.0436 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:18:20 lr: 0.000045 grad: 0.2182 (0.2972) loss: 0.7386 (0.7327) time: 0.1556 data: 0.0546 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:17:14 lr: 0.000045 grad: 0.2329 (0.2780) loss: 0.7325 (0.7314) time: 0.1332 data: 0.0379 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:16:24 lr: 0.000045 grad: 0.1859 (0.2629) loss: 0.7182 (0.7298) time: 0.1578 data: 0.0715 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:15:41 lr: 0.000045 grad: 0.1876 (0.2510) loss: 0.7224 (0.7280) time: 0.1304 data: 0.0423 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:15:09 lr: 0.000045 grad: 0.1820 (0.2424) loss: 0.7325 (0.7277) time: 0.1439 data: 0.0574 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:14:52 lr: 0.000045 grad: 0.1758 (0.2352) loss: 0.7209 (0.7264) time: 0.1827 data: 0.0958 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:14:31 lr: 0.000045 grad: 0.1820 (0.2290) loss: 0.7222 (0.7259) time: 0.1513 data: 0.0654 max mem: 9377 +Train: [61] [1000/6250] eta: 0:14:11 lr: 0.000045 grad: 0.1734 (0.2238) loss: 0.7302 (0.7256) time: 0.1416 data: 0.0489 max mem: 9377 +Train: [61] [1100/6250] eta: 0:13:46 lr: 0.000045 grad: 0.1761 (0.2194) loss: 0.7167 (0.7254) time: 0.1222 data: 0.0397 max mem: 9377 +Train: [61] [1200/6250] eta: 0:13:26 lr: 0.000045 grad: 0.1813 (0.2159) loss: 0.7222 (0.7254) time: 0.1476 data: 0.0662 max mem: 9377 +Train: [61] [1300/6250] eta: 0:13:05 lr: 0.000045 grad: 0.1716 (0.2128) loss: 0.7360 (0.7255) time: 0.1416 data: 0.0638 max mem: 9377 +Train: [61] [1400/6250] eta: 0:12:45 lr: 0.000045 grad: 0.1732 (0.2100) loss: 0.7337 (0.7255) time: 0.1396 data: 0.0538 max mem: 9377 +Train: [61] [1500/6250] eta: 0:12:23 lr: 0.000045 grad: 0.1695 (0.2076) loss: 0.7234 (0.7258) time: 0.1257 data: 0.0468 max mem: 9377 +Train: [61] [1600/6250] eta: 0:12:05 lr: 0.000045 grad: 0.1689 (0.2054) loss: 0.7218 (0.7259) time: 0.1236 data: 0.0434 max mem: 9377 +Train: [61] [1700/6250] eta: 0:11:45 lr: 0.000045 grad: 0.1743 (0.2036) loss: 0.7183 (0.7257) time: 0.1569 data: 0.0738 max mem: 9377 +Train: [61] [1800/6250] eta: 0:11:23 lr: 0.000045 grad: 0.1765 (0.2022) loss: 0.7141 (0.7254) time: 0.1166 data: 0.0285 max mem: 9377 +Train: [61] [1900/6250] eta: 0:11:05 lr: 0.000045 grad: 0.1716 (0.2010) loss: 0.7179 (0.7250) time: 0.1198 data: 0.0391 max mem: 9377 +Train: [61] [2000/6250] eta: 0:10:47 lr: 0.000045 grad: 0.1778 (0.1997) loss: 0.7164 (0.7247) time: 0.1367 data: 0.0535 max mem: 9377 +Train: [61] [2100/6250] eta: 0:10:29 lr: 0.000044 grad: 0.1760 (0.1987) loss: 0.7138 (0.7245) time: 0.1368 data: 0.0514 max mem: 9377 +Train: [61] [2200/6250] eta: 0:10:09 lr: 0.000044 grad: 0.1732 (0.1977) loss: 0.7266 (0.7246) time: 0.1137 data: 0.0279 max mem: 9377 +Train: [61] [2300/6250] eta: 0:09:50 lr: 0.000044 grad: 0.1759 (0.1967) loss: 0.7226 (0.7243) time: 0.1302 data: 0.0443 max mem: 9377 +Train: [61] [2400/6250] eta: 0:09:34 lr: 0.000044 grad: 0.1748 (0.1961) loss: 0.7206 (0.7240) time: 0.1385 data: 0.0497 max mem: 9377 +Train: [61] [2500/6250] eta: 0:09:17 lr: 0.000044 grad: 0.1936 (0.1954) loss: 0.7001 (0.7236) time: 0.1286 data: 0.0455 max mem: 9377 +Train: [61] [2600/6250] eta: 0:09:00 lr: 0.000044 grad: 0.1811 (0.1949) loss: 0.7278 (0.7233) time: 0.1287 data: 0.0430 max mem: 9377 +Train: [61] [2700/6250] eta: 0:08:44 lr: 0.000044 grad: 0.1789 (0.1943) loss: 0.7013 (0.7231) time: 0.1406 data: 0.0578 max mem: 9377 +Train: [61] [2800/6250] eta: 0:08:29 lr: 0.000044 grad: 0.1731 (0.1936) loss: 0.7297 (0.7231) time: 0.1704 data: 0.0950 max mem: 9377 +Train: [61] [2900/6250] eta: 0:08:15 lr: 0.000044 grad: 0.1755 (0.1930) loss: 0.7235 (0.7231) time: 0.1499 data: 0.0765 max mem: 9377 +Train: [61] [3000/6250] eta: 0:07:59 lr: 0.000044 grad: 0.1757 (0.1925) loss: 0.7187 (0.7231) time: 0.1445 data: 0.0663 max mem: 9377 +Train: [61] [3100/6250] eta: 0:07:45 lr: 0.000044 grad: 0.1715 (0.1920) loss: 0.7181 (0.7229) time: 0.1481 data: 0.0710 max mem: 9377 +Train: [61] [3200/6250] eta: 0:07:30 lr: 0.000044 grad: 0.1715 (0.1914) loss: 0.7080 (0.7227) time: 0.1325 data: 0.0504 max mem: 9377 +Train: [61] [3300/6250] eta: 0:07:15 lr: 0.000044 grad: 0.1697 (0.1910) loss: 0.7277 (0.7227) time: 0.1386 data: 0.0539 max mem: 9377 +Train: [61] [3400/6250] eta: 0:07:00 lr: 0.000044 grad: 0.1729 (0.1905) loss: 0.7242 (0.7227) time: 0.1579 data: 0.0820 max mem: 9377 +Train: [61] [3500/6250] eta: 0:06:46 lr: 0.000044 grad: 0.1745 (0.1900) loss: 0.7242 (0.7227) time: 0.1654 data: 0.0874 max mem: 9377 +Train: [61] [3600/6250] eta: 0:06:30 lr: 0.000044 grad: 0.1743 (0.1895) loss: 0.7204 (0.7227) time: 0.1495 data: 0.0695 max mem: 9377 +Train: [61] [3700/6250] eta: 0:06:16 lr: 0.000044 grad: 0.1683 (0.1890) loss: 0.7324 (0.7227) time: 0.1505 data: 0.0701 max mem: 9377 +Train: [61] [3800/6250] eta: 0:06:01 lr: 0.000044 grad: 0.1652 (0.1886) loss: 0.7369 (0.7228) time: 0.1632 data: 0.0818 max mem: 9377 +Train: [61] [3900/6250] eta: 0:05:45 lr: 0.000044 grad: 0.1721 (0.1882) loss: 0.7236 (0.7229) time: 0.1271 data: 0.0382 max mem: 9377 +Train: [61] [4000/6250] eta: 0:05:29 lr: 0.000044 grad: 0.1742 (0.1878) loss: 0.7112 (0.7229) time: 0.1279 data: 0.0427 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:14 lr: 0.000044 grad: 0.1709 (0.1874) loss: 0.7208 (0.7230) time: 0.1452 data: 0.0664 max mem: 9377 +Train: [61] [4200/6250] eta: 0:04:59 lr: 0.000044 grad: 0.1673 (0.1870) loss: 0.7321 (0.7231) time: 0.1526 data: 0.0769 max mem: 9377 +Train: [61] [4300/6250] eta: 0:04:44 lr: 0.000044 grad: 0.1744 (0.1867) loss: 0.7081 (0.7232) time: 0.1292 data: 0.0385 max mem: 9377 +Train: [61] [4400/6250] eta: 0:04:29 lr: 0.000044 grad: 0.1641 (0.1864) loss: 0.7279 (0.7233) time: 0.1211 data: 0.0369 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:14 lr: 0.000044 grad: 0.1707 (0.1861) loss: 0.7229 (0.7234) time: 0.1421 data: 0.0613 max mem: 9377 +Train: [61] [4600/6250] eta: 0:03:59 lr: 0.000044 grad: 0.1682 (0.1858) loss: 0.7256 (0.7235) time: 0.1332 data: 0.0479 max mem: 9377 +Train: [61] [4700/6250] eta: 0:03:44 lr: 0.000044 grad: 0.1724 (0.1855) loss: 0.7229 (0.7237) time: 0.1389 data: 0.0580 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:29 lr: 0.000044 grad: 0.1684 (0.1852) loss: 0.7286 (0.7239) time: 0.1424 data: 0.0628 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:15 lr: 0.000044 grad: 0.1668 (0.1849) loss: 0.7252 (0.7240) time: 0.1387 data: 0.0611 max mem: 9377 +Train: [61] [5000/6250] eta: 0:03:00 lr: 0.000044 grad: 0.1728 (0.1847) loss: 0.7315 (0.7242) time: 0.1468 data: 0.0617 max mem: 9377 +Train: [61] [5100/6250] eta: 0:02:45 lr: 0.000044 grad: 0.1669 (0.1844) loss: 0.7368 (0.7243) time: 0.1119 data: 0.0301 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:31 lr: 0.000044 grad: 0.1672 (0.1842) loss: 0.7380 (0.7245) time: 0.1524 data: 0.0713 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:17 lr: 0.000043 grad: 0.1686 (0.1840) loss: 0.7304 (0.7246) time: 0.1555 data: 0.0787 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:02 lr: 0.000043 grad: 0.1609 (0.1837) loss: 0.7314 (0.7247) time: 0.1315 data: 0.0505 max mem: 9377 +Train: [61] [5500/6250] eta: 0:01:48 lr: 0.000043 grad: 0.1785 (0.1836) loss: 0.7307 (0.7248) time: 0.1236 data: 0.0412 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:33 lr: 0.000043 grad: 0.1736 (0.1834) loss: 0.7274 (0.7250) time: 0.1151 data: 0.0288 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:19 lr: 0.000043 grad: 0.1769 (0.1833) loss: 0.7199 (0.7250) time: 0.1505 data: 0.0684 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:04 lr: 0.000043 grad: 0.1807 (0.1832) loss: 0.7211 (0.7251) time: 0.1309 data: 0.0461 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:50 lr: 0.000043 grad: 0.1749 (0.1831) loss: 0.7167 (0.7251) time: 0.1453 data: 0.0661 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:35 lr: 0.000043 grad: 0.1760 (0.1830) loss: 0.7107 (0.7250) time: 0.1259 data: 0.0400 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:21 lr: 0.000043 grad: 0.1781 (0.1829) loss: 0.7161 (0.7250) time: 0.1447 data: 0.0627 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:07 lr: 0.000043 grad: 0.1773 (0.1829) loss: 0.7319 (0.7250) time: 0.1453 data: 0.0640 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1784 (0.1828) loss: 0.7161 (0.7250) time: 0.1357 data: 0.0493 max mem: 9377 +Train: [61] Total time: 0:15:00 (0.1440 s / it) +Averaged stats: lr: 0.000043 grad: 0.1784 (0.1828) loss: 0.7161 (0.7250) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:05:08 loss: 0.8649 (0.8649) time: 4.9713 data: 4.9412 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8654 (0.8679) time: 0.1447 data: 0.1185 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:13 (0.2253 s / it) +Averaged stats (hcp-train-subset): loss: 0.8654 (0.8679) +Eval (hcp-val): [61] [ 0/62] eta: 0:05:38 loss: 0.8717 (0.8717) time: 5.4599 data: 5.4238 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8624 (0.8662) time: 0.1249 data: 0.0978 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (hcp-val): loss: 0.8624 (0.8662) +Eval (nsd-val): [61] [ 0/62] eta: 0:03:31 loss: 0.8269 (0.8269) time: 3.4049 data: 3.3212 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8396 (0.8392) time: 0.1205 data: 0.0937 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:13 (0.2180 s / it) +Averaged stats (nsd-val): loss: 0.8396 (0.8392) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 8:40:15 lr: 0.000043 grad: 0.1310 (0.1310) loss: 0.8749 (0.8749) time: 4.9945 data: 4.6717 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:20:17 lr: 0.000043 grad: 0.2505 (0.3059) loss: 0.7150 (0.7536) time: 0.1254 data: 0.0261 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:16:59 lr: 0.000043 grad: 0.2373 (0.2876) loss: 0.7097 (0.7440) time: 0.1339 data: 0.0315 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:15:52 lr: 0.000043 grad: 0.2275 (0.2732) loss: 0.7281 (0.7416) time: 0.1245 data: 0.0352 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:14:56 lr: 0.000043 grad: 0.1861 (0.2575) loss: 0.7295 (0.7392) time: 0.1434 data: 0.0528 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:14:17 lr: 0.000043 grad: 0.1884 (0.2440) loss: 0.7476 (0.7385) time: 0.1221 data: 0.0262 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:13:53 lr: 0.000043 grad: 0.1993 (0.2352) loss: 0.7245 (0.7375) time: 0.1428 data: 0.0393 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:13:25 lr: 0.000043 grad: 0.1804 (0.2284) loss: 0.7341 (0.7365) time: 0.1334 data: 0.0448 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:13:05 lr: 0.000043 grad: 0.1751 (0.2225) loss: 0.7413 (0.7364) time: 0.1363 data: 0.0463 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:12:53 lr: 0.000043 grad: 0.1782 (0.2177) loss: 0.7369 (0.7364) time: 0.1596 data: 0.0716 max mem: 9377 +Train: [62] [1000/6250] eta: 0:12:37 lr: 0.000043 grad: 0.1786 (0.2140) loss: 0.7230 (0.7357) time: 0.1484 data: 0.0682 max mem: 9377 +Train: [62] [1100/6250] eta: 0:12:29 lr: 0.000043 grad: 0.1782 (0.2110) loss: 0.7281 (0.7346) time: 0.1597 data: 0.0805 max mem: 9377 +Train: [62] [1200/6250] eta: 0:12:16 lr: 0.000043 grad: 0.1777 (0.2083) loss: 0.7274 (0.7341) time: 0.1361 data: 0.0520 max mem: 9377 +Train: [62] [1300/6250] eta: 0:12:03 lr: 0.000043 grad: 0.1714 (0.2058) loss: 0.7233 (0.7339) time: 0.1512 data: 0.0676 max mem: 9377 +Train: [62] [1400/6250] eta: 0:11:45 lr: 0.000043 grad: 0.1750 (0.2038) loss: 0.7354 (0.7340) time: 0.1233 data: 0.0421 max mem: 9377 +Train: [62] [1500/6250] eta: 0:11:32 lr: 0.000043 grad: 0.1766 (0.2020) loss: 0.7244 (0.7339) time: 0.1667 data: 0.0841 max mem: 9377 +Train: [62] [1600/6250] eta: 0:11:20 lr: 0.000043 grad: 0.1758 (0.2004) loss: 0.7285 (0.7339) time: 0.1308 data: 0.0489 max mem: 9377 +Train: [62] [1700/6250] eta: 0:11:05 lr: 0.000043 grad: 0.1756 (0.1992) loss: 0.7302 (0.7338) time: 0.1369 data: 0.0587 max mem: 9377 +Train: [62] [1800/6250] eta: 0:10:50 lr: 0.000043 grad: 0.1748 (0.1979) loss: 0.7327 (0.7339) time: 0.1428 data: 0.0574 max mem: 9377 +Train: [62] [1900/6250] eta: 0:10:35 lr: 0.000043 grad: 0.1719 (0.1967) loss: 0.7280 (0.7340) time: 0.1607 data: 0.0849 max mem: 9377 +Train: [62] [2000/6250] eta: 0:10:19 lr: 0.000043 grad: 0.1757 (0.1957) loss: 0.7367 (0.7338) time: 0.1317 data: 0.0482 max mem: 9377 +Train: [62] [2100/6250] eta: 0:10:04 lr: 0.000043 grad: 0.1725 (0.1948) loss: 0.7333 (0.7336) time: 0.1318 data: 0.0397 max mem: 9377 +Train: [62] [2200/6250] eta: 0:09:49 lr: 0.000042 grad: 0.1736 (0.1939) loss: 0.7358 (0.7336) time: 0.1353 data: 0.0397 max mem: 9377 +Train: [62] [2300/6250] eta: 0:09:33 lr: 0.000042 grad: 0.1730 (0.1932) loss: 0.7243 (0.7334) time: 0.1404 data: 0.0583 max mem: 9377 +Train: [62] [2400/6250] eta: 0:09:17 lr: 0.000042 grad: 0.1712 (0.1925) loss: 0.7317 (0.7332) time: 0.1396 data: 0.0543 max mem: 9377 +Train: [62] [2500/6250] eta: 0:09:00 lr: 0.000042 grad: 0.1720 (0.1919) loss: 0.7268 (0.7329) time: 0.1311 data: 0.0523 max mem: 9377 +Train: [62] [2600/6250] eta: 0:08:46 lr: 0.000042 grad: 0.1777 (0.1914) loss: 0.7178 (0.7327) time: 0.1584 data: 0.0816 max mem: 9377 +Train: [62] [2700/6250] eta: 0:08:32 lr: 0.000042 grad: 0.1738 (0.1908) loss: 0.7198 (0.7326) time: 0.1514 data: 0.0672 max mem: 9377 +Train: [62] [2800/6250] eta: 0:08:20 lr: 0.000042 grad: 0.1728 (0.1902) loss: 0.7255 (0.7323) time: 0.1432 data: 0.0616 max mem: 9377 +Train: [62] [2900/6250] eta: 0:08:07 lr: 0.000042 grad: 0.1665 (0.1897) loss: 0.7228 (0.7320) time: 0.1631 data: 0.0894 max mem: 9377 +Train: [62] [3000/6250] eta: 0:07:55 lr: 0.000042 grad: 0.1709 (0.1892) loss: 0.7251 (0.7319) time: 0.1832 data: 0.1033 max mem: 9377 +Train: [62] [3100/6250] eta: 0:07:41 lr: 0.000042 grad: 0.1708 (0.1886) loss: 0.7287 (0.7318) time: 0.1573 data: 0.0722 max mem: 9377 +Train: [62] [3200/6250] eta: 0:07:28 lr: 0.000042 grad: 0.1717 (0.1883) loss: 0.7255 (0.7316) time: 0.1511 data: 0.0756 max mem: 9377 +Train: [62] [3300/6250] eta: 0:07:15 lr: 0.000042 grad: 0.1706 (0.1880) loss: 0.7212 (0.7314) time: 0.1840 data: 0.0989 max mem: 9377 +Train: [62] [3400/6250] eta: 0:07:01 lr: 0.000042 grad: 0.1725 (0.1878) loss: 0.7251 (0.7311) time: 0.1594 data: 0.0854 max mem: 9377 +Train: [62] [3500/6250] eta: 0:06:47 lr: 0.000042 grad: 0.1679 (0.1874) loss: 0.7206 (0.7308) time: 0.1649 data: 0.0845 max mem: 9377 +Train: [62] [3600/6250] eta: 0:06:32 lr: 0.000042 grad: 0.1722 (0.1871) loss: 0.7046 (0.7304) time: 0.1626 data: 0.0881 max mem: 9377 +Train: [62] [3700/6250] eta: 0:06:17 lr: 0.000042 grad: 0.1737 (0.1868) loss: 0.7228 (0.7301) time: 0.1430 data: 0.0640 max mem: 9377 +Train: [62] [3800/6250] eta: 0:06:02 lr: 0.000042 grad: 0.1764 (0.1865) loss: 0.7134 (0.7297) time: 0.1387 data: 0.0548 max mem: 9377 +Train: [62] [3900/6250] eta: 0:05:46 lr: 0.000042 grad: 0.1727 (0.1863) loss: 0.7144 (0.7294) time: 0.1541 data: 0.0735 max mem: 9377 +Train: [62] [4000/6250] eta: 0:05:31 lr: 0.000042 grad: 0.1750 (0.1861) loss: 0.7111 (0.7290) time: 0.1267 data: 0.0378 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:16 lr: 0.000042 grad: 0.1713 (0.1859) loss: 0.7251 (0.7288) time: 0.1568 data: 0.0764 max mem: 9377 +Train: [62] [4200/6250] eta: 0:05:01 lr: 0.000042 grad: 0.1737 (0.1857) loss: 0.7183 (0.7284) time: 0.1363 data: 0.0582 max mem: 9377 +Train: [62] [4300/6250] eta: 0:04:46 lr: 0.000042 grad: 0.1779 (0.1855) loss: 0.7189 (0.7282) time: 0.1344 data: 0.0473 max mem: 9377 +Train: [62] [4400/6250] eta: 0:04:31 lr: 0.000042 grad: 0.1703 (0.1854) loss: 0.7258 (0.7279) time: 0.1647 data: 0.0819 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:16 lr: 0.000042 grad: 0.1787 (0.1852) loss: 0.7214 (0.7278) time: 0.1418 data: 0.0648 max mem: 9377 +Train: [62] [4600/6250] eta: 0:04:02 lr: 0.000042 grad: 0.1753 (0.1851) loss: 0.7148 (0.7276) time: 0.1271 data: 0.0465 max mem: 9377 +Train: [62] [4700/6250] eta: 0:03:47 lr: 0.000042 grad: 0.1777 (0.1850) loss: 0.7148 (0.7275) time: 0.1561 data: 0.0772 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:32 lr: 0.000042 grad: 0.1857 (0.1849) loss: 0.7153 (0.7273) time: 0.1154 data: 0.0278 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:17 lr: 0.000042 grad: 0.1761 (0.1848) loss: 0.7218 (0.7271) time: 0.1251 data: 0.0402 max mem: 9377 +Train: [62] [5000/6250] eta: 0:03:02 lr: 0.000042 grad: 0.1863 (0.1848) loss: 0.7131 (0.7270) time: 0.1193 data: 0.0306 max mem: 9377 +Train: [62] [5100/6250] eta: 0:02:47 lr: 0.000042 grad: 0.1885 (0.1847) loss: 0.7140 (0.7268) time: 0.1682 data: 0.0915 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:33 lr: 0.000042 grad: 0.1793 (0.1847) loss: 0.7196 (0.7266) time: 0.1370 data: 0.0563 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:18 lr: 0.000042 grad: 0.1762 (0.1847) loss: 0.7384 (0.7265) time: 0.1467 data: 0.0643 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:03 lr: 0.000041 grad: 0.1802 (0.1847) loss: 0.7044 (0.7263) time: 0.1821 data: 0.1068 max mem: 9377 +Train: [62] [5500/6250] eta: 0:01:49 lr: 0.000041 grad: 0.1855 (0.1847) loss: 0.7151 (0.7262) time: 0.1101 data: 0.0217 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:34 lr: 0.000041 grad: 0.1821 (0.1846) loss: 0.7174 (0.7261) time: 0.1362 data: 0.0477 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:19 lr: 0.000041 grad: 0.1819 (0.1846) loss: 0.7225 (0.7259) time: 0.1262 data: 0.0412 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:05 lr: 0.000041 grad: 0.1781 (0.1846) loss: 0.7217 (0.7258) time: 0.1496 data: 0.0682 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:50 lr: 0.000041 grad: 0.1855 (0.1846) loss: 0.6991 (0.7255) time: 0.1477 data: 0.0681 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:36 lr: 0.000041 grad: 0.1811 (0.1846) loss: 0.7109 (0.7254) time: 0.1250 data: 0.0464 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:21 lr: 0.000041 grad: 0.1840 (0.1846) loss: 0.7069 (0.7252) time: 0.1391 data: 0.0631 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:07 lr: 0.000041 grad: 0.1838 (0.1846) loss: 0.7119 (0.7250) time: 0.1717 data: 0.0924 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1833 (0.1846) loss: 0.7050 (0.7250) time: 0.1326 data: 0.0554 max mem: 9377 +Train: [62] Total time: 0:15:08 (0.1453 s / it) +Averaged stats: lr: 0.000041 grad: 0.1833 (0.1846) loss: 0.7050 (0.7250) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:03:47 loss: 0.8585 (0.8585) time: 3.6614 data: 3.5892 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8718 (0.8711) time: 0.1310 data: 0.1056 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (hcp-train-subset): loss: 0.8718 (0.8711) +Eval (hcp-val): [62] [ 0/62] eta: 0:04:51 loss: 0.8660 (0.8660) time: 4.7023 data: 4.6696 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8680 (0.8678) time: 0.1183 data: 0.0930 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-val): loss: 0.8680 (0.8678) +Eval (nsd-val): [62] [ 0/62] eta: 0:04:27 loss: 0.8291 (0.8291) time: 4.3120 data: 4.2512 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8377 (0.8378) time: 0.1188 data: 0.0926 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:12 (0.2090 s / it) +Averaged stats (nsd-val): loss: 0.8377 (0.8378) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 10:37:50 lr: 0.000041 grad: 0.1771 (0.1771) loss: 0.8217 (0.8217) time: 6.1233 data: 6.0299 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:20:14 lr: 0.000041 grad: 0.2995 (0.2954) loss: 0.7369 (0.7536) time: 0.1594 data: 0.0593 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:17:55 lr: 0.000041 grad: 0.3190 (0.3106) loss: 0.7024 (0.7376) time: 0.1524 data: 0.0488 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:16:53 lr: 0.000041 grad: 0.2368 (0.3007) loss: 0.7069 (0.7271) time: 0.1753 data: 0.0750 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:16:05 lr: 0.000041 grad: 0.2291 (0.2896) loss: 0.6957 (0.7222) time: 0.1550 data: 0.0540 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:15:30 lr: 0.000041 grad: 0.1997 (0.2734) loss: 0.7069 (0.7191) time: 0.1388 data: 0.0461 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:14:58 lr: 0.000041 grad: 0.1933 (0.2605) loss: 0.7116 (0.7174) time: 0.1460 data: 0.0459 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:14:32 lr: 0.000041 grad: 0.1894 (0.2509) loss: 0.7115 (0.7171) time: 0.1713 data: 0.0879 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:14:16 lr: 0.000041 grad: 0.1870 (0.2425) loss: 0.7094 (0.7178) time: 0.1796 data: 0.1001 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:14:05 lr: 0.000041 grad: 0.1772 (0.2357) loss: 0.7295 (0.7184) time: 0.1565 data: 0.0738 max mem: 9377 +Train: [63] [1000/6250] eta: 0:13:53 lr: 0.000041 grad: 0.1766 (0.2302) loss: 0.7272 (0.7187) time: 0.1661 data: 0.0767 max mem: 9377 +Train: [63] [1100/6250] eta: 0:13:39 lr: 0.000041 grad: 0.1768 (0.2262) loss: 0.7135 (0.7183) time: 0.1545 data: 0.0710 max mem: 9377 +Train: [63] [1200/6250] eta: 0:13:16 lr: 0.000041 grad: 0.1822 (0.2227) loss: 0.7067 (0.7180) time: 0.1246 data: 0.0421 max mem: 9377 +Train: [63] [1300/6250] eta: 0:13:01 lr: 0.000041 grad: 0.1718 (0.2195) loss: 0.7240 (0.7179) time: 0.1421 data: 0.0546 max mem: 9377 +Train: [63] [1400/6250] eta: 0:12:46 lr: 0.000041 grad: 0.1716 (0.2165) loss: 0.7219 (0.7180) time: 0.1661 data: 0.0732 max mem: 9377 +Train: [63] [1500/6250] eta: 0:12:34 lr: 0.000041 grad: 0.1749 (0.2139) loss: 0.7190 (0.7181) time: 0.1776 data: 0.0963 max mem: 9377 +Train: [63] [1600/6250] eta: 0:12:24 lr: 0.000041 grad: 0.1801 (0.2116) loss: 0.7135 (0.7183) time: 0.2545 data: 0.1807 max mem: 9377 +Train: [63] [1700/6250] eta: 0:12:09 lr: 0.000041 grad: 0.1753 (0.2096) loss: 0.7144 (0.7187) time: 0.1622 data: 0.0786 max mem: 9377 +Train: [63] [1800/6250] eta: 0:11:53 lr: 0.000041 grad: 0.1779 (0.2081) loss: 0.7298 (0.7189) time: 0.1333 data: 0.0559 max mem: 9377 +Train: [63] [1900/6250] eta: 0:11:38 lr: 0.000041 grad: 0.1789 (0.2066) loss: 0.7224 (0.7189) time: 0.1451 data: 0.0640 max mem: 9377 +Train: [63] [2000/6250] eta: 0:11:25 lr: 0.000041 grad: 0.1878 (0.2054) loss: 0.7098 (0.7189) time: 0.1536 data: 0.0664 max mem: 9377 +Train: [63] [2100/6250] eta: 0:11:10 lr: 0.000041 grad: 0.1841 (0.2043) loss: 0.7129 (0.7188) time: 0.1400 data: 0.0569 max mem: 9377 +Train: [63] [2200/6250] eta: 0:10:55 lr: 0.000041 grad: 0.1814 (0.2033) loss: 0.7158 (0.7188) time: 0.1318 data: 0.0404 max mem: 9377 +Train: [63] [2300/6250] eta: 0:10:38 lr: 0.000041 grad: 0.1754 (0.2022) loss: 0.7141 (0.7191) time: 0.1416 data: 0.0619 max mem: 9377 +Train: [63] [2400/6250] eta: 0:10:20 lr: 0.000040 grad: 0.1803 (0.2013) loss: 0.7180 (0.7193) time: 0.1399 data: 0.0582 max mem: 9377 +Train: [63] [2500/6250] eta: 0:10:01 lr: 0.000040 grad: 0.1793 (0.2005) loss: 0.7198 (0.7193) time: 0.1356 data: 0.0518 max mem: 9377 +Train: [63] [2600/6250] eta: 0:09:43 lr: 0.000040 grad: 0.1763 (0.1998) loss: 0.7251 (0.7192) time: 0.1548 data: 0.0734 max mem: 9377 +Train: [63] [2700/6250] eta: 0:09:25 lr: 0.000040 grad: 0.1839 (0.1990) loss: 0.7107 (0.7192) time: 0.1505 data: 0.0682 max mem: 9377 +Train: [63] [2800/6250] eta: 0:09:07 lr: 0.000040 grad: 0.1765 (0.1984) loss: 0.7211 (0.7192) time: 0.1482 data: 0.0722 max mem: 9377 +Train: [63] [2900/6250] eta: 0:08:49 lr: 0.000040 grad: 0.1813 (0.1977) loss: 0.7206 (0.7193) time: 0.1407 data: 0.0573 max mem: 9377 +Train: [63] [3000/6250] eta: 0:08:32 lr: 0.000040 grad: 0.1864 (0.1972) loss: 0.7193 (0.7193) time: 0.1172 data: 0.0376 max mem: 9377 +Train: [63] [3100/6250] eta: 0:08:15 lr: 0.000040 grad: 0.1822 (0.1966) loss: 0.7190 (0.7194) time: 0.1635 data: 0.0840 max mem: 9377 +Train: [63] [3200/6250] eta: 0:07:57 lr: 0.000040 grad: 0.1839 (0.1961) loss: 0.7143 (0.7193) time: 0.1270 data: 0.0434 max mem: 9377 +Train: [63] [3300/6250] eta: 0:07:40 lr: 0.000040 grad: 0.1733 (0.1957) loss: 0.7220 (0.7193) time: 0.1568 data: 0.0788 max mem: 9377 +Train: [63] [3400/6250] eta: 0:07:23 lr: 0.000040 grad: 0.1846 (0.1954) loss: 0.7057 (0.7193) time: 0.1460 data: 0.0618 max mem: 9377 +Train: [63] [3500/6250] eta: 0:07:06 lr: 0.000040 grad: 0.1840 (0.1951) loss: 0.7124 (0.7191) time: 0.1290 data: 0.0472 max mem: 9377 +Train: [63] [3600/6250] eta: 0:06:50 lr: 0.000040 grad: 0.1878 (0.1948) loss: 0.7084 (0.7190) time: 0.1381 data: 0.0542 max mem: 9377 +Train: [63] [3700/6250] eta: 0:06:33 lr: 0.000040 grad: 0.1817 (0.1946) loss: 0.7127 (0.7190) time: 0.1275 data: 0.0442 max mem: 9377 +Train: [63] [3800/6250] eta: 0:06:17 lr: 0.000040 grad: 0.1812 (0.1943) loss: 0.7126 (0.7189) time: 0.1353 data: 0.0559 max mem: 9377 +Train: [63] [3900/6250] eta: 0:06:00 lr: 0.000040 grad: 0.1842 (0.1941) loss: 0.7133 (0.7187) time: 0.1204 data: 0.0313 max mem: 9377 +Train: [63] [4000/6250] eta: 0:05:44 lr: 0.000040 grad: 0.1809 (0.1939) loss: 0.7153 (0.7186) time: 0.1408 data: 0.0585 max mem: 9377 +Train: [63] [4100/6250] eta: 0:05:28 lr: 0.000040 grad: 0.1686 (0.1935) loss: 0.7293 (0.7187) time: 0.1360 data: 0.0504 max mem: 9377 +Train: [63] [4200/6250] eta: 0:05:12 lr: 0.000040 grad: 0.1816 (0.1932) loss: 0.7189 (0.7187) time: 0.1438 data: 0.0617 max mem: 9377 +Train: [63] [4300/6250] eta: 0:04:56 lr: 0.000040 grad: 0.1781 (0.1929) loss: 0.7299 (0.7188) time: 0.1356 data: 0.0513 max mem: 9377 +Train: [63] [4400/6250] eta: 0:04:40 lr: 0.000040 grad: 0.1793 (0.1926) loss: 0.7226 (0.7189) time: 0.1357 data: 0.0508 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:24 lr: 0.000040 grad: 0.1822 (0.1924) loss: 0.7228 (0.7189) time: 0.1408 data: 0.0633 max mem: 9377 +Train: [63] [4600/6250] eta: 0:04:09 lr: 0.000040 grad: 0.1834 (0.1923) loss: 0.7244 (0.7190) time: 0.1418 data: 0.0570 max mem: 9377 +Train: [63] [4700/6250] eta: 0:03:53 lr: 0.000040 grad: 0.1799 (0.1920) loss: 0.7196 (0.7191) time: 0.1387 data: 0.0578 max mem: 9377 +Train: [63] [4800/6250] eta: 0:03:38 lr: 0.000040 grad: 0.1834 (0.1918) loss: 0.7280 (0.7193) time: 0.1461 data: 0.0649 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:22 lr: 0.000040 grad: 0.1935 (0.1917) loss: 0.7214 (0.7194) time: 0.1310 data: 0.0482 max mem: 9377 +Train: [63] [5000/6250] eta: 0:03:07 lr: 0.000040 grad: 0.1925 (0.1916) loss: 0.7235 (0.7193) time: 0.1147 data: 0.0293 max mem: 9377 +Train: [63] [5100/6250] eta: 0:02:52 lr: 0.000040 grad: 0.1773 (0.1914) loss: 0.7232 (0.7194) time: 0.1335 data: 0.0538 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:37 lr: 0.000040 grad: 0.1778 (0.1913) loss: 0.7305 (0.7194) time: 0.1192 data: 0.0363 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:21 lr: 0.000040 grad: 0.1740 (0.1911) loss: 0.7337 (0.7195) time: 0.1434 data: 0.0600 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:06 lr: 0.000040 grad: 0.1810 (0.1910) loss: 0.7217 (0.7195) time: 0.1469 data: 0.0717 max mem: 9377 +Train: [63] [5500/6250] eta: 0:01:51 lr: 0.000040 grad: 0.1844 (0.1909) loss: 0.7200 (0.7195) time: 0.1269 data: 0.0461 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:36 lr: 0.000039 grad: 0.1790 (0.1907) loss: 0.7260 (0.7196) time: 0.1508 data: 0.0745 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:21 lr: 0.000039 grad: 0.1855 (0.1905) loss: 0.7175 (0.7196) time: 0.1284 data: 0.0423 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:06 lr: 0.000039 grad: 0.1873 (0.1904) loss: 0.7079 (0.7196) time: 0.1628 data: 0.0776 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:51 lr: 0.000039 grad: 0.1797 (0.1903) loss: 0.7171 (0.7196) time: 0.1448 data: 0.0587 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:37 lr: 0.000039 grad: 0.1782 (0.1902) loss: 0.7192 (0.7197) time: 0.1389 data: 0.0525 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:22 lr: 0.000039 grad: 0.1762 (0.1901) loss: 0.7220 (0.7197) time: 0.1561 data: 0.0736 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:07 lr: 0.000039 grad: 0.1821 (0.1900) loss: 0.7087 (0.7197) time: 0.1388 data: 0.0537 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1767 (0.1899) loss: 0.7165 (0.7197) time: 0.1320 data: 0.0511 max mem: 9377 +Train: [63] Total time: 0:15:30 (0.1488 s / it) +Averaged stats: lr: 0.000039 grad: 0.1767 (0.1899) loss: 0.7165 (0.7197) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:05:32 loss: 0.8634 (0.8634) time: 5.3609 data: 5.3287 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8692 (0.8695) time: 0.1159 data: 0.0861 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:13 (0.2188 s / it) +Averaged stats (hcp-train-subset): loss: 0.8692 (0.8695) +Eval (hcp-val): [63] [ 0/62] eta: 0:03:31 loss: 0.8750 (0.8750) time: 3.4116 data: 3.3234 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8650 (0.8661) time: 0.1223 data: 0.0973 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:13 (0.2169 s / it) +Averaged stats (hcp-val): loss: 0.8650 (0.8661) +Eval (nsd-val): [63] [ 0/62] eta: 0:04:51 loss: 0.8229 (0.8229) time: 4.6981 data: 4.6676 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8358 (0.8372) time: 0.1170 data: 0.0919 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:13 (0.2118 s / it) +Averaged stats (nsd-val): loss: 0.8358 (0.8372) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 9:34:19 lr: 0.000039 grad: 0.2995 (0.2995) loss: 0.7305 (0.7305) time: 5.5135 data: 5.3622 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:19:10 lr: 0.000039 grad: 0.3263 (0.3595) loss: 0.7232 (0.7404) time: 0.1595 data: 0.0658 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:16:25 lr: 0.000039 grad: 0.2765 (0.3323) loss: 0.6997 (0.7304) time: 0.1333 data: 0.0380 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:15:17 lr: 0.000039 grad: 0.2225 (0.3020) loss: 0.7335 (0.7299) time: 0.1317 data: 0.0400 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:14:32 lr: 0.000039 grad: 0.2152 (0.2825) loss: 0.7374 (0.7286) time: 0.1466 data: 0.0561 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:14:03 lr: 0.000039 grad: 0.1911 (0.2661) loss: 0.7264 (0.7280) time: 0.1461 data: 0.0632 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:13:37 lr: 0.000039 grad: 0.1876 (0.2527) loss: 0.7213 (0.7283) time: 0.1343 data: 0.0425 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:13:12 lr: 0.000039 grad: 0.1877 (0.2428) loss: 0.7247 (0.7285) time: 0.1435 data: 0.0374 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:12:49 lr: 0.000039 grad: 0.1906 (0.2356) loss: 0.7236 (0.7283) time: 0.1279 data: 0.0387 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:12:29 lr: 0.000039 grad: 0.1725 (0.2300) loss: 0.7320 (0.7281) time: 0.1366 data: 0.0441 max mem: 9377 +Train: [64] [1000/6250] eta: 0:12:13 lr: 0.000039 grad: 0.1789 (0.2252) loss: 0.7150 (0.7277) time: 0.1260 data: 0.0382 max mem: 9377 +Train: [64] [1100/6250] eta: 0:11:57 lr: 0.000039 grad: 0.1792 (0.2214) loss: 0.7170 (0.7270) time: 0.1124 data: 0.0300 max mem: 9377 +Train: [64] [1200/6250] eta: 0:11:42 lr: 0.000039 grad: 0.1790 (0.2183) loss: 0.7197 (0.7265) time: 0.1461 data: 0.0559 max mem: 9377 +Train: [64] [1300/6250] eta: 0:11:25 lr: 0.000039 grad: 0.1816 (0.2155) loss: 0.7223 (0.7263) time: 0.1275 data: 0.0434 max mem: 9377 +Train: [64] [1400/6250] eta: 0:11:11 lr: 0.000039 grad: 0.1810 (0.2130) loss: 0.7180 (0.7262) time: 0.1176 data: 0.0337 max mem: 9377 +Train: [64] [1500/6250] eta: 0:10:55 lr: 0.000039 grad: 0.1839 (0.2109) loss: 0.7139 (0.7257) time: 0.1236 data: 0.0390 max mem: 9377 +Train: [64] [1600/6250] eta: 0:10:46 lr: 0.000039 grad: 0.1840 (0.2093) loss: 0.7118 (0.7251) time: 0.1664 data: 0.0820 max mem: 9377 +Train: [64] [1700/6250] eta: 0:10:37 lr: 0.000039 grad: 0.1750 (0.2076) loss: 0.7214 (0.7247) time: 0.1523 data: 0.0591 max mem: 9377 +Train: [64] [1800/6250] eta: 0:10:26 lr: 0.000039 grad: 0.1765 (0.2062) loss: 0.7253 (0.7244) time: 0.1719 data: 0.0867 max mem: 9377 +Train: [64] [1900/6250] eta: 0:10:12 lr: 0.000039 grad: 0.1853 (0.2049) loss: 0.7004 (0.7241) time: 0.1421 data: 0.0665 max mem: 9377 +Train: [64] [2000/6250] eta: 0:09:59 lr: 0.000039 grad: 0.1810 (0.2037) loss: 0.7130 (0.7240) time: 0.1465 data: 0.0586 max mem: 9377 +Train: [64] [2100/6250] eta: 0:09:45 lr: 0.000039 grad: 0.1814 (0.2028) loss: 0.7202 (0.7237) time: 0.1495 data: 0.0707 max mem: 9377 +Train: [64] [2200/6250] eta: 0:09:30 lr: 0.000039 grad: 0.1810 (0.2020) loss: 0.7251 (0.7235) time: 0.1368 data: 0.0535 max mem: 9377 +Train: [64] [2300/6250] eta: 0:09:15 lr: 0.000039 grad: 0.1893 (0.2013) loss: 0.7116 (0.7231) time: 0.1310 data: 0.0470 max mem: 9377 +Train: [64] [2400/6250] eta: 0:09:01 lr: 0.000039 grad: 0.1838 (0.2007) loss: 0.7281 (0.7230) time: 0.1432 data: 0.0598 max mem: 9377 +Train: [64] [2500/6250] eta: 0:08:45 lr: 0.000039 grad: 0.1855 (0.2000) loss: 0.7164 (0.7229) time: 0.1278 data: 0.0371 max mem: 9377 +Train: [64] [2600/6250] eta: 0:08:32 lr: 0.000039 grad: 0.1768 (0.1993) loss: 0.7216 (0.7228) time: 0.1417 data: 0.0553 max mem: 9377 +Train: [64] [2700/6250] eta: 0:08:18 lr: 0.000038 grad: 0.1829 (0.1987) loss: 0.7187 (0.7227) time: 0.1314 data: 0.0341 max mem: 9377 +Train: [64] [2800/6250] eta: 0:08:03 lr: 0.000038 grad: 0.1829 (0.1982) loss: 0.7137 (0.7226) time: 0.1286 data: 0.0378 max mem: 9377 +Train: [64] [2900/6250] eta: 0:07:49 lr: 0.000038 grad: 0.1788 (0.1976) loss: 0.7192 (0.7226) time: 0.1329 data: 0.0493 max mem: 9377 +Train: [64] [3000/6250] eta: 0:07:35 lr: 0.000038 grad: 0.1848 (0.1970) loss: 0.7129 (0.7225) time: 0.1300 data: 0.0428 max mem: 9377 +Train: [64] [3100/6250] eta: 0:07:20 lr: 0.000038 grad: 0.1825 (0.1965) loss: 0.7225 (0.7223) time: 0.1344 data: 0.0506 max mem: 9377 +Train: [64] [3200/6250] eta: 0:07:06 lr: 0.000038 grad: 0.1842 (0.1961) loss: 0.7222 (0.7222) time: 0.1352 data: 0.0503 max mem: 9377 +Train: [64] [3300/6250] eta: 0:06:51 lr: 0.000038 grad: 0.1803 (0.1957) loss: 0.7009 (0.7220) time: 0.1283 data: 0.0441 max mem: 9377 +Train: [64] [3400/6250] eta: 0:06:37 lr: 0.000038 grad: 0.1785 (0.1953) loss: 0.7144 (0.7219) time: 0.1351 data: 0.0550 max mem: 9377 +Train: [64] [3500/6250] eta: 0:06:23 lr: 0.000038 grad: 0.1884 (0.1950) loss: 0.7160 (0.7218) time: 0.1467 data: 0.0607 max mem: 9377 +Train: [64] [3600/6250] eta: 0:06:09 lr: 0.000038 grad: 0.1866 (0.1948) loss: 0.7139 (0.7215) time: 0.1463 data: 0.0631 max mem: 9377 +Train: [64] [3700/6250] eta: 0:05:55 lr: 0.000038 grad: 0.1771 (0.1943) loss: 0.7100 (0.7214) time: 0.1464 data: 0.0638 max mem: 9377 +Train: [64] [3800/6250] eta: 0:05:41 lr: 0.000038 grad: 0.1882 (0.1940) loss: 0.7077 (0.7212) time: 0.1279 data: 0.0447 max mem: 9377 +Train: [64] [3900/6250] eta: 0:05:27 lr: 0.000038 grad: 0.1848 (0.1938) loss: 0.7088 (0.7210) time: 0.1342 data: 0.0472 max mem: 9377 +Train: [64] [4000/6250] eta: 0:05:13 lr: 0.000038 grad: 0.1764 (0.1935) loss: 0.7209 (0.7208) time: 0.1655 data: 0.0829 max mem: 9377 +Train: [64] [4100/6250] eta: 0:04:58 lr: 0.000038 grad: 0.1837 (0.1933) loss: 0.7065 (0.7206) time: 0.1260 data: 0.0485 max mem: 9377 +Train: [64] [4200/6250] eta: 0:04:45 lr: 0.000038 grad: 0.1804 (0.1931) loss: 0.7097 (0.7205) time: 0.1427 data: 0.0611 max mem: 9377 +Train: [64] [4300/6250] eta: 0:04:31 lr: 0.000038 grad: 0.1798 (0.1929) loss: 0.7131 (0.7203) time: 0.1435 data: 0.0674 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:16 lr: 0.000038 grad: 0.1731 (0.1926) loss: 0.7252 (0.7202) time: 0.1390 data: 0.0581 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:02 lr: 0.000038 grad: 0.1844 (0.1924) loss: 0.7164 (0.7201) time: 0.1260 data: 0.0389 max mem: 9377 +Train: [64] [4600/6250] eta: 0:03:48 lr: 0.000038 grad: 0.1739 (0.1922) loss: 0.7162 (0.7200) time: 0.1392 data: 0.0582 max mem: 9377 +Train: [64] [4700/6250] eta: 0:03:34 lr: 0.000038 grad: 0.1833 (0.1920) loss: 0.7296 (0.7199) time: 0.1590 data: 0.0802 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:20 lr: 0.000038 grad: 0.1773 (0.1918) loss: 0.7166 (0.7198) time: 0.1658 data: 0.0858 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:06 lr: 0.000038 grad: 0.1779 (0.1917) loss: 0.7275 (0.7198) time: 0.1458 data: 0.0620 max mem: 9377 +Train: [64] [5000/6250] eta: 0:02:53 lr: 0.000038 grad: 0.1767 (0.1915) loss: 0.7192 (0.7198) time: 0.1548 data: 0.0729 max mem: 9377 +Train: [64] [5100/6250] eta: 0:02:39 lr: 0.000038 grad: 0.1818 (0.1913) loss: 0.7133 (0.7199) time: 0.1296 data: 0.0399 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:25 lr: 0.000038 grad: 0.1770 (0.1911) loss: 0.7160 (0.7199) time: 0.1393 data: 0.0572 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:11 lr: 0.000038 grad: 0.1772 (0.1909) loss: 0.7222 (0.7200) time: 0.1419 data: 0.0599 max mem: 9377 +Train: [64] [5400/6250] eta: 0:01:57 lr: 0.000038 grad: 0.1794 (0.1907) loss: 0.7212 (0.7201) time: 0.1428 data: 0.0616 max mem: 9377 +Train: [64] [5500/6250] eta: 0:01:43 lr: 0.000038 grad: 0.1817 (0.1905) loss: 0.7123 (0.7201) time: 0.1405 data: 0.0594 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:29 lr: 0.000038 grad: 0.1768 (0.1903) loss: 0.7155 (0.7201) time: 0.1323 data: 0.0579 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:16 lr: 0.000038 grad: 0.1705 (0.1901) loss: 0.7286 (0.7202) time: 0.1010 data: 0.0177 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:02 lr: 0.000038 grad: 0.1803 (0.1900) loss: 0.7229 (0.7202) time: 0.1245 data: 0.0427 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:48 lr: 0.000037 grad: 0.1848 (0.1898) loss: 0.7162 (0.7202) time: 0.1156 data: 0.0333 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:34 lr: 0.000037 grad: 0.1747 (0.1896) loss: 0.7149 (0.7203) time: 0.1312 data: 0.0509 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:20 lr: 0.000037 grad: 0.1801 (0.1895) loss: 0.7223 (0.7204) time: 0.1307 data: 0.0506 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:06 lr: 0.000037 grad: 0.1731 (0.1893) loss: 0.7269 (0.7205) time: 0.1364 data: 0.0553 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1783 (0.1892) loss: 0.7176 (0.7205) time: 0.1221 data: 0.0431 max mem: 9377 +Train: [64] Total time: 0:14:28 (0.1390 s / it) +Averaged stats: lr: 0.000037 grad: 0.1783 (0.1892) loss: 0.7176 (0.7205) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:03:24 loss: 0.8652 (0.8652) time: 3.2922 data: 3.2174 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8677 (0.8705) time: 0.1237 data: 0.0968 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:13 (0.2161 s / it) +Averaged stats (hcp-train-subset): loss: 0.8677 (0.8705) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [64] [ 0/62] eta: 0:03:46 loss: 0.8693 (0.8693) time: 3.6562 data: 3.5917 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8674 (0.8681) time: 0.1187 data: 0.0923 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:13 (0.2143 s / it) +Averaged stats (hcp-val): loss: 0.8674 (0.8681) +Making plots (hcp-val): example=60 +Eval (nsd-val): [64] [ 0/62] eta: 0:05:14 loss: 0.8262 (0.8262) time: 5.0801 data: 5.0484 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8373 (0.8387) time: 0.1067 data: 0.0817 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:13 (0.2116 s / it) +Averaged stats (nsd-val): loss: 0.8373 (0.8387) +Making plots (nsd-val): example=24 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 7:20:00 lr: 0.000037 grad: 0.3825 (0.3825) loss: 0.7300 (0.7300) time: 4.2241 data: 4.0338 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:20:44 lr: 0.000037 grad: 0.3101 (0.3086) loss: 0.7243 (0.7419) time: 0.1653 data: 0.0714 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:18:04 lr: 0.000037 grad: 0.2359 (0.2861) loss: 0.7319 (0.7383) time: 0.1485 data: 0.0395 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:17:05 lr: 0.000037 grad: 0.2206 (0.2701) loss: 0.7238 (0.7349) time: 0.1442 data: 0.0454 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:16:23 lr: 0.000037 grad: 0.2361 (0.2611) loss: 0.7156 (0.7312) time: 0.1596 data: 0.0667 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:15:30 lr: 0.000037 grad: 0.2110 (0.2534) loss: 0.7229 (0.7286) time: 0.1307 data: 0.0472 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:14:51 lr: 0.000037 grad: 0.1997 (0.2454) loss: 0.7183 (0.7270) time: 0.1494 data: 0.0589 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:14:27 lr: 0.000037 grad: 0.1797 (0.2381) loss: 0.7311 (0.7270) time: 0.1310 data: 0.0471 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:14:24 lr: 0.000037 grad: 0.1854 (0.2318) loss: 0.7226 (0.7263) time: 0.1627 data: 0.0771 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:14:16 lr: 0.000037 grad: 0.1776 (0.2266) loss: 0.7317 (0.7258) time: 0.1867 data: 0.0919 max mem: 9377 +Train: [65] [1000/6250] eta: 0:14:03 lr: 0.000037 grad: 0.1855 (0.2225) loss: 0.7105 (0.7253) time: 0.1800 data: 0.0978 max mem: 9377 +Train: [65] [1100/6250] eta: 0:13:46 lr: 0.000037 grad: 0.1752 (0.2189) loss: 0.7330 (0.7252) time: 0.1554 data: 0.0754 max mem: 9377 +Train: [65] [1200/6250] eta: 0:13:24 lr: 0.000037 grad: 0.1800 (0.2159) loss: 0.7298 (0.7248) time: 0.1645 data: 0.0863 max mem: 9377 +Train: [65] [1300/6250] eta: 0:13:00 lr: 0.000037 grad: 0.1757 (0.2131) loss: 0.7236 (0.7248) time: 0.1396 data: 0.0553 max mem: 9377 +Train: [65] [1400/6250] eta: 0:12:38 lr: 0.000037 grad: 0.1811 (0.2111) loss: 0.7259 (0.7246) time: 0.1042 data: 0.0117 max mem: 9377 +Train: [65] [1500/6250] eta: 0:12:18 lr: 0.000037 grad: 0.1816 (0.2092) loss: 0.7260 (0.7245) time: 0.1372 data: 0.0572 max mem: 9377 +Train: [65] [1600/6250] eta: 0:12:02 lr: 0.000037 grad: 0.1835 (0.2077) loss: 0.7226 (0.7243) time: 0.1454 data: 0.0557 max mem: 9377 +Train: [65] [1700/6250] eta: 0:11:46 lr: 0.000037 grad: 0.1822 (0.2062) loss: 0.7250 (0.7241) time: 0.1519 data: 0.0535 max mem: 9377 +Train: [65] [1800/6250] eta: 0:11:39 lr: 0.000037 grad: 0.1879 (0.2049) loss: 0.7189 (0.7243) time: 0.2037 data: 0.1277 max mem: 9377 +Train: [65] [1900/6250] eta: 0:11:23 lr: 0.000037 grad: 0.1780 (0.2039) loss: 0.7286 (0.7241) time: 0.1348 data: 0.0587 max mem: 9377 +Train: [65] [2000/6250] eta: 0:11:08 lr: 0.000037 grad: 0.1834 (0.2030) loss: 0.7144 (0.7238) time: 0.1880 data: 0.1108 max mem: 9377 +Train: [65] [2100/6250] eta: 0:10:54 lr: 0.000037 grad: 0.1884 (0.2022) loss: 0.7152 (0.7237) time: 0.1524 data: 0.0678 max mem: 9377 +Train: [65] [2200/6250] eta: 0:10:39 lr: 0.000037 grad: 0.1847 (0.2015) loss: 0.7261 (0.7236) time: 0.1482 data: 0.0615 max mem: 9377 +Train: [65] [2300/6250] eta: 0:10:21 lr: 0.000037 grad: 0.1789 (0.2007) loss: 0.7239 (0.7237) time: 0.1596 data: 0.0795 max mem: 9377 +Train: [65] [2400/6250] eta: 0:10:03 lr: 0.000037 grad: 0.1811 (0.1999) loss: 0.7145 (0.7235) time: 0.1381 data: 0.0556 max mem: 9377 +Train: [65] [2500/6250] eta: 0:09:44 lr: 0.000037 grad: 0.1783 (0.1993) loss: 0.7142 (0.7231) time: 0.1297 data: 0.0393 max mem: 9377 +Train: [65] [2600/6250] eta: 0:09:26 lr: 0.000037 grad: 0.1777 (0.1986) loss: 0.7268 (0.7231) time: 0.1246 data: 0.0364 max mem: 9377 +Train: [65] [2700/6250] eta: 0:09:07 lr: 0.000037 grad: 0.1820 (0.1980) loss: 0.7253 (0.7231) time: 0.1383 data: 0.0506 max mem: 9377 +Train: [65] [2800/6250] eta: 0:08:48 lr: 0.000037 grad: 0.1789 (0.1974) loss: 0.7252 (0.7232) time: 0.1110 data: 0.0180 max mem: 9377 +Train: [65] [2900/6250] eta: 0:08:32 lr: 0.000037 grad: 0.1852 (0.1969) loss: 0.7181 (0.7231) time: 0.1712 data: 0.0923 max mem: 9377 +Train: [65] [3000/6250] eta: 0:08:15 lr: 0.000036 grad: 0.1836 (0.1964) loss: 0.7196 (0.7231) time: 0.1294 data: 0.0437 max mem: 9377 +Train: [65] [3100/6250] eta: 0:07:58 lr: 0.000036 grad: 0.1862 (0.1960) loss: 0.7109 (0.7229) time: 0.1315 data: 0.0490 max mem: 9377 +Train: [65] [3200/6250] eta: 0:07:41 lr: 0.000036 grad: 0.1820 (0.1956) loss: 0.7182 (0.7229) time: 0.1499 data: 0.0719 max mem: 9377 +Train: [65] [3300/6250] eta: 0:07:25 lr: 0.000036 grad: 0.1860 (0.1953) loss: 0.7102 (0.7228) time: 0.1310 data: 0.0487 max mem: 9377 +Train: [65] [3400/6250] eta: 0:07:09 lr: 0.000036 grad: 0.1817 (0.1949) loss: 0.7186 (0.7228) time: 0.1415 data: 0.0583 max mem: 9377 +Train: [65] [3500/6250] eta: 0:06:53 lr: 0.000036 grad: 0.1835 (0.1946) loss: 0.7270 (0.7227) time: 0.1389 data: 0.0591 max mem: 9377 +Train: [65] [3600/6250] eta: 0:06:38 lr: 0.000036 grad: 0.1865 (0.1943) loss: 0.7236 (0.7226) time: 0.1385 data: 0.0558 max mem: 9377 +Train: [65] [3700/6250] eta: 0:06:23 lr: 0.000036 grad: 0.1868 (0.1940) loss: 0.7146 (0.7226) time: 0.1762 data: 0.0967 max mem: 9377 +Train: [65] [3800/6250] eta: 0:06:07 lr: 0.000036 grad: 0.1816 (0.1937) loss: 0.7199 (0.7227) time: 0.1491 data: 0.0687 max mem: 9377 +Train: [65] [3900/6250] eta: 0:05:51 lr: 0.000036 grad: 0.1762 (0.1933) loss: 0.7257 (0.7228) time: 0.1248 data: 0.0449 max mem: 9377 +Train: [65] [4000/6250] eta: 0:05:36 lr: 0.000036 grad: 0.1829 (0.1930) loss: 0.7223 (0.7228) time: 0.1655 data: 0.0827 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:20 lr: 0.000036 grad: 0.1753 (0.1926) loss: 0.7347 (0.7230) time: 0.1258 data: 0.0380 max mem: 9377 +Train: [65] [4200/6250] eta: 0:05:04 lr: 0.000036 grad: 0.1803 (0.1922) loss: 0.7241 (0.7231) time: 0.1420 data: 0.0611 max mem: 9377 +Train: [65] [4300/6250] eta: 0:04:49 lr: 0.000036 grad: 0.1775 (0.1919) loss: 0.7220 (0.7231) time: 0.1317 data: 0.0493 max mem: 9377 +Train: [65] [4400/6250] eta: 0:04:33 lr: 0.000036 grad: 0.1739 (0.1916) loss: 0.7327 (0.7231) time: 0.1353 data: 0.0542 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:18 lr: 0.000036 grad: 0.1787 (0.1914) loss: 0.7187 (0.7231) time: 0.1424 data: 0.0578 max mem: 9377 +Train: [65] [4600/6250] eta: 0:04:03 lr: 0.000036 grad: 0.1887 (0.1913) loss: 0.7121 (0.7230) time: 0.1352 data: 0.0571 max mem: 9377 +Train: [65] [4700/6250] eta: 0:03:48 lr: 0.000036 grad: 0.1898 (0.1913) loss: 0.7081 (0.7229) time: 0.1557 data: 0.0738 max mem: 9377 +Train: [65] [4800/6250] eta: 0:03:33 lr: 0.000036 grad: 0.1942 (0.1912) loss: 0.7137 (0.7227) time: 0.1074 data: 0.0211 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:18 lr: 0.000036 grad: 0.1823 (0.1911) loss: 0.7191 (0.7226) time: 0.1446 data: 0.0627 max mem: 9377 +Train: [65] [5000/6250] eta: 0:03:03 lr: 0.000036 grad: 0.1774 (0.1909) loss: 0.7183 (0.7225) time: 0.1397 data: 0.0559 max mem: 9377 +Train: [65] [5100/6250] eta: 0:02:48 lr: 0.000036 grad: 0.1794 (0.1907) loss: 0.7146 (0.7224) time: 0.1472 data: 0.0670 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:33 lr: 0.000036 grad: 0.1850 (0.1906) loss: 0.7212 (0.7223) time: 0.1320 data: 0.0451 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:18 lr: 0.000036 grad: 0.1779 (0.1905) loss: 0.7196 (0.7222) time: 0.1401 data: 0.0636 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:04 lr: 0.000036 grad: 0.1856 (0.1904) loss: 0.7256 (0.7221) time: 0.1405 data: 0.0612 max mem: 9377 +Train: [65] [5500/6250] eta: 0:01:49 lr: 0.000036 grad: 0.1768 (0.1903) loss: 0.7294 (0.7222) time: 0.1515 data: 0.0728 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:34 lr: 0.000036 grad: 0.1758 (0.1901) loss: 0.7219 (0.7222) time: 0.1381 data: 0.0536 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:20 lr: 0.000036 grad: 0.1836 (0.1900) loss: 0.7218 (0.7222) time: 0.1539 data: 0.0700 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:05 lr: 0.000036 grad: 0.1879 (0.1899) loss: 0.7158 (0.7222) time: 0.1521 data: 0.0740 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:50 lr: 0.000036 grad: 0.1786 (0.1898) loss: 0.7175 (0.7222) time: 0.1293 data: 0.0485 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:36 lr: 0.000036 grad: 0.1804 (0.1898) loss: 0.7221 (0.7222) time: 0.1498 data: 0.0647 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:21 lr: 0.000036 grad: 0.1817 (0.1897) loss: 0.7244 (0.7222) time: 0.1416 data: 0.0613 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:07 lr: 0.000036 grad: 0.1871 (0.1895) loss: 0.7172 (0.7222) time: 0.1159 data: 0.0238 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1909 (0.1896) loss: 0.7193 (0.7221) time: 0.1426 data: 0.0618 max mem: 9377 +Train: [65] Total time: 0:15:13 (0.1461 s / it) +Averaged stats: lr: 0.000036 grad: 0.1909 (0.1896) loss: 0.7193 (0.7221) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:04:47 loss: 0.8634 (0.8634) time: 4.6387 data: 4.5985 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8718 (0.8718) time: 0.1181 data: 0.0914 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-train-subset): loss: 0.8718 (0.8718) +Eval (hcp-val): [65] [ 0/62] eta: 0:05:53 loss: 0.8722 (0.8722) time: 5.6975 data: 5.6653 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8646 (0.8686) time: 0.1318 data: 0.1033 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (hcp-val): loss: 0.8646 (0.8686) +Eval (nsd-val): [65] [ 0/62] eta: 0:03:58 loss: 0.8316 (0.8316) time: 3.8542 data: 3.7847 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8442 (0.8461) time: 0.1168 data: 0.0911 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (nsd-val): loss: 0.8442 (0.8461) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 10:22:35 lr: 0.000036 grad: 0.2429 (0.2429) loss: 0.7511 (0.7511) time: 5.9769 data: 5.8468 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:19:27 lr: 0.000035 grad: 0.2907 (0.3112) loss: 0.7266 (0.7432) time: 0.1441 data: 0.0387 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:16:56 lr: 0.000035 grad: 0.2725 (0.2901) loss: 0.7158 (0.7351) time: 0.1319 data: 0.0395 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:15:49 lr: 0.000035 grad: 0.2402 (0.2855) loss: 0.6851 (0.7264) time: 0.1470 data: 0.0536 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:14:54 lr: 0.000035 grad: 0.2460 (0.2784) loss: 0.7068 (0.7225) time: 0.1114 data: 0.0206 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:14:18 lr: 0.000035 grad: 0.1951 (0.2655) loss: 0.7342 (0.7216) time: 0.1394 data: 0.0446 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:13:45 lr: 0.000035 grad: 0.2014 (0.2541) loss: 0.7126 (0.7214) time: 0.1305 data: 0.0357 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:13:26 lr: 0.000035 grad: 0.1862 (0.2461) loss: 0.7092 (0.7205) time: 0.1366 data: 0.0479 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:13:06 lr: 0.000035 grad: 0.1967 (0.2397) loss: 0.6982 (0.7197) time: 0.1349 data: 0.0425 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:12:50 lr: 0.000035 grad: 0.1911 (0.2341) loss: 0.7128 (0.7194) time: 0.1521 data: 0.0633 max mem: 9377 +Train: [66] [1000/6250] eta: 0:12:29 lr: 0.000035 grad: 0.1938 (0.2294) loss: 0.6874 (0.7188) time: 0.1043 data: 0.0209 max mem: 9377 +Train: [66] [1100/6250] eta: 0:12:11 lr: 0.000035 grad: 0.1829 (0.2259) loss: 0.7148 (0.7181) time: 0.1427 data: 0.0620 max mem: 9377 +Train: [66] [1200/6250] eta: 0:11:56 lr: 0.000035 grad: 0.1850 (0.2228) loss: 0.7107 (0.7176) time: 0.1483 data: 0.0597 max mem: 9377 +Train: [66] [1300/6250] eta: 0:11:36 lr: 0.000035 grad: 0.1854 (0.2202) loss: 0.7171 (0.7175) time: 0.1341 data: 0.0428 max mem: 9377 +Train: [66] [1400/6250] eta: 0:11:21 lr: 0.000035 grad: 0.1844 (0.2177) loss: 0.7126 (0.7175) time: 0.1411 data: 0.0568 max mem: 9377 +Train: [66] [1500/6250] eta: 0:11:06 lr: 0.000035 grad: 0.1834 (0.2157) loss: 0.7191 (0.7173) time: 0.1464 data: 0.0652 max mem: 9377 +Train: [66] [1600/6250] eta: 0:10:52 lr: 0.000035 grad: 0.1816 (0.2136) loss: 0.7123 (0.7174) time: 0.1629 data: 0.0853 max mem: 9377 +Train: [66] [1700/6250] eta: 0:10:43 lr: 0.000035 grad: 0.1826 (0.2119) loss: 0.7218 (0.7174) time: 0.1436 data: 0.0625 max mem: 9377 +Train: [66] [1800/6250] eta: 0:10:31 lr: 0.000035 grad: 0.1884 (0.2107) loss: 0.7117 (0.7174) time: 0.1494 data: 0.0645 max mem: 9377 +Train: [66] [1900/6250] eta: 0:10:19 lr: 0.000035 grad: 0.1727 (0.2092) loss: 0.7196 (0.7176) time: 0.1488 data: 0.0494 max mem: 9377 +Train: [66] [2000/6250] eta: 0:10:05 lr: 0.000035 grad: 0.1807 (0.2079) loss: 0.7164 (0.7176) time: 0.1500 data: 0.0687 max mem: 9377 +Train: [66] [2100/6250] eta: 0:09:54 lr: 0.000035 grad: 0.1761 (0.2067) loss: 0.7294 (0.7178) time: 0.1447 data: 0.0609 max mem: 9377 +Train: [66] [2200/6250] eta: 0:09:39 lr: 0.000035 grad: 0.1785 (0.2056) loss: 0.7249 (0.7179) time: 0.1370 data: 0.0599 max mem: 9377 +Train: [66] [2300/6250] eta: 0:09:23 lr: 0.000035 grad: 0.1825 (0.2046) loss: 0.7284 (0.7180) time: 0.1324 data: 0.0543 max mem: 9377 +Train: [66] [2400/6250] eta: 0:09:07 lr: 0.000035 grad: 0.1749 (0.2036) loss: 0.7242 (0.7182) time: 0.1421 data: 0.0581 max mem: 9377 +Train: [66] [2500/6250] eta: 0:08:53 lr: 0.000035 grad: 0.1796 (0.2028) loss: 0.7282 (0.7184) time: 0.1526 data: 0.0695 max mem: 9377 +Train: [66] [2600/6250] eta: 0:08:39 lr: 0.000035 grad: 0.1777 (0.2020) loss: 0.7243 (0.7187) time: 0.1566 data: 0.0754 max mem: 9377 +Train: [66] [2700/6250] eta: 0:08:26 lr: 0.000035 grad: 0.1802 (0.2013) loss: 0.7259 (0.7191) time: 0.1429 data: 0.0537 max mem: 9377 +Train: [66] [2800/6250] eta: 0:08:13 lr: 0.000035 grad: 0.1841 (0.2007) loss: 0.7307 (0.7194) time: 0.1778 data: 0.0900 max mem: 9377 +Train: [66] [2900/6250] eta: 0:07:59 lr: 0.000035 grad: 0.1823 (0.2000) loss: 0.7257 (0.7196) time: 0.1437 data: 0.0555 max mem: 9377 +Train: [66] [3000/6250] eta: 0:07:44 lr: 0.000035 grad: 0.1778 (0.1994) loss: 0.7225 (0.7197) time: 0.1344 data: 0.0477 max mem: 9377 +Train: [66] [3100/6250] eta: 0:07:29 lr: 0.000035 grad: 0.1859 (0.1988) loss: 0.7178 (0.7198) time: 0.1322 data: 0.0454 max mem: 9377 +Train: [66] [3200/6250] eta: 0:07:16 lr: 0.000035 grad: 0.1845 (0.1983) loss: 0.7164 (0.7200) time: 0.1795 data: 0.0920 max mem: 9377 +Train: [66] [3300/6250] eta: 0:07:02 lr: 0.000035 grad: 0.1834 (0.1978) loss: 0.7253 (0.7201) time: 0.1394 data: 0.0600 max mem: 9377 +Train: [66] [3400/6250] eta: 0:06:50 lr: 0.000035 grad: 0.1835 (0.1974) loss: 0.7173 (0.7204) time: 0.1790 data: 0.1021 max mem: 9377 +Train: [66] [3500/6250] eta: 0:06:37 lr: 0.000034 grad: 0.1744 (0.1969) loss: 0.7290 (0.7205) time: 0.1951 data: 0.1225 max mem: 9377 +Train: [66] [3600/6250] eta: 0:06:23 lr: 0.000034 grad: 0.1784 (0.1965) loss: 0.7109 (0.7206) time: 0.1703 data: 0.0902 max mem: 9377 +Train: [66] [3700/6250] eta: 0:06:10 lr: 0.000034 grad: 0.1819 (0.1962) loss: 0.7267 (0.7206) time: 0.1822 data: 0.1106 max mem: 9377 +Train: [66] [3800/6250] eta: 0:05:56 lr: 0.000034 grad: 0.1828 (0.1958) loss: 0.7271 (0.7206) time: 0.1531 data: 0.0716 max mem: 9377 +Train: [66] [3900/6250] eta: 0:05:42 lr: 0.000034 grad: 0.1781 (0.1955) loss: 0.7228 (0.7207) time: 0.1649 data: 0.0851 max mem: 9377 +Train: [66] [4000/6250] eta: 0:05:28 lr: 0.000034 grad: 0.1752 (0.1951) loss: 0.7325 (0.7208) time: 0.1687 data: 0.0893 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:13 lr: 0.000034 grad: 0.1796 (0.1948) loss: 0.7302 (0.7209) time: 0.1317 data: 0.0434 max mem: 9377 +Train: [66] [4200/6250] eta: 0:04:58 lr: 0.000034 grad: 0.1771 (0.1945) loss: 0.7350 (0.7210) time: 0.1421 data: 0.0608 max mem: 9377 +Train: [66] [4300/6250] eta: 0:04:43 lr: 0.000034 grad: 0.1780 (0.1941) loss: 0.7251 (0.7211) time: 0.1217 data: 0.0436 max mem: 9377 +Train: [66] [4400/6250] eta: 0:04:28 lr: 0.000034 grad: 0.1745 (0.1938) loss: 0.7216 (0.7212) time: 0.1390 data: 0.0616 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:14 lr: 0.000034 grad: 0.1815 (0.1936) loss: 0.7123 (0.7212) time: 0.1321 data: 0.0477 max mem: 9377 +Train: [66] [4600/6250] eta: 0:03:59 lr: 0.000034 grad: 0.1832 (0.1934) loss: 0.7219 (0.7212) time: 0.1301 data: 0.0461 max mem: 9377 +Train: [66] [4700/6250] eta: 0:03:44 lr: 0.000034 grad: 0.1686 (0.1931) loss: 0.7309 (0.7212) time: 0.1425 data: 0.0631 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:29 lr: 0.000034 grad: 0.1828 (0.1928) loss: 0.7256 (0.7212) time: 0.1359 data: 0.0512 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:15 lr: 0.000034 grad: 0.1760 (0.1925) loss: 0.7263 (0.7213) time: 0.1557 data: 0.0775 max mem: 9377 +Train: [66] [5000/6250] eta: 0:03:00 lr: 0.000034 grad: 0.1842 (0.1924) loss: 0.7205 (0.7214) time: 0.1393 data: 0.0557 max mem: 9377 +Train: [66] [5100/6250] eta: 0:02:45 lr: 0.000034 grad: 0.1816 (0.1922) loss: 0.7238 (0.7214) time: 0.1504 data: 0.0715 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:31 lr: 0.000034 grad: 0.1793 (0.1921) loss: 0.7197 (0.7214) time: 0.1026 data: 0.0206 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:16 lr: 0.000034 grad: 0.1812 (0.1919) loss: 0.7142 (0.7214) time: 0.1336 data: 0.0552 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:02 lr: 0.000034 grad: 0.1834 (0.1918) loss: 0.7332 (0.7215) time: 0.1270 data: 0.0424 max mem: 9377 +Train: [66] [5500/6250] eta: 0:01:47 lr: 0.000034 grad: 0.1848 (0.1916) loss: 0.7174 (0.7215) time: 0.1512 data: 0.0668 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:33 lr: 0.000034 grad: 0.1850 (0.1914) loss: 0.7228 (0.7216) time: 0.0973 data: 0.0128 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:18 lr: 0.000034 grad: 0.1761 (0.1913) loss: 0.7193 (0.7216) time: 0.1275 data: 0.0457 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:04 lr: 0.000034 grad: 0.1848 (0.1911) loss: 0.7098 (0.7215) time: 0.1388 data: 0.0619 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:50 lr: 0.000034 grad: 0.1819 (0.1910) loss: 0.7204 (0.7215) time: 0.1216 data: 0.0376 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:35 lr: 0.000034 grad: 0.1760 (0.1909) loss: 0.7282 (0.7215) time: 0.1420 data: 0.0618 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:21 lr: 0.000034 grad: 0.1805 (0.1908) loss: 0.7297 (0.7215) time: 0.1214 data: 0.0424 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:07 lr: 0.000034 grad: 0.1786 (0.1906) loss: 0.7299 (0.7216) time: 0.0869 data: 0.0002 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1848 (0.1905) loss: 0.7158 (0.7216) time: 0.1009 data: 0.0142 max mem: 9377 +Train: [66] Total time: 0:14:58 (0.1437 s / it) +Averaged stats: lr: 0.000034 grad: 0.1848 (0.1905) loss: 0.7158 (0.7216) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:03:43 loss: 0.8642 (0.8642) time: 3.6040 data: 3.4825 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8693 (0.8701) time: 0.1469 data: 0.1194 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:13 (0.2212 s / it) +Averaged stats (hcp-train-subset): loss: 0.8693 (0.8701) +Eval (hcp-val): [66] [ 0/62] eta: 0:04:50 loss: 0.8646 (0.8646) time: 4.6908 data: 4.6588 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8633 (0.8664) time: 0.1158 data: 0.0886 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:13 (0.2215 s / it) +Averaged stats (hcp-val): loss: 0.8633 (0.8664) +Eval (nsd-val): [66] [ 0/62] eta: 0:05:49 loss: 0.8240 (0.8240) time: 5.6359 data: 5.6049 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8385 (0.8382) time: 0.1218 data: 0.0950 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8385 (0.8382) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 7:59:42 lr: 0.000034 grad: 0.1277 (0.1277) loss: 0.7969 (0.7969) time: 4.6051 data: 4.2952 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:19:44 lr: 0.000034 grad: 0.3011 (0.3763) loss: 0.7053 (0.7263) time: 0.1539 data: 0.0661 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:16:54 lr: 0.000034 grad: 0.3084 (0.3467) loss: 0.6923 (0.7142) time: 0.1527 data: 0.0653 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:15:44 lr: 0.000034 grad: 0.2571 (0.3226) loss: 0.7147 (0.7101) time: 0.1348 data: 0.0472 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:14:51 lr: 0.000034 grad: 0.2291 (0.2999) loss: 0.7005 (0.7086) time: 0.1402 data: 0.0484 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:14:11 lr: 0.000034 grad: 0.1960 (0.2832) loss: 0.7209 (0.7092) time: 0.1329 data: 0.0348 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:13:43 lr: 0.000033 grad: 0.1947 (0.2705) loss: 0.7138 (0.7097) time: 0.1434 data: 0.0569 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:13:17 lr: 0.000033 grad: 0.1993 (0.2614) loss: 0.6992 (0.7105) time: 0.1473 data: 0.0621 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:13:00 lr: 0.000033 grad: 0.1918 (0.2534) loss: 0.7090 (0.7107) time: 0.1468 data: 0.0578 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:12:48 lr: 0.000033 grad: 0.1927 (0.2466) loss: 0.7048 (0.7112) time: 0.1526 data: 0.0656 max mem: 9377 +Train: [67] [1000/6250] eta: 0:12:40 lr: 0.000033 grad: 0.1888 (0.2410) loss: 0.7280 (0.7116) time: 0.1424 data: 0.0608 max mem: 9377 +Train: [67] [1100/6250] eta: 0:12:28 lr: 0.000033 grad: 0.1895 (0.2365) loss: 0.7128 (0.7121) time: 0.1509 data: 0.0712 max mem: 9377 +Train: [67] [1200/6250] eta: 0:12:18 lr: 0.000033 grad: 0.1853 (0.2322) loss: 0.7223 (0.7126) time: 0.1583 data: 0.0763 max mem: 9377 +Train: [67] [1300/6250] eta: 0:12:07 lr: 0.000033 grad: 0.1747 (0.2287) loss: 0.7206 (0.7129) time: 0.1629 data: 0.0765 max mem: 9377 +Train: [67] [1400/6250] eta: 0:11:55 lr: 0.000033 grad: 0.1859 (0.2256) loss: 0.7230 (0.7134) time: 0.1587 data: 0.0734 max mem: 9377 +Train: [67] [1500/6250] eta: 0:11:38 lr: 0.000033 grad: 0.1832 (0.2229) loss: 0.7142 (0.7135) time: 0.1500 data: 0.0655 max mem: 9377 +Train: [67] [1600/6250] eta: 0:11:21 lr: 0.000033 grad: 0.1827 (0.2204) loss: 0.7245 (0.7137) time: 0.1636 data: 0.0849 max mem: 9377 +Train: [67] [1700/6250] eta: 0:11:12 lr: 0.000033 grad: 0.1886 (0.2184) loss: 0.7170 (0.7138) time: 0.1656 data: 0.0735 max mem: 9377 +Train: [67] [1800/6250] eta: 0:11:03 lr: 0.000033 grad: 0.1870 (0.2167) loss: 0.7100 (0.7137) time: 0.1804 data: 0.0874 max mem: 9377 +Train: [67] [1900/6250] eta: 0:10:52 lr: 0.000033 grad: 0.1821 (0.2150) loss: 0.7188 (0.7137) time: 0.1610 data: 0.0819 max mem: 9377 +Train: [67] [2000/6250] eta: 0:10:37 lr: 0.000033 grad: 0.1831 (0.2136) loss: 0.7047 (0.7134) time: 0.1431 data: 0.0600 max mem: 9377 +Train: [67] [2100/6250] eta: 0:10:22 lr: 0.000033 grad: 0.1905 (0.2124) loss: 0.7039 (0.7133) time: 0.1479 data: 0.0602 max mem: 9377 +Train: [67] [2200/6250] eta: 0:10:09 lr: 0.000033 grad: 0.1771 (0.2111) loss: 0.7193 (0.7133) time: 0.1625 data: 0.0743 max mem: 9377 +Train: [67] [2300/6250] eta: 0:09:51 lr: 0.000033 grad: 0.1842 (0.2097) loss: 0.7091 (0.7134) time: 0.1358 data: 0.0581 max mem: 9377 +Train: [67] [2400/6250] eta: 0:09:37 lr: 0.000033 grad: 0.1880 (0.2089) loss: 0.7050 (0.7133) time: 0.1537 data: 0.0739 max mem: 9377 +Train: [67] [2500/6250] eta: 0:09:20 lr: 0.000033 grad: 0.1830 (0.2080) loss: 0.7094 (0.7134) time: 0.1292 data: 0.0450 max mem: 9377 +Train: [67] [2600/6250] eta: 0:09:02 lr: 0.000033 grad: 0.1865 (0.2073) loss: 0.7255 (0.7133) time: 0.1296 data: 0.0449 max mem: 9377 +Train: [67] [2700/6250] eta: 0:08:46 lr: 0.000033 grad: 0.1851 (0.2065) loss: 0.7133 (0.7133) time: 0.1322 data: 0.0539 max mem: 9377 +Train: [67] [2800/6250] eta: 0:08:28 lr: 0.000033 grad: 0.1828 (0.2059) loss: 0.7170 (0.7133) time: 0.1336 data: 0.0568 max mem: 9377 +Train: [67] [2900/6250] eta: 0:08:13 lr: 0.000033 grad: 0.1847 (0.2053) loss: 0.7116 (0.7132) time: 0.1616 data: 0.0773 max mem: 9377 +Train: [67] [3000/6250] eta: 0:07:57 lr: 0.000033 grad: 0.1884 (0.2047) loss: 0.7122 (0.7131) time: 0.1428 data: 0.0604 max mem: 9377 +Train: [67] [3100/6250] eta: 0:07:40 lr: 0.000033 grad: 0.1886 (0.2042) loss: 0.7102 (0.7131) time: 0.1107 data: 0.0230 max mem: 9377 +Train: [67] [3200/6250] eta: 0:07:24 lr: 0.000033 grad: 0.1872 (0.2038) loss: 0.7112 (0.7131) time: 0.1242 data: 0.0384 max mem: 9377 +Train: [67] [3300/6250] eta: 0:07:08 lr: 0.000033 grad: 0.1795 (0.2032) loss: 0.7271 (0.7133) time: 0.1189 data: 0.0354 max mem: 9377 +Train: [67] [3400/6250] eta: 0:06:53 lr: 0.000033 grad: 0.1859 (0.2027) loss: 0.7131 (0.7133) time: 0.1306 data: 0.0494 max mem: 9377 +Train: [67] [3500/6250] eta: 0:06:37 lr: 0.000033 grad: 0.1841 (0.2023) loss: 0.7159 (0.7134) time: 0.1276 data: 0.0462 max mem: 9377 +Train: [67] [3600/6250] eta: 0:06:22 lr: 0.000033 grad: 0.1859 (0.2020) loss: 0.7267 (0.7136) time: 0.1501 data: 0.0728 max mem: 9377 +Train: [67] [3700/6250] eta: 0:06:07 lr: 0.000033 grad: 0.1802 (0.2014) loss: 0.7205 (0.7139) time: 0.1098 data: 0.0297 max mem: 9377 +Train: [67] [3800/6250] eta: 0:05:53 lr: 0.000033 grad: 0.1817 (0.2010) loss: 0.7283 (0.7140) time: 0.1359 data: 0.0521 max mem: 9377 +Train: [67] [3900/6250] eta: 0:05:38 lr: 0.000033 grad: 0.1778 (0.2005) loss: 0.7305 (0.7142) time: 0.1155 data: 0.0293 max mem: 9377 +Train: [67] [4000/6250] eta: 0:05:23 lr: 0.000032 grad: 0.1837 (0.2001) loss: 0.7119 (0.7143) time: 0.1424 data: 0.0576 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:08 lr: 0.000032 grad: 0.1834 (0.1998) loss: 0.7293 (0.7145) time: 0.1371 data: 0.0521 max mem: 9377 +Train: [67] [4200/6250] eta: 0:04:54 lr: 0.000032 grad: 0.1881 (0.1996) loss: 0.7004 (0.7146) time: 0.1392 data: 0.0556 max mem: 9377 +Train: [67] [4300/6250] eta: 0:04:39 lr: 0.000032 grad: 0.1798 (0.1992) loss: 0.7298 (0.7147) time: 0.1373 data: 0.0532 max mem: 9377 +Train: [67] [4400/6250] eta: 0:04:24 lr: 0.000032 grad: 0.1845 (0.1989) loss: 0.7258 (0.7148) time: 0.1392 data: 0.0601 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:10 lr: 0.000032 grad: 0.1792 (0.1986) loss: 0.7209 (0.7150) time: 0.1484 data: 0.0687 max mem: 9377 +Train: [67] [4600/6250] eta: 0:03:55 lr: 0.000032 grad: 0.1849 (0.1982) loss: 0.7181 (0.7151) time: 0.1387 data: 0.0567 max mem: 9377 +Train: [67] [4700/6250] eta: 0:03:41 lr: 0.000032 grad: 0.1841 (0.1979) loss: 0.7145 (0.7151) time: 0.1366 data: 0.0550 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:27 lr: 0.000032 grad: 0.1822 (0.1977) loss: 0.7165 (0.7151) time: 0.1530 data: 0.0725 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:13 lr: 0.000032 grad: 0.1836 (0.1975) loss: 0.7218 (0.7152) time: 0.1508 data: 0.0709 max mem: 9377 +Train: [67] [5000/6250] eta: 0:02:58 lr: 0.000032 grad: 0.1860 (0.1974) loss: 0.7094 (0.7150) time: 0.1497 data: 0.0703 max mem: 9377 +Train: [67] [5100/6250] eta: 0:02:44 lr: 0.000032 grad: 0.1945 (0.1972) loss: 0.7124 (0.7149) time: 0.1278 data: 0.0478 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:30 lr: 0.000032 grad: 0.1922 (0.1971) loss: 0.7074 (0.7149) time: 0.1465 data: 0.0661 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:15 lr: 0.000032 grad: 0.1810 (0.1969) loss: 0.7188 (0.7149) time: 0.1464 data: 0.0735 max mem: 9377 +Train: [67] [5400/6250] eta: 0:02:01 lr: 0.000032 grad: 0.1845 (0.1968) loss: 0.7200 (0.7149) time: 0.1226 data: 0.0334 max mem: 9377 +Train: [67] [5500/6250] eta: 0:01:47 lr: 0.000032 grad: 0.1937 (0.1967) loss: 0.7153 (0.7148) time: 0.1102 data: 0.0260 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:32 lr: 0.000032 grad: 0.1890 (0.1966) loss: 0.7043 (0.7148) time: 0.1343 data: 0.0559 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:18 lr: 0.000032 grad: 0.1889 (0.1965) loss: 0.7146 (0.7148) time: 0.1435 data: 0.0586 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:04 lr: 0.000032 grad: 0.1930 (0.1963) loss: 0.7105 (0.7148) time: 0.1293 data: 0.0505 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:49 lr: 0.000032 grad: 0.1858 (0.1962) loss: 0.7272 (0.7148) time: 0.1314 data: 0.0475 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:35 lr: 0.000032 grad: 0.1915 (0.1962) loss: 0.7096 (0.7148) time: 0.1238 data: 0.0419 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:21 lr: 0.000032 grad: 0.1844 (0.1961) loss: 0.7094 (0.7147) time: 0.1212 data: 0.0362 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:07 lr: 0.000032 grad: 0.1861 (0.1960) loss: 0.7114 (0.7148) time: 0.1274 data: 0.0396 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1882 (0.1959) loss: 0.7196 (0.7148) time: 0.1440 data: 0.0643 max mem: 9377 +Train: [67] Total time: 0:14:51 (0.1426 s / it) +Averaged stats: lr: 0.000032 grad: 0.1882 (0.1959) loss: 0.7196 (0.7148) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:03:48 loss: 0.8643 (0.8643) time: 3.6856 data: 3.6214 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8705 (0.8714) time: 0.1297 data: 0.1046 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:13 (0.2181 s / it) +Averaged stats (hcp-train-subset): loss: 0.8705 (0.8714) +Eval (hcp-val): [67] [ 0/62] eta: 0:05:22 loss: 0.8657 (0.8657) time: 5.2000 data: 5.1692 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8665 (0.8683) time: 0.1311 data: 0.1040 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (hcp-val): loss: 0.8665 (0.8683) +Eval (nsd-val): [67] [ 0/62] eta: 0:05:55 loss: 0.8305 (0.8305) time: 5.7322 data: 5.7000 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8384 (0.8398) time: 0.1264 data: 0.1007 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:13 (0.2152 s / it) +Averaged stats (nsd-val): loss: 0.8384 (0.8398) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 10:41:57 lr: 0.000032 grad: 0.4708 (0.4708) loss: 0.4600 (0.4600) time: 6.1628 data: 6.0198 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:20:06 lr: 0.000032 grad: 0.3219 (0.3783) loss: 0.7185 (0.7153) time: 0.1355 data: 0.0348 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:17:50 lr: 0.000032 grad: 0.2455 (0.3392) loss: 0.7256 (0.7163) time: 0.1751 data: 0.0775 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:16:20 lr: 0.000032 grad: 0.2360 (0.3056) loss: 0.7226 (0.7179) time: 0.1495 data: 0.0647 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:15:23 lr: 0.000032 grad: 0.1911 (0.2829) loss: 0.7409 (0.7192) time: 0.1255 data: 0.0350 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:14:45 lr: 0.000032 grad: 0.1835 (0.2643) loss: 0.7241 (0.7217) time: 0.1349 data: 0.0504 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:14:13 lr: 0.000032 grad: 0.1985 (0.2523) loss: 0.7222 (0.7225) time: 0.1344 data: 0.0474 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:13:43 lr: 0.000032 grad: 0.1863 (0.2436) loss: 0.7126 (0.7218) time: 0.1292 data: 0.0447 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:13:16 lr: 0.000032 grad: 0.1866 (0.2368) loss: 0.7186 (0.7210) time: 0.1315 data: 0.0440 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:13:00 lr: 0.000032 grad: 0.1852 (0.2317) loss: 0.7203 (0.7207) time: 0.1624 data: 0.0775 max mem: 9377 +Train: [68] [1000/6250] eta: 0:12:51 lr: 0.000032 grad: 0.1839 (0.2271) loss: 0.7195 (0.7204) time: 0.1789 data: 0.0992 max mem: 9377 +Train: [68] [1100/6250] eta: 0:12:37 lr: 0.000032 grad: 0.1833 (0.2230) loss: 0.7273 (0.7205) time: 0.1453 data: 0.0590 max mem: 9377 +Train: [68] [1200/6250] eta: 0:12:23 lr: 0.000032 grad: 0.1786 (0.2197) loss: 0.7258 (0.7206) time: 0.1494 data: 0.0693 max mem: 9377 +Train: [68] [1300/6250] eta: 0:12:05 lr: 0.000031 grad: 0.1836 (0.2169) loss: 0.7137 (0.7208) time: 0.1301 data: 0.0481 max mem: 9377 +Train: [68] [1400/6250] eta: 0:11:49 lr: 0.000031 grad: 0.1830 (0.2145) loss: 0.7268 (0.7208) time: 0.1528 data: 0.0758 max mem: 9377 +Train: [68] [1500/6250] eta: 0:11:34 lr: 0.000031 grad: 0.1843 (0.2124) loss: 0.7223 (0.7210) time: 0.1308 data: 0.0411 max mem: 9377 +Train: [68] [1600/6250] eta: 0:11:17 lr: 0.000031 grad: 0.1742 (0.2103) loss: 0.7372 (0.7217) time: 0.1313 data: 0.0485 max mem: 9377 +Train: [68] [1700/6250] eta: 0:11:07 lr: 0.000031 grad: 0.1884 (0.2087) loss: 0.7195 (0.7217) time: 0.1571 data: 0.0673 max mem: 9377 +Train: [68] [1800/6250] eta: 0:10:55 lr: 0.000031 grad: 0.1809 (0.2072) loss: 0.7156 (0.7217) time: 0.1532 data: 0.0728 max mem: 9377 +Train: [68] [1900/6250] eta: 0:10:40 lr: 0.000031 grad: 0.1790 (0.2060) loss: 0.7320 (0.7217) time: 0.1285 data: 0.0484 max mem: 9377 +Train: [68] [2000/6250] eta: 0:10:24 lr: 0.000031 grad: 0.1826 (0.2050) loss: 0.7232 (0.7214) time: 0.1288 data: 0.0387 max mem: 9377 +Train: [68] [2100/6250] eta: 0:10:06 lr: 0.000031 grad: 0.1835 (0.2042) loss: 0.7146 (0.7211) time: 0.1152 data: 0.0398 max mem: 9377 +Train: [68] [2200/6250] eta: 0:09:50 lr: 0.000031 grad: 0.1789 (0.2033) loss: 0.7205 (0.7209) time: 0.1396 data: 0.0655 max mem: 9377 +Train: [68] [2300/6250] eta: 0:09:34 lr: 0.000031 grad: 0.1820 (0.2025) loss: 0.7196 (0.7208) time: 0.1284 data: 0.0427 max mem: 9377 +Train: [68] [2400/6250] eta: 0:09:17 lr: 0.000031 grad: 0.1879 (0.2018) loss: 0.7169 (0.7208) time: 0.1189 data: 0.0306 max mem: 9377 +Train: [68] [2500/6250] eta: 0:09:00 lr: 0.000031 grad: 0.1794 (0.2011) loss: 0.7242 (0.7208) time: 0.1326 data: 0.0499 max mem: 9377 +Train: [68] [2600/6250] eta: 0:08:44 lr: 0.000031 grad: 0.1869 (0.2005) loss: 0.7106 (0.7208) time: 0.1410 data: 0.0623 max mem: 9377 +Train: [68] [2700/6250] eta: 0:08:27 lr: 0.000031 grad: 0.1809 (0.2000) loss: 0.7302 (0.7207) time: 0.1294 data: 0.0474 max mem: 9377 +Train: [68] [2800/6250] eta: 0:08:12 lr: 0.000031 grad: 0.1774 (0.1994) loss: 0.7276 (0.7209) time: 0.1427 data: 0.0633 max mem: 9377 +Train: [68] [2900/6250] eta: 0:07:57 lr: 0.000031 grad: 0.1814 (0.1989) loss: 0.7246 (0.7211) time: 0.1575 data: 0.0764 max mem: 9377 +Train: [68] [3000/6250] eta: 0:07:44 lr: 0.000031 grad: 0.1827 (0.1985) loss: 0.7243 (0.7211) time: 0.1558 data: 0.0704 max mem: 9377 +Train: [68] [3100/6250] eta: 0:07:32 lr: 0.000031 grad: 0.1899 (0.1980) loss: 0.7211 (0.7212) time: 0.1747 data: 0.0880 max mem: 9377 +Train: [68] [3200/6250] eta: 0:07:20 lr: 0.000031 grad: 0.1814 (0.1975) loss: 0.7013 (0.7213) time: 0.1696 data: 0.0905 max mem: 9377 +Train: [68] [3300/6250] eta: 0:07:06 lr: 0.000031 grad: 0.1907 (0.1973) loss: 0.7228 (0.7212) time: 0.1489 data: 0.0646 max mem: 9377 +Train: [68] [3400/6250] eta: 0:06:53 lr: 0.000031 grad: 0.1915 (0.1970) loss: 0.7094 (0.7210) time: 0.1662 data: 0.0866 max mem: 9377 +Train: [68] [3500/6250] eta: 0:06:38 lr: 0.000031 grad: 0.1831 (0.1968) loss: 0.7197 (0.7210) time: 0.1460 data: 0.0629 max mem: 9377 +Train: [68] [3600/6250] eta: 0:06:23 lr: 0.000031 grad: 0.1840 (0.1965) loss: 0.7156 (0.7209) time: 0.1360 data: 0.0485 max mem: 9377 +Train: [68] [3700/6250] eta: 0:06:08 lr: 0.000031 grad: 0.1882 (0.1963) loss: 0.7101 (0.7208) time: 0.1469 data: 0.0669 max mem: 9377 +Train: [68] [3800/6250] eta: 0:05:53 lr: 0.000031 grad: 0.1910 (0.1961) loss: 0.7160 (0.7206) time: 0.1385 data: 0.0510 max mem: 9377 +Train: [68] [3900/6250] eta: 0:05:38 lr: 0.000031 grad: 0.1907 (0.1959) loss: 0.7163 (0.7205) time: 0.1202 data: 0.0333 max mem: 9377 +Train: [68] [4000/6250] eta: 0:05:23 lr: 0.000031 grad: 0.1864 (0.1957) loss: 0.7133 (0.7204) time: 0.1428 data: 0.0550 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:08 lr: 0.000031 grad: 0.1936 (0.1955) loss: 0.7026 (0.7202) time: 0.1497 data: 0.0621 max mem: 9377 +Train: [68] [4200/6250] eta: 0:04:53 lr: 0.000031 grad: 0.1879 (0.1954) loss: 0.7154 (0.7200) time: 0.1287 data: 0.0489 max mem: 9377 +Train: [68] [4300/6250] eta: 0:04:38 lr: 0.000031 grad: 0.1839 (0.1953) loss: 0.7145 (0.7199) time: 0.1348 data: 0.0501 max mem: 9377 +Train: [68] [4400/6250] eta: 0:04:24 lr: 0.000031 grad: 0.1927 (0.1951) loss: 0.7064 (0.7198) time: 0.1402 data: 0.0525 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:09 lr: 0.000031 grad: 0.1908 (0.1949) loss: 0.7084 (0.7196) time: 0.1429 data: 0.0598 max mem: 9377 +Train: [68] [4600/6250] eta: 0:03:55 lr: 0.000031 grad: 0.1920 (0.1949) loss: 0.7114 (0.7194) time: 0.1508 data: 0.0651 max mem: 9377 +Train: [68] [4700/6250] eta: 0:03:40 lr: 0.000031 grad: 0.1950 (0.1948) loss: 0.7138 (0.7193) time: 0.1356 data: 0.0547 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:26 lr: 0.000030 grad: 0.1941 (0.1948) loss: 0.7038 (0.7191) time: 0.1719 data: 0.0935 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:12 lr: 0.000030 grad: 0.1897 (0.1947) loss: 0.7028 (0.7189) time: 0.1286 data: 0.0536 max mem: 9377 +Train: [68] [5000/6250] eta: 0:02:58 lr: 0.000030 grad: 0.1881 (0.1947) loss: 0.7205 (0.7187) time: 0.1360 data: 0.0570 max mem: 9377 +Train: [68] [5100/6250] eta: 0:02:43 lr: 0.000030 grad: 0.1962 (0.1947) loss: 0.7065 (0.7185) time: 0.1432 data: 0.0529 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:29 lr: 0.000030 grad: 0.1900 (0.1946) loss: 0.7096 (0.7183) time: 0.1297 data: 0.0482 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:14 lr: 0.000030 grad: 0.1960 (0.1945) loss: 0.6951 (0.7182) time: 0.1483 data: 0.0721 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:00 lr: 0.000030 grad: 0.1943 (0.1945) loss: 0.7161 (0.7181) time: 0.1308 data: 0.0486 max mem: 9377 +Train: [68] [5500/6250] eta: 0:01:46 lr: 0.000030 grad: 0.1896 (0.1945) loss: 0.7115 (0.7179) time: 0.1259 data: 0.0349 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:32 lr: 0.000030 grad: 0.1841 (0.1944) loss: 0.7166 (0.7178) time: 0.1464 data: 0.0635 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:17 lr: 0.000030 grad: 0.1934 (0.1943) loss: 0.7111 (0.7177) time: 0.1433 data: 0.0608 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:03 lr: 0.000030 grad: 0.1911 (0.1942) loss: 0.7151 (0.7177) time: 0.1391 data: 0.0546 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:49 lr: 0.000030 grad: 0.1921 (0.1942) loss: 0.7089 (0.7176) time: 0.1128 data: 0.0271 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:35 lr: 0.000030 grad: 0.1934 (0.1941) loss: 0.7150 (0.7175) time: 0.1366 data: 0.0467 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:21 lr: 0.000030 grad: 0.1815 (0.1940) loss: 0.7278 (0.7175) time: 0.1450 data: 0.0588 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:07 lr: 0.000030 grad: 0.1887 (0.1939) loss: 0.7181 (0.7175) time: 0.1211 data: 0.0368 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1949 (0.1939) loss: 0.7230 (0.7175) time: 0.1460 data: 0.0550 max mem: 9377 +Train: [68] Total time: 0:14:46 (0.1418 s / it) +Averaged stats: lr: 0.000030 grad: 0.1949 (0.1939) loss: 0.7230 (0.7175) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:05:38 loss: 0.8688 (0.8688) time: 5.4559 data: 5.4256 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8689 (0.8711) time: 0.1348 data: 0.1079 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:14 (0.2270 s / it) +Averaged stats (hcp-train-subset): loss: 0.8689 (0.8711) +Eval (hcp-val): [68] [ 0/62] eta: 0:03:55 loss: 0.8696 (0.8696) time: 3.8003 data: 3.7244 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8679 (0.8686) time: 0.1395 data: 0.1143 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:14 (0.2395 s / it) +Averaged stats (hcp-val): loss: 0.8679 (0.8686) +Eval (nsd-val): [68] [ 0/62] eta: 0:06:16 loss: 0.8298 (0.8298) time: 6.0667 data: 6.0336 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8372 (0.8403) time: 0.1407 data: 0.1122 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (nsd-val): loss: 0.8372 (0.8403) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 8:46:48 lr: 0.000030 grad: 0.1855 (0.1855) loss: 0.8512 (0.8512) time: 5.0574 data: 4.8144 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:22:40 lr: 0.000030 grad: 0.2827 (0.3148) loss: 0.7447 (0.7472) time: 0.1549 data: 0.0463 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:19:37 lr: 0.000030 grad: 0.2763 (0.2999) loss: 0.7028 (0.7341) time: 0.1650 data: 0.0695 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:18:24 lr: 0.000030 grad: 0.2256 (0.2805) loss: 0.7294 (0.7303) time: 0.1779 data: 0.0891 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:17:11 lr: 0.000030 grad: 0.2182 (0.2657) loss: 0.7252 (0.7296) time: 0.1646 data: 0.0745 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:16:17 lr: 0.000030 grad: 0.2043 (0.2549) loss: 0.7150 (0.7275) time: 0.1621 data: 0.0718 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:15:38 lr: 0.000030 grad: 0.1940 (0.2458) loss: 0.7183 (0.7258) time: 0.1660 data: 0.0728 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:15:05 lr: 0.000030 grad: 0.1878 (0.2386) loss: 0.7136 (0.7248) time: 0.1604 data: 0.0809 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:14:37 lr: 0.000030 grad: 0.1951 (0.2338) loss: 0.7077 (0.7231) time: 0.1554 data: 0.0666 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:14:14 lr: 0.000030 grad: 0.1870 (0.2291) loss: 0.7167 (0.7226) time: 0.1620 data: 0.0743 max mem: 9377 +Train: [69] [1000/6250] eta: 0:13:47 lr: 0.000030 grad: 0.1949 (0.2254) loss: 0.7128 (0.7219) time: 0.1365 data: 0.0527 max mem: 9377 +Train: [69] [1100/6250] eta: 0:13:24 lr: 0.000030 grad: 0.1866 (0.2223) loss: 0.7171 (0.7214) time: 0.1422 data: 0.0632 max mem: 9377 +Train: [69] [1200/6250] eta: 0:13:02 lr: 0.000030 grad: 0.1917 (0.2198) loss: 0.7112 (0.7208) time: 0.1399 data: 0.0622 max mem: 9377 +Train: [69] [1300/6250] eta: 0:12:38 lr: 0.000030 grad: 0.1945 (0.2178) loss: 0.7120 (0.7200) time: 0.1241 data: 0.0366 max mem: 9377 +Train: [69] [1400/6250] eta: 0:12:19 lr: 0.000030 grad: 0.1892 (0.2157) loss: 0.7157 (0.7195) time: 0.1517 data: 0.0710 max mem: 9377 +Train: [69] [1500/6250] eta: 0:12:00 lr: 0.000030 grad: 0.1906 (0.2138) loss: 0.7058 (0.7194) time: 0.1438 data: 0.0641 max mem: 9377 +Train: [69] [1600/6250] eta: 0:11:42 lr: 0.000030 grad: 0.1835 (0.2121) loss: 0.7156 (0.7192) time: 0.1411 data: 0.0599 max mem: 9377 +Train: [69] [1700/6250] eta: 0:11:30 lr: 0.000030 grad: 0.1889 (0.2106) loss: 0.7056 (0.7190) time: 0.1602 data: 0.0706 max mem: 9377 +Train: [69] [1800/6250] eta: 0:11:16 lr: 0.000030 grad: 0.1789 (0.2091) loss: 0.7151 (0.7189) time: 0.1363 data: 0.0545 max mem: 9377 +Train: [69] [1900/6250] eta: 0:11:01 lr: 0.000030 grad: 0.1794 (0.2078) loss: 0.7258 (0.7190) time: 0.1452 data: 0.0636 max mem: 9377 +Train: [69] [2000/6250] eta: 0:10:44 lr: 0.000030 grad: 0.1859 (0.2068) loss: 0.7239 (0.7190) time: 0.1278 data: 0.0447 max mem: 9377 +Train: [69] [2100/6250] eta: 0:10:28 lr: 0.000029 grad: 0.1825 (0.2060) loss: 0.7283 (0.7190) time: 0.1605 data: 0.0754 max mem: 9377 +Train: [69] [2200/6250] eta: 0:10:10 lr: 0.000029 grad: 0.1800 (0.2052) loss: 0.7185 (0.7190) time: 0.1332 data: 0.0417 max mem: 9377 +Train: [69] [2300/6250] eta: 0:09:54 lr: 0.000029 grad: 0.1816 (0.2043) loss: 0.7181 (0.7192) time: 0.1534 data: 0.0725 max mem: 9377 +Train: [69] [2400/6250] eta: 0:09:36 lr: 0.000029 grad: 0.1795 (0.2037) loss: 0.7155 (0.7191) time: 0.1343 data: 0.0495 max mem: 9377 +Train: [69] [2500/6250] eta: 0:09:18 lr: 0.000029 grad: 0.1826 (0.2030) loss: 0.7237 (0.7192) time: 0.1392 data: 0.0475 max mem: 9377 +Train: [69] [2600/6250] eta: 0:09:02 lr: 0.000029 grad: 0.1886 (0.2025) loss: 0.7011 (0.7190) time: 0.1390 data: 0.0495 max mem: 9377 +Train: [69] [2700/6250] eta: 0:08:45 lr: 0.000029 grad: 0.1819 (0.2020) loss: 0.7231 (0.7188) time: 0.1379 data: 0.0585 max mem: 9377 +Train: [69] [2800/6250] eta: 0:08:29 lr: 0.000029 grad: 0.1936 (0.2015) loss: 0.6990 (0.7186) time: 0.1521 data: 0.0726 max mem: 9377 +Train: [69] [2900/6250] eta: 0:08:15 lr: 0.000029 grad: 0.1836 (0.2011) loss: 0.7188 (0.7184) time: 0.1760 data: 0.0912 max mem: 9377 +Train: [69] [3000/6250] eta: 0:08:01 lr: 0.000029 grad: 0.1867 (0.2007) loss: 0.7196 (0.7183) time: 0.1608 data: 0.0846 max mem: 9377 +Train: [69] [3100/6250] eta: 0:07:47 lr: 0.000029 grad: 0.1862 (0.2003) loss: 0.7171 (0.7182) time: 0.1614 data: 0.0810 max mem: 9377 +Train: [69] [3200/6250] eta: 0:07:32 lr: 0.000029 grad: 0.1816 (0.1999) loss: 0.7117 (0.7182) time: 0.1339 data: 0.0487 max mem: 9377 +Train: [69] [3300/6250] eta: 0:07:18 lr: 0.000029 grad: 0.1846 (0.1995) loss: 0.7093 (0.7182) time: 0.1683 data: 0.0809 max mem: 9377 +Train: [69] [3400/6250] eta: 0:07:03 lr: 0.000029 grad: 0.1875 (0.1991) loss: 0.7152 (0.7183) time: 0.1436 data: 0.0623 max mem: 9377 +Train: [69] [3500/6250] eta: 0:06:48 lr: 0.000029 grad: 0.1853 (0.1989) loss: 0.7269 (0.7183) time: 0.1564 data: 0.0680 max mem: 9377 +Train: [69] [3600/6250] eta: 0:06:34 lr: 0.000029 grad: 0.1857 (0.1986) loss: 0.7217 (0.7183) time: 0.1579 data: 0.0735 max mem: 9377 +Train: [69] [3700/6250] eta: 0:06:19 lr: 0.000029 grad: 0.1853 (0.1983) loss: 0.7128 (0.7183) time: 0.1499 data: 0.0590 max mem: 9377 +Train: [69] [3800/6250] eta: 0:06:05 lr: 0.000029 grad: 0.1892 (0.1981) loss: 0.7059 (0.7183) time: 0.1599 data: 0.0714 max mem: 9377 +Train: [69] [3900/6250] eta: 0:05:49 lr: 0.000029 grad: 0.1868 (0.1979) loss: 0.7051 (0.7181) time: 0.1434 data: 0.0575 max mem: 9377 +Train: [69] [4000/6250] eta: 0:05:33 lr: 0.000029 grad: 0.1905 (0.1977) loss: 0.7177 (0.7180) time: 0.1404 data: 0.0579 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:18 lr: 0.000029 grad: 0.1856 (0.1975) loss: 0.7235 (0.7179) time: 0.1244 data: 0.0387 max mem: 9377 +Train: [69] [4200/6250] eta: 0:05:03 lr: 0.000029 grad: 0.1930 (0.1973) loss: 0.7088 (0.7177) time: 0.1515 data: 0.0689 max mem: 9377 +Train: [69] [4300/6250] eta: 0:04:48 lr: 0.000029 grad: 0.1854 (0.1971) loss: 0.7143 (0.7176) time: 0.1795 data: 0.1017 max mem: 9377 +Train: [69] [4400/6250] eta: 0:04:33 lr: 0.000029 grad: 0.1780 (0.1969) loss: 0.7209 (0.7175) time: 0.1647 data: 0.0832 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:19 lr: 0.000029 grad: 0.1883 (0.1967) loss: 0.7115 (0.7174) time: 0.1595 data: 0.0816 max mem: 9377 +Train: [69] [4600/6250] eta: 0:04:04 lr: 0.000029 grad: 0.1847 (0.1966) loss: 0.7109 (0.7173) time: 0.1329 data: 0.0455 max mem: 9377 +Train: [69] [4700/6250] eta: 0:03:49 lr: 0.000029 grad: 0.1859 (0.1964) loss: 0.7100 (0.7173) time: 0.1300 data: 0.0489 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:34 lr: 0.000029 grad: 0.1850 (0.1963) loss: 0.7063 (0.7172) time: 0.1523 data: 0.0745 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:19 lr: 0.000029 grad: 0.1902 (0.1962) loss: 0.7077 (0.7172) time: 0.1522 data: 0.0758 max mem: 9377 +Train: [69] [5000/6250] eta: 0:03:04 lr: 0.000029 grad: 0.1879 (0.1961) loss: 0.7079 (0.7171) time: 0.1285 data: 0.0473 max mem: 9377 +Train: [69] [5100/6250] eta: 0:02:49 lr: 0.000029 grad: 0.1887 (0.1960) loss: 0.7084 (0.7170) time: 0.1473 data: 0.0707 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:34 lr: 0.000029 grad: 0.1840 (0.1959) loss: 0.7178 (0.7168) time: 0.1333 data: 0.0495 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:19 lr: 0.000029 grad: 0.1917 (0.1958) loss: 0.7171 (0.7168) time: 0.1276 data: 0.0366 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:04 lr: 0.000029 grad: 0.1956 (0.1957) loss: 0.7114 (0.7167) time: 0.1519 data: 0.0726 max mem: 9377 +Train: [69] [5500/6250] eta: 0:01:49 lr: 0.000029 grad: 0.1890 (0.1956) loss: 0.7176 (0.7166) time: 0.1302 data: 0.0431 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:35 lr: 0.000028 grad: 0.1906 (0.1956) loss: 0.7060 (0.7165) time: 0.1339 data: 0.0517 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:20 lr: 0.000028 grad: 0.1844 (0.1956) loss: 0.7056 (0.7164) time: 0.1459 data: 0.0664 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:05 lr: 0.000028 grad: 0.1897 (0.1955) loss: 0.7143 (0.7164) time: 0.1445 data: 0.0663 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:51 lr: 0.000028 grad: 0.1906 (0.1954) loss: 0.7138 (0.7164) time: 0.1486 data: 0.0713 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:36 lr: 0.000028 grad: 0.1889 (0.1954) loss: 0.7162 (0.7163) time: 0.1383 data: 0.0584 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:21 lr: 0.000028 grad: 0.1853 (0.1953) loss: 0.7201 (0.7163) time: 0.1241 data: 0.0323 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:07 lr: 0.000028 grad: 0.1893 (0.1952) loss: 0.7201 (0.7163) time: 0.1289 data: 0.0472 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1911 (0.1952) loss: 0.7127 (0.7163) time: 0.1552 data: 0.0740 max mem: 9377 +Train: [69] Total time: 0:15:16 (0.1466 s / it) +Averaged stats: lr: 0.000028 grad: 0.1911 (0.1952) loss: 0.7127 (0.7163) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:05:47 loss: 0.8670 (0.8670) time: 5.6111 data: 5.5806 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8711 (0.8705) time: 0.1305 data: 0.1051 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:13 (0.2240 s / it) +Averaged stats (hcp-train-subset): loss: 0.8711 (0.8705) +Making plots (hcp-train-subset): example=41 +Eval (hcp-val): [69] [ 0/62] eta: 0:05:24 loss: 0.8673 (0.8673) time: 5.2288 data: 5.1967 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8645 (0.8673) time: 0.1180 data: 0.0926 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (hcp-val): loss: 0.8645 (0.8673) +Making plots (hcp-val): example=18 +Eval (nsd-val): [69] [ 0/62] eta: 0:05:51 loss: 0.8248 (0.8248) time: 5.6637 data: 5.6327 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8367 (0.8394) time: 0.0885 data: 0.0635 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:13 (0.2100 s / it) +Averaged stats (nsd-val): loss: 0.8367 (0.8394) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 11:20:11 lr: 0.000028 grad: 0.2417 (0.2417) loss: 0.7773 (0.7773) time: 6.5298 data: 6.4340 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:19:42 lr: 0.000028 grad: 0.2762 (0.3159) loss: 0.7663 (0.7474) time: 0.1390 data: 0.0490 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:17:04 lr: 0.000028 grad: 0.2500 (0.2926) loss: 0.7137 (0.7385) time: 0.1555 data: 0.0522 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:15:56 lr: 0.000028 grad: 0.2566 (0.2862) loss: 0.6971 (0.7297) time: 0.1535 data: 0.0596 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:15:16 lr: 0.000028 grad: 0.2243 (0.2759) loss: 0.7158 (0.7247) time: 0.1737 data: 0.0860 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:14:29 lr: 0.000028 grad: 0.2079 (0.2640) loss: 0.7099 (0.7218) time: 0.1366 data: 0.0470 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:14:05 lr: 0.000028 grad: 0.1994 (0.2543) loss: 0.7023 (0.7202) time: 0.1490 data: 0.0585 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:13:45 lr: 0.000028 grad: 0.1894 (0.2461) loss: 0.7209 (0.7202) time: 0.1481 data: 0.0615 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:13:29 lr: 0.000028 grad: 0.1899 (0.2397) loss: 0.7247 (0.7205) time: 0.1512 data: 0.0598 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:13:08 lr: 0.000028 grad: 0.1952 (0.2343) loss: 0.7154 (0.7204) time: 0.1446 data: 0.0526 max mem: 9377 +Train: [70] [1000/6250] eta: 0:12:49 lr: 0.000028 grad: 0.1920 (0.2298) loss: 0.6999 (0.7201) time: 0.1442 data: 0.0525 max mem: 9377 +Train: [70] [1100/6250] eta: 0:12:28 lr: 0.000028 grad: 0.1868 (0.2261) loss: 0.7198 (0.7198) time: 0.1142 data: 0.0346 max mem: 9377 +Train: [70] [1200/6250] eta: 0:12:11 lr: 0.000028 grad: 0.1924 (0.2234) loss: 0.7095 (0.7191) time: 0.1397 data: 0.0549 max mem: 9377 +Train: [70] [1300/6250] eta: 0:11:55 lr: 0.000028 grad: 0.1836 (0.2211) loss: 0.7090 (0.7185) time: 0.1260 data: 0.0438 max mem: 9377 +Train: [70] [1400/6250] eta: 0:11:38 lr: 0.000028 grad: 0.1874 (0.2190) loss: 0.7185 (0.7181) time: 0.1392 data: 0.0553 max mem: 9377 +Train: [70] [1500/6250] eta: 0:11:21 lr: 0.000028 grad: 0.1891 (0.2170) loss: 0.7133 (0.7179) time: 0.1334 data: 0.0529 max mem: 9377 +Train: [70] [1600/6250] eta: 0:11:12 lr: 0.000028 grad: 0.1911 (0.2155) loss: 0.7141 (0.7175) time: 0.1375 data: 0.0521 max mem: 9377 +Train: [70] [1700/6250] eta: 0:10:59 lr: 0.000028 grad: 0.1904 (0.2141) loss: 0.7193 (0.7173) time: 0.1496 data: 0.0637 max mem: 9377 +Train: [70] [1800/6250] eta: 0:10:46 lr: 0.000028 grad: 0.1940 (0.2130) loss: 0.6997 (0.7170) time: 0.1510 data: 0.0719 max mem: 9377 +Train: [70] [1900/6250] eta: 0:10:32 lr: 0.000028 grad: 0.1847 (0.2118) loss: 0.7253 (0.7169) time: 0.1352 data: 0.0597 max mem: 9377 +Train: [70] [2000/6250] eta: 0:10:16 lr: 0.000028 grad: 0.1867 (0.2109) loss: 0.7123 (0.7168) time: 0.1526 data: 0.0703 max mem: 9377 +Train: [70] [2100/6250] eta: 0:10:03 lr: 0.000028 grad: 0.1897 (0.2100) loss: 0.7090 (0.7167) time: 0.1478 data: 0.0663 max mem: 9377 +Train: [70] [2200/6250] eta: 0:09:47 lr: 0.000028 grad: 0.1869 (0.2091) loss: 0.7142 (0.7167) time: 0.1385 data: 0.0560 max mem: 9377 +Train: [70] [2300/6250] eta: 0:09:31 lr: 0.000028 grad: 0.1877 (0.2082) loss: 0.7088 (0.7165) time: 0.1221 data: 0.0350 max mem: 9377 +Train: [70] [2400/6250] eta: 0:09:15 lr: 0.000028 grad: 0.1911 (0.2076) loss: 0.7163 (0.7164) time: 0.1420 data: 0.0665 max mem: 9377 +Train: [70] [2500/6250] eta: 0:08:59 lr: 0.000028 grad: 0.1849 (0.2068) loss: 0.7133 (0.7164) time: 0.1467 data: 0.0622 max mem: 9377 +Train: [70] [2600/6250] eta: 0:08:43 lr: 0.000028 grad: 0.1854 (0.2062) loss: 0.7249 (0.7165) time: 0.1302 data: 0.0385 max mem: 9377 +Train: [70] [2700/6250] eta: 0:08:26 lr: 0.000028 grad: 0.1866 (0.2055) loss: 0.7154 (0.7166) time: 0.1364 data: 0.0527 max mem: 9377 +Train: [70] [2800/6250] eta: 0:08:11 lr: 0.000028 grad: 0.1884 (0.2050) loss: 0.7127 (0.7164) time: 0.1416 data: 0.0526 max mem: 9377 +Train: [70] [2900/6250] eta: 0:07:54 lr: 0.000028 grad: 0.1894 (0.2044) loss: 0.7107 (0.7164) time: 0.1263 data: 0.0385 max mem: 9377 +Train: [70] [3000/6250] eta: 0:07:40 lr: 0.000027 grad: 0.1839 (0.2040) loss: 0.7202 (0.7164) time: 0.1500 data: 0.0691 max mem: 9377 +Train: [70] [3100/6250] eta: 0:07:25 lr: 0.000027 grad: 0.1860 (0.2035) loss: 0.7190 (0.7164) time: 0.1309 data: 0.0480 max mem: 9377 +Train: [70] [3200/6250] eta: 0:07:11 lr: 0.000027 grad: 0.1933 (0.2030) loss: 0.7083 (0.7164) time: 0.1354 data: 0.0565 max mem: 9377 +Train: [70] [3300/6250] eta: 0:06:56 lr: 0.000027 grad: 0.1898 (0.2026) loss: 0.7316 (0.7164) time: 0.1273 data: 0.0451 max mem: 9377 +Train: [70] [3400/6250] eta: 0:06:43 lr: 0.000027 grad: 0.1871 (0.2024) loss: 0.7195 (0.7163) time: 0.1455 data: 0.0649 max mem: 9377 +Train: [70] [3500/6250] eta: 0:06:31 lr: 0.000027 grad: 0.1866 (0.2019) loss: 0.7083 (0.7163) time: 0.1923 data: 0.1044 max mem: 9377 +Train: [70] [3600/6250] eta: 0:06:17 lr: 0.000027 grad: 0.1902 (0.2016) loss: 0.7214 (0.7162) time: 0.1517 data: 0.0755 max mem: 9377 +Train: [70] [3700/6250] eta: 0:06:03 lr: 0.000027 grad: 0.1959 (0.2015) loss: 0.7089 (0.7160) time: 0.1228 data: 0.0450 max mem: 9377 +Train: [70] [3800/6250] eta: 0:05:49 lr: 0.000027 grad: 0.1920 (0.2012) loss: 0.7123 (0.7159) time: 0.1348 data: 0.0479 max mem: 9377 +Train: [70] [3900/6250] eta: 0:05:34 lr: 0.000027 grad: 0.1901 (0.2010) loss: 0.7105 (0.7157) time: 0.1444 data: 0.0656 max mem: 9377 +Train: [70] [4000/6250] eta: 0:05:19 lr: 0.000027 grad: 0.1984 (0.2008) loss: 0.6973 (0.7156) time: 0.1243 data: 0.0388 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:04 lr: 0.000027 grad: 0.1904 (0.2006) loss: 0.6965 (0.7154) time: 0.1337 data: 0.0526 max mem: 9377 +Train: [70] [4200/6250] eta: 0:04:50 lr: 0.000027 grad: 0.1869 (0.2003) loss: 0.7163 (0.7154) time: 0.1255 data: 0.0408 max mem: 9377 +Train: [70] [4300/6250] eta: 0:04:35 lr: 0.000027 grad: 0.1891 (0.2001) loss: 0.7240 (0.7153) time: 0.1278 data: 0.0423 max mem: 9377 +Train: [70] [4400/6250] eta: 0:04:20 lr: 0.000027 grad: 0.1914 (0.1999) loss: 0.7122 (0.7153) time: 0.1463 data: 0.0652 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:06 lr: 0.000027 grad: 0.1871 (0.1996) loss: 0.7167 (0.7154) time: 0.1576 data: 0.0750 max mem: 9377 +Train: [70] [4600/6250] eta: 0:03:51 lr: 0.000027 grad: 0.1860 (0.1993) loss: 0.7192 (0.7155) time: 0.1322 data: 0.0498 max mem: 9377 +Train: [70] [4700/6250] eta: 0:03:37 lr: 0.000027 grad: 0.1875 (0.1990) loss: 0.7156 (0.7156) time: 0.1474 data: 0.0683 max mem: 9377 +Train: [70] [4800/6250] eta: 0:03:23 lr: 0.000027 grad: 0.1860 (0.1989) loss: 0.7134 (0.7155) time: 0.1387 data: 0.0580 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:09 lr: 0.000027 grad: 0.1893 (0.1987) loss: 0.7126 (0.7154) time: 0.1485 data: 0.0717 max mem: 9377 +Train: [70] [5000/6250] eta: 0:02:55 lr: 0.000027 grad: 0.1834 (0.1986) loss: 0.7185 (0.7155) time: 0.1385 data: 0.0582 max mem: 9377 +Train: [70] [5100/6250] eta: 0:02:41 lr: 0.000027 grad: 0.1867 (0.1983) loss: 0.7303 (0.7155) time: 0.1393 data: 0.0599 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:27 lr: 0.000027 grad: 0.1821 (0.1981) loss: 0.7040 (0.7155) time: 0.1515 data: 0.0730 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:13 lr: 0.000027 grad: 0.1851 (0.1979) loss: 0.7198 (0.7155) time: 0.1286 data: 0.0447 max mem: 9377 +Train: [70] [5400/6250] eta: 0:01:59 lr: 0.000027 grad: 0.1851 (0.1978) loss: 0.7184 (0.7154) time: 0.1382 data: 0.0605 max mem: 9377 +Train: [70] [5500/6250] eta: 0:01:44 lr: 0.000027 grad: 0.1903 (0.1976) loss: 0.7162 (0.7154) time: 0.1063 data: 0.0192 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:30 lr: 0.000027 grad: 0.1845 (0.1974) loss: 0.7121 (0.7155) time: 0.1381 data: 0.0564 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:16 lr: 0.000027 grad: 0.1890 (0.1973) loss: 0.7071 (0.7156) time: 0.1249 data: 0.0481 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:02 lr: 0.000027 grad: 0.1844 (0.1971) loss: 0.7194 (0.7158) time: 0.1464 data: 0.0670 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:48 lr: 0.000027 grad: 0.1889 (0.1969) loss: 0.7303 (0.7159) time: 0.1313 data: 0.0500 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:34 lr: 0.000027 grad: 0.1892 (0.1968) loss: 0.7249 (0.7160) time: 0.1244 data: 0.0456 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:20 lr: 0.000027 grad: 0.1904 (0.1968) loss: 0.7216 (0.7162) time: 0.1386 data: 0.0569 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:06 lr: 0.000027 grad: 0.1864 (0.1966) loss: 0.7210 (0.7162) time: 0.1258 data: 0.0373 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1940 (0.1966) loss: 0.7175 (0.7163) time: 0.1607 data: 0.0816 max mem: 9377 +Train: [70] Total time: 0:14:36 (0.1403 s / it) +Averaged stats: lr: 0.000027 grad: 0.1940 (0.1966) loss: 0.7175 (0.7163) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:03:46 loss: 0.8595 (0.8595) time: 3.6600 data: 3.5651 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8696 (0.8719) time: 0.1306 data: 0.1053 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:13 (0.2193 s / it) +Averaged stats (hcp-train-subset): loss: 0.8696 (0.8719) +Eval (hcp-val): [70] [ 0/62] eta: 0:05:42 loss: 0.8675 (0.8675) time: 5.5217 data: 5.4879 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8659 (0.8699) time: 0.1441 data: 0.1187 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:14 (0.2411 s / it) +Averaged stats (hcp-val): loss: 0.8659 (0.8699) +Eval (nsd-val): [70] [ 0/62] eta: 0:06:19 loss: 0.8349 (0.8349) time: 6.1176 data: 6.0864 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8412 (0.8437) time: 0.1164 data: 0.0895 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:14 (0.2378 s / it) +Averaged stats (nsd-val): loss: 0.8412 (0.8437) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 11:14:31 lr: 0.000027 grad: 0.3872 (0.3872) loss: 0.6688 (0.6688) time: 6.4754 data: 6.3230 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:22:58 lr: 0.000027 grad: 0.2640 (0.3226) loss: 0.7210 (0.7233) time: 0.1792 data: 0.0745 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:19:53 lr: 0.000027 grad: 0.2609 (0.3031) loss: 0.7090 (0.7152) time: 0.1696 data: 0.0774 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:18:16 lr: 0.000027 grad: 0.2486 (0.2893) loss: 0.7101 (0.7136) time: 0.1657 data: 0.0718 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:17:08 lr: 0.000026 grad: 0.2173 (0.2721) loss: 0.7147 (0.7136) time: 0.1516 data: 0.0632 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:16:13 lr: 0.000026 grad: 0.2011 (0.2589) loss: 0.7067 (0.7152) time: 0.1383 data: 0.0428 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:15:30 lr: 0.000026 grad: 0.1986 (0.2499) loss: 0.6980 (0.7151) time: 0.1362 data: 0.0491 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:15:00 lr: 0.000026 grad: 0.1964 (0.2421) loss: 0.7040 (0.7156) time: 0.1468 data: 0.0557 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:14:44 lr: 0.000026 grad: 0.1950 (0.2363) loss: 0.7183 (0.7154) time: 0.1707 data: 0.0900 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:14:29 lr: 0.000026 grad: 0.2025 (0.2317) loss: 0.7235 (0.7152) time: 0.1516 data: 0.0608 max mem: 9377 +Train: [71] [1000/6250] eta: 0:14:17 lr: 0.000026 grad: 0.1872 (0.2278) loss: 0.7230 (0.7156) time: 0.1614 data: 0.0820 max mem: 9377 +Train: [71] [1100/6250] eta: 0:13:59 lr: 0.000026 grad: 0.1895 (0.2246) loss: 0.7175 (0.7157) time: 0.1498 data: 0.0647 max mem: 9377 +Train: [71] [1200/6250] eta: 0:13:38 lr: 0.000026 grad: 0.1872 (0.2216) loss: 0.7172 (0.7158) time: 0.1512 data: 0.0640 max mem: 9377 +Train: [71] [1300/6250] eta: 0:13:11 lr: 0.000026 grad: 0.1902 (0.2190) loss: 0.7114 (0.7159) time: 0.1316 data: 0.0476 max mem: 9377 +Train: [71] [1400/6250] eta: 0:12:49 lr: 0.000026 grad: 0.1891 (0.2169) loss: 0.7095 (0.7158) time: 0.1326 data: 0.0465 max mem: 9377 +Train: [71] [1500/6250] eta: 0:12:29 lr: 0.000026 grad: 0.1890 (0.2150) loss: 0.7059 (0.7160) time: 0.1483 data: 0.0636 max mem: 9377 +Train: [71] [1600/6250] eta: 0:12:12 lr: 0.000026 grad: 0.1910 (0.2135) loss: 0.7214 (0.7160) time: 0.1725 data: 0.0914 max mem: 9377 +Train: [71] [1700/6250] eta: 0:12:01 lr: 0.000026 grad: 0.1874 (0.2122) loss: 0.7104 (0.7158) time: 0.1567 data: 0.0677 max mem: 9377 +Train: [71] [1800/6250] eta: 0:11:48 lr: 0.000026 grad: 0.1862 (0.2111) loss: 0.7199 (0.7156) time: 0.1776 data: 0.0964 max mem: 9377 +Train: [71] [1900/6250] eta: 0:11:35 lr: 0.000026 grad: 0.1890 (0.2101) loss: 0.7175 (0.7157) time: 0.1772 data: 0.0891 max mem: 9377 +Train: [71] [2000/6250] eta: 0:11:19 lr: 0.000026 grad: 0.1872 (0.2090) loss: 0.7238 (0.7160) time: 0.1672 data: 0.0829 max mem: 9377 +Train: [71] [2100/6250] eta: 0:11:02 lr: 0.000026 grad: 0.1899 (0.2080) loss: 0.7084 (0.7161) time: 0.1575 data: 0.0724 max mem: 9377 +Train: [71] [2200/6250] eta: 0:10:46 lr: 0.000026 grad: 0.1920 (0.2072) loss: 0.7098 (0.7160) time: 0.1588 data: 0.0762 max mem: 9377 +Train: [71] [2300/6250] eta: 0:10:29 lr: 0.000026 grad: 0.1938 (0.2065) loss: 0.7032 (0.7160) time: 0.1670 data: 0.0817 max mem: 9377 +Train: [71] [2400/6250] eta: 0:10:12 lr: 0.000026 grad: 0.1900 (0.2058) loss: 0.7288 (0.7159) time: 0.1442 data: 0.0523 max mem: 9377 +Train: [71] [2500/6250] eta: 0:09:55 lr: 0.000026 grad: 0.1954 (0.2054) loss: 0.7121 (0.7157) time: 0.1521 data: 0.0681 max mem: 9377 +Train: [71] [2600/6250] eta: 0:09:36 lr: 0.000026 grad: 0.1874 (0.2047) loss: 0.7174 (0.7156) time: 0.1235 data: 0.0349 max mem: 9377 +Train: [71] [2700/6250] eta: 0:09:18 lr: 0.000026 grad: 0.1944 (0.2043) loss: 0.7085 (0.7155) time: 0.1520 data: 0.0702 max mem: 9377 +Train: [71] [2800/6250] eta: 0:09:01 lr: 0.000026 grad: 0.1887 (0.2038) loss: 0.7143 (0.7155) time: 0.1608 data: 0.0777 max mem: 9377 +Train: [71] [2900/6250] eta: 0:08:43 lr: 0.000026 grad: 0.1910 (0.2034) loss: 0.7079 (0.7153) time: 0.1437 data: 0.0609 max mem: 9377 +Train: [71] [3000/6250] eta: 0:08:25 lr: 0.000026 grad: 0.1905 (0.2031) loss: 0.7115 (0.7153) time: 0.1407 data: 0.0560 max mem: 9377 +Train: [71] [3100/6250] eta: 0:08:10 lr: 0.000026 grad: 0.1862 (0.2028) loss: 0.7220 (0.7153) time: 0.1481 data: 0.0635 max mem: 9377 +Train: [71] [3200/6250] eta: 0:07:53 lr: 0.000026 grad: 0.1910 (0.2025) loss: 0.7264 (0.7153) time: 0.1508 data: 0.0710 max mem: 9377 +Train: [71] [3300/6250] eta: 0:07:37 lr: 0.000026 grad: 0.1874 (0.2022) loss: 0.7080 (0.7153) time: 0.1478 data: 0.0642 max mem: 9377 +Train: [71] [3400/6250] eta: 0:07:20 lr: 0.000026 grad: 0.1861 (0.2019) loss: 0.7194 (0.7153) time: 0.1389 data: 0.0586 max mem: 9377 +Train: [71] [3500/6250] eta: 0:07:04 lr: 0.000026 grad: 0.1911 (0.2017) loss: 0.7158 (0.7154) time: 0.1469 data: 0.0686 max mem: 9377 +Train: [71] [3600/6250] eta: 0:06:47 lr: 0.000026 grad: 0.1929 (0.2015) loss: 0.7179 (0.7155) time: 0.1447 data: 0.0622 max mem: 9377 +Train: [71] [3700/6250] eta: 0:06:32 lr: 0.000026 grad: 0.1902 (0.2012) loss: 0.7128 (0.7156) time: 0.1449 data: 0.0641 max mem: 9377 +Train: [71] [3800/6250] eta: 0:06:16 lr: 0.000026 grad: 0.1908 (0.2010) loss: 0.7272 (0.7157) time: 0.1255 data: 0.0408 max mem: 9377 +Train: [71] [3900/6250] eta: 0:06:01 lr: 0.000026 grad: 0.1891 (0.2008) loss: 0.7186 (0.7158) time: 0.1478 data: 0.0621 max mem: 9377 +Train: [71] [4000/6250] eta: 0:05:45 lr: 0.000026 grad: 0.1937 (0.2006) loss: 0.7132 (0.7157) time: 0.1476 data: 0.0721 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:29 lr: 0.000026 grad: 0.2001 (0.2005) loss: 0.7066 (0.7157) time: 0.1641 data: 0.0892 max mem: 9377 +Train: [71] [4200/6250] eta: 0:05:13 lr: 0.000025 grad: 0.1974 (0.2005) loss: 0.7181 (0.7156) time: 0.1324 data: 0.0524 max mem: 9377 +Train: [71] [4300/6250] eta: 0:04:57 lr: 0.000025 grad: 0.1904 (0.2004) loss: 0.7106 (0.7156) time: 0.1426 data: 0.0543 max mem: 9377 +Train: [71] [4400/6250] eta: 0:04:41 lr: 0.000025 grad: 0.1929 (0.2002) loss: 0.7168 (0.7155) time: 0.1269 data: 0.0425 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:25 lr: 0.000025 grad: 0.1956 (0.2001) loss: 0.7121 (0.7156) time: 0.1325 data: 0.0443 max mem: 9377 +Train: [71] [4600/6250] eta: 0:04:09 lr: 0.000025 grad: 0.1911 (0.2000) loss: 0.7334 (0.7156) time: 0.1330 data: 0.0420 max mem: 9377 +Train: [71] [4700/6250] eta: 0:03:54 lr: 0.000025 grad: 0.1875 (0.1998) loss: 0.7207 (0.7157) time: 0.1298 data: 0.0480 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:38 lr: 0.000025 grad: 0.1884 (0.1996) loss: 0.7300 (0.7158) time: 0.1263 data: 0.0413 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:22 lr: 0.000025 grad: 0.1874 (0.1994) loss: 0.7090 (0.7159) time: 0.1480 data: 0.0697 max mem: 9377 +Train: [71] [5000/6250] eta: 0:03:07 lr: 0.000025 grad: 0.1937 (0.1994) loss: 0.7215 (0.7159) time: 0.1496 data: 0.0761 max mem: 9377 +Train: [71] [5100/6250] eta: 0:02:52 lr: 0.000025 grad: 0.1952 (0.1993) loss: 0.7178 (0.7160) time: 0.1389 data: 0.0612 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:37 lr: 0.000025 grad: 0.1995 (0.1992) loss: 0.7088 (0.7159) time: 0.1475 data: 0.0680 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:21 lr: 0.000025 grad: 0.1995 (0.1992) loss: 0.7045 (0.7158) time: 0.1360 data: 0.0479 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:06 lr: 0.000025 grad: 0.1901 (0.1991) loss: 0.7243 (0.7158) time: 0.1438 data: 0.0642 max mem: 9377 +Train: [71] [5500/6250] eta: 0:01:51 lr: 0.000025 grad: 0.1967 (0.1990) loss: 0.7129 (0.7157) time: 0.1517 data: 0.0739 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:36 lr: 0.000025 grad: 0.1980 (0.1990) loss: 0.7025 (0.7157) time: 0.1607 data: 0.0791 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:21 lr: 0.000025 grad: 0.1961 (0.1989) loss: 0.7212 (0.7158) time: 0.1510 data: 0.0639 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:06 lr: 0.000025 grad: 0.1970 (0.1988) loss: 0.7169 (0.7159) time: 0.1448 data: 0.0604 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:51 lr: 0.000025 grad: 0.1970 (0.1988) loss: 0.7144 (0.7159) time: 0.1475 data: 0.0604 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:37 lr: 0.000025 grad: 0.1882 (0.1988) loss: 0.7225 (0.7159) time: 0.1130 data: 0.0287 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:22 lr: 0.000025 grad: 0.1896 (0.1987) loss: 0.7285 (0.7159) time: 0.1557 data: 0.0717 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.1963 (0.1987) loss: 0.7060 (0.7158) time: 0.1376 data: 0.0531 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.2004 (0.1987) loss: 0.7201 (0.7158) time: 0.1698 data: 0.0363 max mem: 9377 +Train: [71] Total time: 0:15:29 (0.1486 s / it) +Averaged stats: lr: 0.000025 grad: 0.2004 (0.1987) loss: 0.7201 (0.7158) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:03:55 loss: 0.8642 (0.8642) time: 3.7912 data: 3.7385 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8726 (0.8732) time: 0.1422 data: 0.1169 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (hcp-train-subset): loss: 0.8726 (0.8732) +Eval (hcp-val): [71] [ 0/62] eta: 0:03:45 loss: 0.8797 (0.8797) time: 3.6397 data: 3.5436 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8659 (0.8694) time: 0.1009 data: 0.0761 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (hcp-val): loss: 0.8659 (0.8694) +Eval (nsd-val): [71] [ 0/62] eta: 0:05:32 loss: 0.8316 (0.8316) time: 5.3554 data: 5.3248 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8405 (0.8406) time: 0.1278 data: 0.1028 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (nsd-val): loss: 0.8405 (0.8406) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 7:19:54 lr: 0.000025 grad: 0.2368 (0.2368) loss: 0.8151 (0.8151) time: 4.2231 data: 3.8629 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:19:59 lr: 0.000025 grad: 0.2501 (0.3169) loss: 0.7202 (0.7219) time: 0.1587 data: 0.0704 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:16:55 lr: 0.000025 grad: 0.2287 (0.2891) loss: 0.7313 (0.7248) time: 0.1366 data: 0.0407 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:15:39 lr: 0.000025 grad: 0.2476 (0.2792) loss: 0.7001 (0.7194) time: 0.1091 data: 0.0192 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:14:53 lr: 0.000025 grad: 0.2255 (0.2689) loss: 0.6961 (0.7154) time: 0.1377 data: 0.0427 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:14:15 lr: 0.000025 grad: 0.2047 (0.2586) loss: 0.7066 (0.7124) time: 0.1127 data: 0.0214 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:13:54 lr: 0.000025 grad: 0.2013 (0.2504) loss: 0.7063 (0.7105) time: 0.1574 data: 0.0663 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:13:30 lr: 0.000025 grad: 0.1944 (0.2432) loss: 0.7131 (0.7104) time: 0.1416 data: 0.0559 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:13:14 lr: 0.000025 grad: 0.1981 (0.2374) loss: 0.7120 (0.7107) time: 0.1382 data: 0.0553 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:13:00 lr: 0.000025 grad: 0.1969 (0.2331) loss: 0.7040 (0.7103) time: 0.1511 data: 0.0622 max mem: 9377 +Train: [72] [1000/6250] eta: 0:12:42 lr: 0.000025 grad: 0.1891 (0.2296) loss: 0.7176 (0.7102) time: 0.1491 data: 0.0656 max mem: 9377 +Train: [72] [1100/6250] eta: 0:12:19 lr: 0.000025 grad: 0.1959 (0.2268) loss: 0.6979 (0.7099) time: 0.1302 data: 0.0462 max mem: 9377 +Train: [72] [1200/6250] eta: 0:11:59 lr: 0.000025 grad: 0.1934 (0.2244) loss: 0.7119 (0.7096) time: 0.1028 data: 0.0117 max mem: 9377 +Train: [72] [1300/6250] eta: 0:11:44 lr: 0.000025 grad: 0.1956 (0.2223) loss: 0.7072 (0.7096) time: 0.1562 data: 0.0725 max mem: 9377 +Train: [72] [1400/6250] eta: 0:11:26 lr: 0.000025 grad: 0.1832 (0.2201) loss: 0.7268 (0.7100) time: 0.1377 data: 0.0529 max mem: 9377 +Train: [72] [1500/6250] eta: 0:11:12 lr: 0.000025 grad: 0.1898 (0.2183) loss: 0.7128 (0.7100) time: 0.1339 data: 0.0489 max mem: 9377 +Train: [72] [1600/6250] eta: 0:10:56 lr: 0.000025 grad: 0.1881 (0.2168) loss: 0.7157 (0.7100) time: 0.1584 data: 0.0725 max mem: 9377 +Train: [72] [1700/6250] eta: 0:10:44 lr: 0.000024 grad: 0.1902 (0.2155) loss: 0.7167 (0.7102) time: 0.1246 data: 0.0380 max mem: 9377 +Train: [72] [1800/6250] eta: 0:10:31 lr: 0.000024 grad: 0.1851 (0.2142) loss: 0.7234 (0.7105) time: 0.1358 data: 0.0550 max mem: 9377 +Train: [72] [1900/6250] eta: 0:10:17 lr: 0.000024 grad: 0.1938 (0.2131) loss: 0.7135 (0.7107) time: 0.1443 data: 0.0650 max mem: 9377 +Train: [72] [2000/6250] eta: 0:10:01 lr: 0.000024 grad: 0.1850 (0.2121) loss: 0.7153 (0.7111) time: 0.1402 data: 0.0502 max mem: 9377 +Train: [72] [2100/6250] eta: 0:09:47 lr: 0.000024 grad: 0.1972 (0.2112) loss: 0.7044 (0.7113) time: 0.1402 data: 0.0590 max mem: 9377 +Train: [72] [2200/6250] eta: 0:09:33 lr: 0.000024 grad: 0.1883 (0.2103) loss: 0.7206 (0.7117) time: 0.1677 data: 0.0854 max mem: 9377 +Train: [72] [2300/6250] eta: 0:09:16 lr: 0.000024 grad: 0.1910 (0.2095) loss: 0.7333 (0.7121) time: 0.1355 data: 0.0511 max mem: 9377 +Train: [72] [2400/6250] eta: 0:09:02 lr: 0.000024 grad: 0.1897 (0.2087) loss: 0.7255 (0.7125) time: 0.1508 data: 0.0713 max mem: 9377 +Train: [72] [2500/6250] eta: 0:08:47 lr: 0.000024 grad: 0.1866 (0.2079) loss: 0.7273 (0.7128) time: 0.1367 data: 0.0425 max mem: 9377 +Train: [72] [2600/6250] eta: 0:08:33 lr: 0.000024 grad: 0.1867 (0.2072) loss: 0.7273 (0.7132) time: 0.1327 data: 0.0444 max mem: 9377 +Train: [72] [2700/6250] eta: 0:08:18 lr: 0.000024 grad: 0.1809 (0.2065) loss: 0.7384 (0.7137) time: 0.1446 data: 0.0654 max mem: 9377 +Train: [72] [2800/6250] eta: 0:08:04 lr: 0.000024 grad: 0.1929 (0.2060) loss: 0.7026 (0.7139) time: 0.1517 data: 0.0706 max mem: 9377 +Train: [72] [2900/6250] eta: 0:07:49 lr: 0.000024 grad: 0.1895 (0.2056) loss: 0.7217 (0.7141) time: 0.1346 data: 0.0521 max mem: 9377 +Train: [72] [3000/6250] eta: 0:07:35 lr: 0.000024 grad: 0.1884 (0.2051) loss: 0.7278 (0.7143) time: 0.1406 data: 0.0557 max mem: 9377 +Train: [72] [3100/6250] eta: 0:07:20 lr: 0.000024 grad: 0.1848 (0.2047) loss: 0.7217 (0.7146) time: 0.1388 data: 0.0530 max mem: 9377 +Train: [72] [3200/6250] eta: 0:07:06 lr: 0.000024 grad: 0.1864 (0.2042) loss: 0.7311 (0.7148) time: 0.1346 data: 0.0526 max mem: 9377 +Train: [72] [3300/6250] eta: 0:06:52 lr: 0.000024 grad: 0.1976 (0.2039) loss: 0.7181 (0.7149) time: 0.1491 data: 0.0656 max mem: 9377 +Train: [72] [3400/6250] eta: 0:06:39 lr: 0.000024 grad: 0.1908 (0.2035) loss: 0.7137 (0.7150) time: 0.1624 data: 0.0770 max mem: 9377 +Train: [72] [3500/6250] eta: 0:06:25 lr: 0.000024 grad: 0.1882 (0.2032) loss: 0.7148 (0.7150) time: 0.1590 data: 0.0807 max mem: 9377 +Train: [72] [3600/6250] eta: 0:06:10 lr: 0.000024 grad: 0.1924 (0.2029) loss: 0.7138 (0.7149) time: 0.1500 data: 0.0697 max mem: 9377 +Train: [72] [3700/6250] eta: 0:05:56 lr: 0.000024 grad: 0.2015 (0.2027) loss: 0.6955 (0.7148) time: 0.1432 data: 0.0610 max mem: 9377 +Train: [72] [3800/6250] eta: 0:05:42 lr: 0.000024 grad: 0.1968 (0.2024) loss: 0.6940 (0.7148) time: 0.1398 data: 0.0623 max mem: 9377 +Train: [72] [3900/6250] eta: 0:05:28 lr: 0.000024 grad: 0.1909 (0.2021) loss: 0.7087 (0.7148) time: 0.1241 data: 0.0383 max mem: 9377 +Train: [72] [4000/6250] eta: 0:05:15 lr: 0.000024 grad: 0.1923 (0.2019) loss: 0.7017 (0.7148) time: 0.1543 data: 0.0653 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:02 lr: 0.000024 grad: 0.1896 (0.2017) loss: 0.7154 (0.7148) time: 0.2019 data: 0.1066 max mem: 9377 +Train: [72] [4200/6250] eta: 0:04:48 lr: 0.000024 grad: 0.1916 (0.2015) loss: 0.7200 (0.7148) time: 0.1632 data: 0.0822 max mem: 9377 +Train: [72] [4300/6250] eta: 0:04:34 lr: 0.000024 grad: 0.1957 (0.2013) loss: 0.7099 (0.7148) time: 0.1258 data: 0.0460 max mem: 9377 +Train: [72] [4400/6250] eta: 0:04:19 lr: 0.000024 grad: 0.1825 (0.2011) loss: 0.7223 (0.7149) time: 0.1210 data: 0.0418 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:05 lr: 0.000024 grad: 0.1964 (0.2009) loss: 0.7142 (0.7149) time: 0.1367 data: 0.0554 max mem: 9377 +Train: [72] [4600/6250] eta: 0:03:51 lr: 0.000024 grad: 0.1899 (0.2007) loss: 0.7157 (0.7149) time: 0.1339 data: 0.0463 max mem: 9377 +Train: [72] [4700/6250] eta: 0:03:37 lr: 0.000024 grad: 0.1926 (0.2005) loss: 0.7086 (0.7148) time: 0.1266 data: 0.0396 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:23 lr: 0.000024 grad: 0.1926 (0.2003) loss: 0.7057 (0.7148) time: 0.1265 data: 0.0371 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:09 lr: 0.000024 grad: 0.1973 (0.2002) loss: 0.7069 (0.7148) time: 0.1351 data: 0.0559 max mem: 9377 +Train: [72] [5000/6250] eta: 0:02:54 lr: 0.000024 grad: 0.1903 (0.2000) loss: 0.7200 (0.7148) time: 0.1210 data: 0.0345 max mem: 9377 +Train: [72] [5100/6250] eta: 0:02:40 lr: 0.000024 grad: 0.1933 (0.1999) loss: 0.7144 (0.7148) time: 0.1205 data: 0.0318 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:26 lr: 0.000024 grad: 0.1885 (0.1998) loss: 0.7191 (0.7148) time: 0.1338 data: 0.0453 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:12 lr: 0.000024 grad: 0.1944 (0.1997) loss: 0.7143 (0.7148) time: 0.1505 data: 0.0654 max mem: 9377 +Train: [72] [5400/6250] eta: 0:01:58 lr: 0.000024 grad: 0.1910 (0.1996) loss: 0.7060 (0.7148) time: 0.1582 data: 0.0793 max mem: 9377 +Train: [72] [5500/6250] eta: 0:01:44 lr: 0.000023 grad: 0.1937 (0.1995) loss: 0.7141 (0.7148) time: 0.1539 data: 0.0756 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:30 lr: 0.000023 grad: 0.1911 (0.1994) loss: 0.7210 (0.7149) time: 0.1338 data: 0.0564 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:17 lr: 0.000023 grad: 0.1945 (0.1992) loss: 0.7216 (0.7149) time: 0.1471 data: 0.0712 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:03 lr: 0.000023 grad: 0.1959 (0.1992) loss: 0.7249 (0.7150) time: 0.1565 data: 0.0711 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:49 lr: 0.000023 grad: 0.1973 (0.1991) loss: 0.7199 (0.7150) time: 0.1341 data: 0.0535 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:35 lr: 0.000023 grad: 0.1899 (0.1991) loss: 0.7101 (0.7151) time: 0.1410 data: 0.0580 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:21 lr: 0.000023 grad: 0.1868 (0.1990) loss: 0.7158 (0.7151) time: 0.1512 data: 0.0713 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:07 lr: 0.000023 grad: 0.1912 (0.1988) loss: 0.7158 (0.7152) time: 0.1459 data: 0.0663 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1891 (0.1988) loss: 0.7250 (0.7153) time: 0.1563 data: 0.0733 max mem: 9377 +Train: [72] Total time: 0:14:44 (0.1415 s / it) +Averaged stats: lr: 0.000023 grad: 0.1891 (0.1988) loss: 0.7250 (0.7153) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:04:56 loss: 0.8637 (0.8637) time: 4.7877 data: 4.7557 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8717 (0.8711) time: 0.1370 data: 0.1114 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-train-subset): loss: 0.8717 (0.8711) +Eval (hcp-val): [72] [ 0/62] eta: 0:05:25 loss: 0.8681 (0.8681) time: 5.2428 data: 5.2115 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8665 (0.8686) time: 0.1311 data: 0.1058 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:13 (0.2150 s / it) +Averaged stats (hcp-val): loss: 0.8665 (0.8686) +Eval (nsd-val): [72] [ 0/62] eta: 0:05:54 loss: 0.8296 (0.8296) time: 5.7251 data: 5.6906 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8387 (0.8423) time: 0.1356 data: 0.1102 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (nsd-val): loss: 0.8387 (0.8423) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 8:33:23 lr: 0.000023 grad: 0.1527 (0.1527) loss: 0.8135 (0.8135) time: 4.9285 data: 4.6764 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:21:31 lr: 0.000023 grad: 0.2395 (0.3362) loss: 0.7218 (0.7167) time: 0.1507 data: 0.0609 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:18:51 lr: 0.000023 grad: 0.2388 (0.2980) loss: 0.6838 (0.7139) time: 0.1774 data: 0.0815 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:17:16 lr: 0.000023 grad: 0.2485 (0.2790) loss: 0.6749 (0.7081) time: 0.1543 data: 0.0638 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:16:06 lr: 0.000023 grad: 0.2212 (0.2673) loss: 0.6866 (0.7070) time: 0.1393 data: 0.0515 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:15:19 lr: 0.000023 grad: 0.2036 (0.2565) loss: 0.7118 (0.7070) time: 0.1440 data: 0.0570 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:14:50 lr: 0.000023 grad: 0.1926 (0.2476) loss: 0.7073 (0.7076) time: 0.1578 data: 0.0728 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:14:25 lr: 0.000023 grad: 0.1971 (0.2414) loss: 0.7097 (0.7080) time: 0.1442 data: 0.0608 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:14:18 lr: 0.000023 grad: 0.1939 (0.2362) loss: 0.7205 (0.7084) time: 0.1683 data: 0.0801 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:14:08 lr: 0.000023 grad: 0.1938 (0.2317) loss: 0.7081 (0.7089) time: 0.1531 data: 0.0667 max mem: 9377 +Train: [73] [1000/6250] eta: 0:13:50 lr: 0.000023 grad: 0.2004 (0.2282) loss: 0.7125 (0.7096) time: 0.1489 data: 0.0611 max mem: 9377 +Train: [73] [1100/6250] eta: 0:13:29 lr: 0.000023 grad: 0.1918 (0.2251) loss: 0.7043 (0.7101) time: 0.1457 data: 0.0568 max mem: 9377 +Train: [73] [1200/6250] eta: 0:13:06 lr: 0.000023 grad: 0.1948 (0.2225) loss: 0.7184 (0.7101) time: 0.1419 data: 0.0579 max mem: 9377 +Train: [73] [1300/6250] eta: 0:12:44 lr: 0.000023 grad: 0.1892 (0.2205) loss: 0.7198 (0.7102) time: 0.1252 data: 0.0401 max mem: 9377 +Train: [73] [1400/6250] eta: 0:12:25 lr: 0.000023 grad: 0.1847 (0.2185) loss: 0.7243 (0.7107) time: 0.1412 data: 0.0527 max mem: 9377 +Train: [73] [1500/6250] eta: 0:12:06 lr: 0.000023 grad: 0.1936 (0.2168) loss: 0.7281 (0.7110) time: 0.1333 data: 0.0476 max mem: 9377 +Train: [73] [1600/6250] eta: 0:11:49 lr: 0.000023 grad: 0.1929 (0.2154) loss: 0.7274 (0.7112) time: 0.1336 data: 0.0503 max mem: 9377 +Train: [73] [1700/6250] eta: 0:11:36 lr: 0.000023 grad: 0.1973 (0.2143) loss: 0.7258 (0.7113) time: 0.1457 data: 0.0528 max mem: 9377 +Train: [73] [1800/6250] eta: 0:11:24 lr: 0.000023 grad: 0.1928 (0.2131) loss: 0.7125 (0.7113) time: 0.1733 data: 0.0932 max mem: 9377 +Train: [73] [1900/6250] eta: 0:11:10 lr: 0.000023 grad: 0.1903 (0.2121) loss: 0.7202 (0.7116) time: 0.1462 data: 0.0610 max mem: 9377 +Train: [73] [2000/6250] eta: 0:10:58 lr: 0.000023 grad: 0.1898 (0.2111) loss: 0.7269 (0.7119) time: 0.1759 data: 0.0970 max mem: 9377 +Train: [73] [2100/6250] eta: 0:10:42 lr: 0.000023 grad: 0.1858 (0.2103) loss: 0.7007 (0.7121) time: 0.1458 data: 0.0566 max mem: 9377 +Train: [73] [2200/6250] eta: 0:10:25 lr: 0.000023 grad: 0.1966 (0.2096) loss: 0.7262 (0.7122) time: 0.1395 data: 0.0560 max mem: 9377 +Train: [73] [2300/6250] eta: 0:10:07 lr: 0.000023 grad: 0.1937 (0.2089) loss: 0.7248 (0.7123) time: 0.1433 data: 0.0630 max mem: 9377 +Train: [73] [2400/6250] eta: 0:09:50 lr: 0.000023 grad: 0.1886 (0.2083) loss: 0.7268 (0.7123) time: 0.1490 data: 0.0617 max mem: 9377 +Train: [73] [2500/6250] eta: 0:09:32 lr: 0.000023 grad: 0.1965 (0.2078) loss: 0.7094 (0.7122) time: 0.1367 data: 0.0501 max mem: 9377 +Train: [73] [2600/6250] eta: 0:09:14 lr: 0.000023 grad: 0.1908 (0.2074) loss: 0.7101 (0.7121) time: 0.1201 data: 0.0371 max mem: 9377 +Train: [73] [2700/6250] eta: 0:08:57 lr: 0.000023 grad: 0.1960 (0.2069) loss: 0.6996 (0.7119) time: 0.1546 data: 0.0763 max mem: 9377 +Train: [73] [2800/6250] eta: 0:08:40 lr: 0.000023 grad: 0.1971 (0.2065) loss: 0.7022 (0.7118) time: 0.1366 data: 0.0493 max mem: 9377 +Train: [73] [2900/6250] eta: 0:08:23 lr: 0.000023 grad: 0.1984 (0.2062) loss: 0.7070 (0.7117) time: 0.1352 data: 0.0536 max mem: 9377 +Train: [73] [3000/6250] eta: 0:08:08 lr: 0.000023 grad: 0.1936 (0.2060) loss: 0.7034 (0.7115) time: 0.1520 data: 0.0755 max mem: 9377 +Train: [73] [3100/6250] eta: 0:07:52 lr: 0.000023 grad: 0.1951 (0.2057) loss: 0.7122 (0.7115) time: 0.1368 data: 0.0537 max mem: 9377 +Train: [73] [3200/6250] eta: 0:07:36 lr: 0.000022 grad: 0.1987 (0.2055) loss: 0.7100 (0.7114) time: 0.1287 data: 0.0460 max mem: 9377 +Train: [73] [3300/6250] eta: 0:07:20 lr: 0.000022 grad: 0.1920 (0.2053) loss: 0.7089 (0.7113) time: 0.1172 data: 0.0260 max mem: 9377 +Train: [73] [3400/6250] eta: 0:07:04 lr: 0.000022 grad: 0.1976 (0.2051) loss: 0.6993 (0.7113) time: 0.1338 data: 0.0465 max mem: 9377 +Train: [73] [3500/6250] eta: 0:06:48 lr: 0.000022 grad: 0.1977 (0.2049) loss: 0.7092 (0.7114) time: 0.1347 data: 0.0492 max mem: 9377 +Train: [73] [3600/6250] eta: 0:06:32 lr: 0.000022 grad: 0.1921 (0.2046) loss: 0.7179 (0.7114) time: 0.1396 data: 0.0582 max mem: 9377 +Train: [73] [3700/6250] eta: 0:06:17 lr: 0.000022 grad: 0.2017 (0.2044) loss: 0.6988 (0.7113) time: 0.1466 data: 0.0633 max mem: 9377 +Train: [73] [3800/6250] eta: 0:06:02 lr: 0.000022 grad: 0.1924 (0.2043) loss: 0.7143 (0.7114) time: 0.1454 data: 0.0585 max mem: 9377 +Train: [73] [3900/6250] eta: 0:05:46 lr: 0.000022 grad: 0.1992 (0.2041) loss: 0.7098 (0.7114) time: 0.1359 data: 0.0556 max mem: 9377 +Train: [73] [4000/6250] eta: 0:05:31 lr: 0.000022 grad: 0.1950 (0.2040) loss: 0.7096 (0.7114) time: 0.1405 data: 0.0590 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:16 lr: 0.000022 grad: 0.1920 (0.2038) loss: 0.7245 (0.7114) time: 0.1204 data: 0.0359 max mem: 9377 +Train: [73] [4200/6250] eta: 0:05:01 lr: 0.000022 grad: 0.2071 (0.2036) loss: 0.6837 (0.7114) time: 0.1294 data: 0.0388 max mem: 9377 +Train: [73] [4300/6250] eta: 0:04:46 lr: 0.000022 grad: 0.1971 (0.2035) loss: 0.7066 (0.7114) time: 0.1539 data: 0.0754 max mem: 9377 +Train: [73] [4400/6250] eta: 0:04:31 lr: 0.000022 grad: 0.1931 (0.2033) loss: 0.7281 (0.7114) time: 0.1577 data: 0.0698 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:17 lr: 0.000022 grad: 0.1987 (0.2033) loss: 0.7015 (0.7112) time: 0.1601 data: 0.0730 max mem: 9377 +Train: [73] [4600/6250] eta: 0:04:02 lr: 0.000022 grad: 0.1975 (0.2032) loss: 0.7028 (0.7111) time: 0.1497 data: 0.0620 max mem: 9377 +Train: [73] [4700/6250] eta: 0:03:47 lr: 0.000022 grad: 0.2016 (0.2032) loss: 0.6988 (0.7109) time: 0.1368 data: 0.0561 max mem: 9377 +Train: [73] [4800/6250] eta: 0:03:33 lr: 0.000022 grad: 0.1979 (0.2031) loss: 0.6964 (0.7107) time: 0.1602 data: 0.0852 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:18 lr: 0.000022 grad: 0.1947 (0.2030) loss: 0.6993 (0.7106) time: 0.1453 data: 0.0601 max mem: 9377 +Train: [73] [5000/6250] eta: 0:03:03 lr: 0.000022 grad: 0.2003 (0.2030) loss: 0.7094 (0.7104) time: 0.1459 data: 0.0652 max mem: 9377 +Train: [73] [5100/6250] eta: 0:02:48 lr: 0.000022 grad: 0.2001 (0.2029) loss: 0.7049 (0.7103) time: 0.1207 data: 0.0319 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:33 lr: 0.000022 grad: 0.1963 (0.2029) loss: 0.6951 (0.7101) time: 0.1469 data: 0.0686 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:18 lr: 0.000022 grad: 0.1887 (0.2028) loss: 0.7049 (0.7100) time: 0.1294 data: 0.0401 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:03 lr: 0.000022 grad: 0.2014 (0.2027) loss: 0.7123 (0.7100) time: 0.1373 data: 0.0563 max mem: 9377 +Train: [73] [5500/6250] eta: 0:01:49 lr: 0.000022 grad: 0.2081 (0.2027) loss: 0.6958 (0.7099) time: 0.1618 data: 0.0868 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:34 lr: 0.000022 grad: 0.2000 (0.2026) loss: 0.6959 (0.7098) time: 0.1404 data: 0.0540 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:20 lr: 0.000022 grad: 0.1975 (0.2026) loss: 0.7025 (0.7098) time: 0.1396 data: 0.0528 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:05 lr: 0.000022 grad: 0.1919 (0.2025) loss: 0.7143 (0.7097) time: 0.1234 data: 0.0353 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:50 lr: 0.000022 grad: 0.1984 (0.2024) loss: 0.7030 (0.7097) time: 0.1512 data: 0.0730 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:36 lr: 0.000022 grad: 0.1944 (0.2024) loss: 0.7038 (0.7096) time: 0.1274 data: 0.0446 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:21 lr: 0.000022 grad: 0.1916 (0.2023) loss: 0.7059 (0.7096) time: 0.1260 data: 0.0374 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:07 lr: 0.000022 grad: 0.2014 (0.2023) loss: 0.7208 (0.7096) time: 0.1346 data: 0.0503 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1949 (0.2023) loss: 0.7034 (0.7095) time: 0.1321 data: 0.0489 max mem: 9377 +Train: [73] Total time: 0:15:12 (0.1459 s / it) +Averaged stats: lr: 0.000022 grad: 0.1949 (0.2023) loss: 0.7034 (0.7095) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:05:58 loss: 0.8679 (0.8679) time: 5.7864 data: 5.7560 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8707 (0.8720) time: 0.1181 data: 0.0927 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:13 (0.2216 s / it) +Averaged stats (hcp-train-subset): loss: 0.8707 (0.8720) +Eval (hcp-val): [73] [ 0/62] eta: 0:05:36 loss: 0.8730 (0.8730) time: 5.4232 data: 5.3904 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8650 (0.8694) time: 0.1098 data: 0.0832 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (hcp-val): loss: 0.8650 (0.8694) +Eval (nsd-val): [73] [ 0/62] eta: 0:05:08 loss: 0.8298 (0.8298) time: 4.9818 data: 4.9517 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8423 (0.8414) time: 0.1223 data: 0.0970 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:13 (0.2100 s / it) +Averaged stats (nsd-val): loss: 0.8423 (0.8414) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 7:42:50 lr: 0.000022 grad: 0.1609 (0.1609) loss: 0.8026 (0.8026) time: 4.4433 data: 4.2011 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:19:12 lr: 0.000022 grad: 0.2814 (0.3200) loss: 0.7442 (0.7355) time: 0.1401 data: 0.0405 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:16:43 lr: 0.000022 grad: 0.2459 (0.2890) loss: 0.7243 (0.7250) time: 0.1272 data: 0.0321 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:15:43 lr: 0.000022 grad: 0.2038 (0.2685) loss: 0.7429 (0.7258) time: 0.1605 data: 0.0702 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:14:54 lr: 0.000022 grad: 0.2012 (0.2522) loss: 0.7283 (0.7283) time: 0.1402 data: 0.0459 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:14:18 lr: 0.000022 grad: 0.2094 (0.2441) loss: 0.7056 (0.7259) time: 0.1300 data: 0.0383 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:13:53 lr: 0.000022 grad: 0.2026 (0.2378) loss: 0.7090 (0.7242) time: 0.1429 data: 0.0452 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:13:31 lr: 0.000022 grad: 0.1944 (0.2322) loss: 0.7135 (0.7231) time: 0.1200 data: 0.0385 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:13:16 lr: 0.000022 grad: 0.1990 (0.2285) loss: 0.7225 (0.7223) time: 0.1458 data: 0.0527 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:12:56 lr: 0.000021 grad: 0.2092 (0.2264) loss: 0.7021 (0.7211) time: 0.1310 data: 0.0476 max mem: 9377 +Train: [74] [1000/6250] eta: 0:12:35 lr: 0.000021 grad: 0.1951 (0.2240) loss: 0.7046 (0.7204) time: 0.1173 data: 0.0307 max mem: 9377 +Train: [74] [1100/6250] eta: 0:12:17 lr: 0.000021 grad: 0.1938 (0.2218) loss: 0.7065 (0.7194) time: 0.1302 data: 0.0447 max mem: 9377 +Train: [74] [1200/6250] eta: 0:12:01 lr: 0.000021 grad: 0.2037 (0.2198) loss: 0.7137 (0.7193) time: 0.1356 data: 0.0513 max mem: 9377 +Train: [74] [1300/6250] eta: 0:11:46 lr: 0.000021 grad: 0.1878 (0.2180) loss: 0.7259 (0.7195) time: 0.1148 data: 0.0333 max mem: 9377 +Train: [74] [1400/6250] eta: 0:11:29 lr: 0.000021 grad: 0.1983 (0.2167) loss: 0.7035 (0.7194) time: 0.1116 data: 0.0240 max mem: 9377 +Train: [74] [1500/6250] eta: 0:11:12 lr: 0.000021 grad: 0.1929 (0.2154) loss: 0.7217 (0.7193) time: 0.1343 data: 0.0542 max mem: 9377 +Train: [74] [1600/6250] eta: 0:10:56 lr: 0.000021 grad: 0.1966 (0.2143) loss: 0.7142 (0.7192) time: 0.1426 data: 0.0628 max mem: 9377 +Train: [74] [1700/6250] eta: 0:10:45 lr: 0.000021 grad: 0.1942 (0.2133) loss: 0.7317 (0.7192) time: 0.1485 data: 0.0618 max mem: 9377 +Train: [74] [1800/6250] eta: 0:10:34 lr: 0.000021 grad: 0.1953 (0.2124) loss: 0.7102 (0.7190) time: 0.1396 data: 0.0592 max mem: 9377 +Train: [74] [1900/6250] eta: 0:10:20 lr: 0.000021 grad: 0.1880 (0.2115) loss: 0.7116 (0.7189) time: 0.1378 data: 0.0592 max mem: 9377 +Train: [74] [2000/6250] eta: 0:10:05 lr: 0.000021 grad: 0.1995 (0.2107) loss: 0.7090 (0.7190) time: 0.1271 data: 0.0495 max mem: 9377 +Train: [74] [2100/6250] eta: 0:09:51 lr: 0.000021 grad: 0.1980 (0.2101) loss: 0.7067 (0.7187) time: 0.1511 data: 0.0776 max mem: 9377 +Train: [74] [2200/6250] eta: 0:09:37 lr: 0.000021 grad: 0.1977 (0.2095) loss: 0.7074 (0.7185) time: 0.1476 data: 0.0626 max mem: 9377 +Train: [74] [2300/6250] eta: 0:09:22 lr: 0.000021 grad: 0.1973 (0.2090) loss: 0.7194 (0.7184) time: 0.1469 data: 0.0626 max mem: 9377 +Train: [74] [2400/6250] eta: 0:09:06 lr: 0.000021 grad: 0.1877 (0.2085) loss: 0.7227 (0.7183) time: 0.1462 data: 0.0701 max mem: 9377 +Train: [74] [2500/6250] eta: 0:08:51 lr: 0.000021 grad: 0.1913 (0.2080) loss: 0.7061 (0.7182) time: 0.1427 data: 0.0572 max mem: 9377 +Train: [74] [2600/6250] eta: 0:08:36 lr: 0.000021 grad: 0.1865 (0.2073) loss: 0.7321 (0.7184) time: 0.1002 data: 0.0003 max mem: 9377 +Train: [74] [2700/6250] eta: 0:08:21 lr: 0.000021 grad: 0.1913 (0.2069) loss: 0.7182 (0.7184) time: 0.1474 data: 0.0633 max mem: 9377 +Train: [74] [2800/6250] eta: 0:08:05 lr: 0.000021 grad: 0.1948 (0.2066) loss: 0.7182 (0.7184) time: 0.1072 data: 0.0200 max mem: 9377 +Train: [74] [2900/6250] eta: 0:07:50 lr: 0.000021 grad: 0.1962 (0.2062) loss: 0.7083 (0.7185) time: 0.1305 data: 0.0499 max mem: 9377 +Train: [74] [3000/6250] eta: 0:07:36 lr: 0.000021 grad: 0.1981 (0.2059) loss: 0.7080 (0.7185) time: 0.1539 data: 0.0743 max mem: 9377 +Train: [74] [3100/6250] eta: 0:07:21 lr: 0.000021 grad: 0.1953 (0.2056) loss: 0.7186 (0.7184) time: 0.1190 data: 0.0361 max mem: 9377 +Train: [74] [3200/6250] eta: 0:07:07 lr: 0.000021 grad: 0.1997 (0.2054) loss: 0.7177 (0.7184) time: 0.1316 data: 0.0500 max mem: 9377 +Train: [74] [3300/6250] eta: 0:06:52 lr: 0.000021 grad: 0.2018 (0.2053) loss: 0.7130 (0.7182) time: 0.1514 data: 0.0708 max mem: 9377 +Train: [74] [3400/6250] eta: 0:06:37 lr: 0.000021 grad: 0.1942 (0.2051) loss: 0.7106 (0.7181) time: 0.1221 data: 0.0470 max mem: 9377 +Train: [74] [3500/6250] eta: 0:06:23 lr: 0.000021 grad: 0.1935 (0.2048) loss: 0.7127 (0.7181) time: 0.1280 data: 0.0393 max mem: 9377 +Train: [74] [3600/6250] eta: 0:06:09 lr: 0.000021 grad: 0.1883 (0.2046) loss: 0.7309 (0.7181) time: 0.1412 data: 0.0616 max mem: 9377 +Train: [74] [3700/6250] eta: 0:05:55 lr: 0.000021 grad: 0.1907 (0.2043) loss: 0.7179 (0.7182) time: 0.1538 data: 0.0772 max mem: 9377 +Train: [74] [3800/6250] eta: 0:05:40 lr: 0.000021 grad: 0.1980 (0.2041) loss: 0.7182 (0.7182) time: 0.1286 data: 0.0469 max mem: 9377 +Train: [74] [3900/6250] eta: 0:05:26 lr: 0.000021 grad: 0.1912 (0.2038) loss: 0.7139 (0.7183) time: 0.1137 data: 0.0319 max mem: 9377 +Train: [74] [4000/6250] eta: 0:05:12 lr: 0.000021 grad: 0.1900 (0.2036) loss: 0.7210 (0.7185) time: 0.1391 data: 0.0609 max mem: 9377 +Train: [74] [4100/6250] eta: 0:04:58 lr: 0.000021 grad: 0.1941 (0.2034) loss: 0.7237 (0.7186) time: 0.1289 data: 0.0448 max mem: 9377 +Train: [74] [4200/6250] eta: 0:04:44 lr: 0.000021 grad: 0.1879 (0.2032) loss: 0.7270 (0.7187) time: 0.1554 data: 0.0752 max mem: 9377 +Train: [74] [4300/6250] eta: 0:04:30 lr: 0.000021 grad: 0.1990 (0.2030) loss: 0.7241 (0.7188) time: 0.1375 data: 0.0573 max mem: 9377 +Train: [74] [4400/6250] eta: 0:04:17 lr: 0.000021 grad: 0.1954 (0.2029) loss: 0.7152 (0.7189) time: 0.1570 data: 0.0759 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:02 lr: 0.000021 grad: 0.1864 (0.2027) loss: 0.7301 (0.7189) time: 0.1380 data: 0.0532 max mem: 9377 +Train: [74] [4600/6250] eta: 0:03:49 lr: 0.000021 grad: 0.1952 (0.2026) loss: 0.7236 (0.7189) time: 0.1541 data: 0.0710 max mem: 9377 +Train: [74] [4700/6250] eta: 0:03:36 lr: 0.000021 grad: 0.1917 (0.2024) loss: 0.7110 (0.7189) time: 0.1706 data: 0.0875 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:23 lr: 0.000021 grad: 0.1999 (0.2023) loss: 0.7065 (0.7190) time: 0.1691 data: 0.0904 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:10 lr: 0.000020 grad: 0.1901 (0.2022) loss: 0.7184 (0.7191) time: 0.1645 data: 0.0801 max mem: 9377 +Train: [74] [5000/6250] eta: 0:02:56 lr: 0.000020 grad: 0.2003 (0.2021) loss: 0.6979 (0.7190) time: 0.1766 data: 0.0927 max mem: 9377 +Train: [74] [5100/6250] eta: 0:02:43 lr: 0.000020 grad: 0.1948 (0.2020) loss: 0.7182 (0.7189) time: 0.1639 data: 0.0862 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:29 lr: 0.000020 grad: 0.1896 (0.2019) loss: 0.7184 (0.7188) time: 0.1452 data: 0.0506 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:15 lr: 0.000020 grad: 0.1902 (0.2018) loss: 0.7160 (0.7187) time: 0.1660 data: 0.0796 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:00 lr: 0.000020 grad: 0.2029 (0.2017) loss: 0.7164 (0.7186) time: 0.1543 data: 0.0713 max mem: 9377 +Train: [74] [5500/6250] eta: 0:01:46 lr: 0.000020 grad: 0.1921 (0.2016) loss: 0.7171 (0.7186) time: 0.1356 data: 0.0524 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:32 lr: 0.000020 grad: 0.1961 (0.2014) loss: 0.7231 (0.7186) time: 0.1290 data: 0.0446 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:18 lr: 0.000020 grad: 0.1910 (0.2013) loss: 0.7255 (0.7187) time: 0.1392 data: 0.0549 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:03 lr: 0.000020 grad: 0.1923 (0.2011) loss: 0.7109 (0.7187) time: 0.1364 data: 0.0513 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:49 lr: 0.000020 grad: 0.1947 (0.2010) loss: 0.7145 (0.7186) time: 0.1474 data: 0.0661 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:35 lr: 0.000020 grad: 0.1867 (0.2009) loss: 0.7277 (0.7187) time: 0.1379 data: 0.0576 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:21 lr: 0.000020 grad: 0.1965 (0.2008) loss: 0.7096 (0.7187) time: 0.1487 data: 0.0666 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:07 lr: 0.000020 grad: 0.1968 (0.2007) loss: 0.7105 (0.7186) time: 0.1597 data: 0.0788 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1986 (0.2007) loss: 0.7020 (0.7185) time: 0.1410 data: 0.0596 max mem: 9377 +Train: [74] Total time: 0:14:55 (0.1433 s / it) +Averaged stats: lr: 0.000020 grad: 0.1986 (0.2007) loss: 0.7020 (0.7185) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:06:03 loss: 0.8700 (0.8700) time: 5.8587 data: 5.8268 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8708 (0.8739) time: 0.1338 data: 0.1085 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:13 (0.2253 s / it) +Averaged stats (hcp-train-subset): loss: 0.8708 (0.8739) +Making plots (hcp-train-subset): example=19 +Eval (hcp-val): [74] [ 0/62] eta: 0:06:04 loss: 0.8692 (0.8692) time: 5.8750 data: 5.8434 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8704 (0.8716) time: 0.1310 data: 0.1059 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (hcp-val): loss: 0.8704 (0.8716) +Making plots (hcp-val): example=5 +Eval (nsd-val): [74] [ 0/62] eta: 0:05:34 loss: 0.8279 (0.8279) time: 5.3926 data: 5.3621 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8396 (0.8412) time: 0.1255 data: 0.0988 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:13 (0.2112 s / it) +Averaged stats (nsd-val): loss: 0.8396 (0.8412) +Making plots (nsd-val): example=52 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 10:08:52 lr: 0.000020 grad: 0.5136 (0.5136) loss: 0.6309 (0.6309) time: 5.8452 data: 5.6915 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:19:39 lr: 0.000020 grad: 0.3154 (0.3713) loss: 0.7162 (0.6952) time: 0.1559 data: 0.0565 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:16:57 lr: 0.000020 grad: 0.2667 (0.3319) loss: 0.6993 (0.7008) time: 0.1473 data: 0.0603 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:15:42 lr: 0.000020 grad: 0.2309 (0.3111) loss: 0.7447 (0.7029) time: 0.1173 data: 0.0192 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:14:56 lr: 0.000020 grad: 0.2149 (0.2898) loss: 0.6908 (0.7038) time: 0.1352 data: 0.0476 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:14:17 lr: 0.000020 grad: 0.2043 (0.2744) loss: 0.7214 (0.7060) time: 0.1485 data: 0.0611 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:13:50 lr: 0.000020 grad: 0.2085 (0.2641) loss: 0.7065 (0.7071) time: 0.1292 data: 0.0305 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:13:25 lr: 0.000020 grad: 0.2099 (0.2557) loss: 0.7055 (0.7079) time: 0.1167 data: 0.0238 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:13:00 lr: 0.000020 grad: 0.2055 (0.2494) loss: 0.6964 (0.7066) time: 0.1290 data: 0.0400 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:12:46 lr: 0.000020 grad: 0.2012 (0.2446) loss: 0.6949 (0.7059) time: 0.1346 data: 0.0463 max mem: 9377 +Train: [75] [1000/6250] eta: 0:12:33 lr: 0.000020 grad: 0.2047 (0.2405) loss: 0.6911 (0.7052) time: 0.1451 data: 0.0653 max mem: 9377 +Train: [75] [1100/6250] eta: 0:12:17 lr: 0.000020 grad: 0.2013 (0.2376) loss: 0.6806 (0.7044) time: 0.1378 data: 0.0534 max mem: 9377 +Train: [75] [1200/6250] eta: 0:12:02 lr: 0.000020 grad: 0.1970 (0.2347) loss: 0.7097 (0.7040) time: 0.1418 data: 0.0588 max mem: 9377 +Train: [75] [1300/6250] eta: 0:11:43 lr: 0.000020 grad: 0.2005 (0.2321) loss: 0.7124 (0.7042) time: 0.1256 data: 0.0434 max mem: 9377 +Train: [75] [1400/6250] eta: 0:11:27 lr: 0.000020 grad: 0.2033 (0.2299) loss: 0.7080 (0.7042) time: 0.1320 data: 0.0437 max mem: 9377 +Train: [75] [1500/6250] eta: 0:11:12 lr: 0.000020 grad: 0.2006 (0.2280) loss: 0.7054 (0.7041) time: 0.1372 data: 0.0535 max mem: 9377 +Train: [75] [1600/6250] eta: 0:11:05 lr: 0.000020 grad: 0.1980 (0.2263) loss: 0.7106 (0.7041) time: 0.1575 data: 0.0703 max mem: 9377 +Train: [75] [1700/6250] eta: 0:10:57 lr: 0.000020 grad: 0.1972 (0.2248) loss: 0.7101 (0.7041) time: 0.1716 data: 0.0809 max mem: 9377 +Train: [75] [1800/6250] eta: 0:10:48 lr: 0.000020 grad: 0.1958 (0.2234) loss: 0.7152 (0.7044) time: 0.1702 data: 0.0944 max mem: 9377 +Train: [75] [1900/6250] eta: 0:10:36 lr: 0.000020 grad: 0.1978 (0.2220) loss: 0.7167 (0.7046) time: 0.1639 data: 0.0850 max mem: 9377 +Train: [75] [2000/6250] eta: 0:10:25 lr: 0.000020 grad: 0.1935 (0.2210) loss: 0.7129 (0.7048) time: 0.1613 data: 0.0846 max mem: 9377 +Train: [75] [2100/6250] eta: 0:10:10 lr: 0.000020 grad: 0.1911 (0.2199) loss: 0.7164 (0.7050) time: 0.1421 data: 0.0608 max mem: 9377 +Train: [75] [2200/6250] eta: 0:09:54 lr: 0.000020 grad: 0.2010 (0.2190) loss: 0.7074 (0.7052) time: 0.1456 data: 0.0637 max mem: 9377 +Train: [75] [2300/6250] eta: 0:09:38 lr: 0.000020 grad: 0.1930 (0.2181) loss: 0.7133 (0.7054) time: 0.1424 data: 0.0553 max mem: 9377 +Train: [75] [2400/6250] eta: 0:09:22 lr: 0.000020 grad: 0.1991 (0.2172) loss: 0.7120 (0.7058) time: 0.1548 data: 0.0700 max mem: 9377 +Train: [75] [2500/6250] eta: 0:09:05 lr: 0.000020 grad: 0.1932 (0.2164) loss: 0.7115 (0.7063) time: 0.1312 data: 0.0498 max mem: 9377 +Train: [75] [2600/6250] eta: 0:08:50 lr: 0.000020 grad: 0.1919 (0.2156) loss: 0.7251 (0.7067) time: 0.1409 data: 0.0474 max mem: 9377 +Train: [75] [2700/6250] eta: 0:08:36 lr: 0.000020 grad: 0.1898 (0.2148) loss: 0.7234 (0.7072) time: 0.1492 data: 0.0661 max mem: 9377 +Train: [75] [2800/6250] eta: 0:08:20 lr: 0.000019 grad: 0.1905 (0.2141) loss: 0.7194 (0.7076) time: 0.1398 data: 0.0563 max mem: 9377 +Train: [75] [2900/6250] eta: 0:08:05 lr: 0.000019 grad: 0.1870 (0.2134) loss: 0.7180 (0.7081) time: 0.1563 data: 0.0693 max mem: 9377 +Train: [75] [3000/6250] eta: 0:07:49 lr: 0.000019 grad: 0.1929 (0.2128) loss: 0.7269 (0.7086) time: 0.1466 data: 0.0646 max mem: 9377 +Train: [75] [3100/6250] eta: 0:07:34 lr: 0.000019 grad: 0.2017 (0.2123) loss: 0.7159 (0.7090) time: 0.1121 data: 0.0237 max mem: 9377 +Train: [75] [3200/6250] eta: 0:07:19 lr: 0.000019 grad: 0.1951 (0.2119) loss: 0.7195 (0.7092) time: 0.1402 data: 0.0587 max mem: 9377 +Train: [75] [3300/6250] eta: 0:07:04 lr: 0.000019 grad: 0.1984 (0.2115) loss: 0.7087 (0.7095) time: 0.1294 data: 0.0450 max mem: 9377 +Train: [75] [3400/6250] eta: 0:06:49 lr: 0.000019 grad: 0.1970 (0.2111) loss: 0.7223 (0.7099) time: 0.1593 data: 0.0717 max mem: 9377 +Train: [75] [3500/6250] eta: 0:06:35 lr: 0.000019 grad: 0.1963 (0.2106) loss: 0.7131 (0.7101) time: 0.1559 data: 0.0731 max mem: 9377 +Train: [75] [3600/6250] eta: 0:06:20 lr: 0.000019 grad: 0.1970 (0.2103) loss: 0.7115 (0.7103) time: 0.1568 data: 0.0737 max mem: 9377 +Train: [75] [3700/6250] eta: 0:06:05 lr: 0.000019 grad: 0.1955 (0.2100) loss: 0.7128 (0.7104) time: 0.1420 data: 0.0577 max mem: 9377 +Train: [75] [3800/6250] eta: 0:05:51 lr: 0.000019 grad: 0.2002 (0.2097) loss: 0.7104 (0.7106) time: 0.1324 data: 0.0462 max mem: 9377 +Train: [75] [3900/6250] eta: 0:05:36 lr: 0.000019 grad: 0.2029 (0.2095) loss: 0.7102 (0.7107) time: 0.1447 data: 0.0615 max mem: 9377 +Train: [75] [4000/6250] eta: 0:05:21 lr: 0.000019 grad: 0.1975 (0.2093) loss: 0.7187 (0.7108) time: 0.1235 data: 0.0368 max mem: 9377 +Train: [75] [4100/6250] eta: 0:05:07 lr: 0.000019 grad: 0.1993 (0.2090) loss: 0.7010 (0.7108) time: 0.1426 data: 0.0539 max mem: 9377 +Train: [75] [4200/6250] eta: 0:04:52 lr: 0.000019 grad: 0.2036 (0.2088) loss: 0.7099 (0.7108) time: 0.1326 data: 0.0520 max mem: 9377 +Train: [75] [4300/6250] eta: 0:04:37 lr: 0.000019 grad: 0.1920 (0.2085) loss: 0.7143 (0.7110) time: 0.1308 data: 0.0521 max mem: 9377 +Train: [75] [4400/6250] eta: 0:04:23 lr: 0.000019 grad: 0.1913 (0.2082) loss: 0.7302 (0.7112) time: 0.1310 data: 0.0431 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:08 lr: 0.000019 grad: 0.1924 (0.2080) loss: 0.7196 (0.7113) time: 0.1245 data: 0.0441 max mem: 9377 +Train: [75] [4600/6250] eta: 0:03:54 lr: 0.000019 grad: 0.2010 (0.2078) loss: 0.7125 (0.7114) time: 0.1431 data: 0.0649 max mem: 9377 +Train: [75] [4700/6250] eta: 0:03:40 lr: 0.000019 grad: 0.1972 (0.2076) loss: 0.7228 (0.7115) time: 0.1774 data: 0.1034 max mem: 9377 +Train: [75] [4800/6250] eta: 0:03:26 lr: 0.000019 grad: 0.1939 (0.2074) loss: 0.7223 (0.7115) time: 0.1413 data: 0.0614 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:12 lr: 0.000019 grad: 0.1938 (0.2072) loss: 0.7022 (0.7116) time: 0.1329 data: 0.0553 max mem: 9377 +Train: [75] [5000/6250] eta: 0:02:58 lr: 0.000019 grad: 0.1997 (0.2069) loss: 0.7086 (0.7117) time: 0.1380 data: 0.0614 max mem: 9377 +Train: [75] [5100/6250] eta: 0:02:43 lr: 0.000019 grad: 0.2017 (0.2069) loss: 0.7045 (0.7118) time: 0.1454 data: 0.0600 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:29 lr: 0.000019 grad: 0.1977 (0.2068) loss: 0.7025 (0.7118) time: 0.1688 data: 0.0840 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:15 lr: 0.000019 grad: 0.1982 (0.2067) loss: 0.7130 (0.7117) time: 0.1279 data: 0.0420 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:01 lr: 0.000019 grad: 0.1952 (0.2066) loss: 0.7183 (0.7117) time: 0.1286 data: 0.0399 max mem: 9377 +Train: [75] [5500/6250] eta: 0:01:47 lr: 0.000019 grad: 0.2019 (0.2065) loss: 0.7080 (0.7118) time: 0.1358 data: 0.0434 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:32 lr: 0.000019 grad: 0.1969 (0.2064) loss: 0.6983 (0.7118) time: 0.1682 data: 0.0891 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:18 lr: 0.000019 grad: 0.1971 (0.2062) loss: 0.7234 (0.7119) time: 0.1426 data: 0.0614 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:04 lr: 0.000019 grad: 0.1934 (0.2061) loss: 0.7185 (0.7119) time: 0.1452 data: 0.0621 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:49 lr: 0.000019 grad: 0.2014 (0.2060) loss: 0.7127 (0.7119) time: 0.1393 data: 0.0521 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:35 lr: 0.000019 grad: 0.1975 (0.2059) loss: 0.7257 (0.7119) time: 0.1665 data: 0.0871 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:21 lr: 0.000019 grad: 0.1965 (0.2059) loss: 0.7232 (0.7119) time: 0.1348 data: 0.0543 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:07 lr: 0.000019 grad: 0.2024 (0.2059) loss: 0.7150 (0.7119) time: 0.1231 data: 0.0413 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.2001 (0.2058) loss: 0.7193 (0.7119) time: 0.1382 data: 0.0596 max mem: 9377 +Train: [75] Total time: 0:14:52 (0.1427 s / it) +Averaged stats: lr: 0.000019 grad: 0.2001 (0.2058) loss: 0.7193 (0.7119) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:06:07 loss: 0.8682 (0.8682) time: 5.9323 data: 5.9022 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8720 (0.8722) time: 0.1180 data: 0.0917 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:13 (0.2146 s / it) +Averaged stats (hcp-train-subset): loss: 0.8720 (0.8722) +Eval (hcp-val): [75] [ 0/62] eta: 0:04:00 loss: 0.8727 (0.8727) time: 3.8839 data: 3.8338 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8701 (0.8696) time: 0.1035 data: 0.0781 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (hcp-val): loss: 0.8701 (0.8696) +Eval (nsd-val): [75] [ 0/62] eta: 0:05:42 loss: 0.8356 (0.8356) time: 5.5318 data: 5.4919 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8409 (0.8432) time: 0.1458 data: 0.1193 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:14 (0.2279 s / it) +Averaged stats (nsd-val): loss: 0.8409 (0.8432) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 11:00:22 lr: 0.000019 grad: 0.3378 (0.3378) loss: 0.6187 (0.6187) time: 6.3396 data: 6.2308 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:20:19 lr: 0.000019 grad: 0.2407 (0.2985) loss: 0.7367 (0.7119) time: 0.1388 data: 0.0419 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:17:13 lr: 0.000019 grad: 0.2509 (0.2794) loss: 0.7082 (0.7068) time: 0.1293 data: 0.0365 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:16:03 lr: 0.000019 grad: 0.2164 (0.2657) loss: 0.7207 (0.7056) time: 0.1468 data: 0.0586 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:15:16 lr: 0.000019 grad: 0.2124 (0.2543) loss: 0.7172 (0.7058) time: 0.1447 data: 0.0573 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:14:35 lr: 0.000019 grad: 0.2056 (0.2468) loss: 0.7123 (0.7060) time: 0.1329 data: 0.0419 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:14:02 lr: 0.000019 grad: 0.2100 (0.2406) loss: 0.7176 (0.7071) time: 0.1380 data: 0.0374 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:13:34 lr: 0.000019 grad: 0.2042 (0.2354) loss: 0.7117 (0.7082) time: 0.1421 data: 0.0514 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:13:14 lr: 0.000018 grad: 0.2012 (0.2312) loss: 0.7185 (0.7090) time: 0.1477 data: 0.0581 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:12:56 lr: 0.000018 grad: 0.1960 (0.2276) loss: 0.7168 (0.7102) time: 0.1249 data: 0.0268 max mem: 9377 +Train: [76] [1000/6250] eta: 0:12:39 lr: 0.000018 grad: 0.2022 (0.2249) loss: 0.7204 (0.7109) time: 0.1469 data: 0.0633 max mem: 9377 +Train: [76] [1100/6250] eta: 0:12:17 lr: 0.000018 grad: 0.1990 (0.2225) loss: 0.7207 (0.7119) time: 0.1304 data: 0.0441 max mem: 9377 +Train: [76] [1200/6250] eta: 0:12:00 lr: 0.000018 grad: 0.1914 (0.2203) loss: 0.7217 (0.7126) time: 0.1429 data: 0.0620 max mem: 9377 +Train: [76] [1300/6250] eta: 0:11:44 lr: 0.000018 grad: 0.1941 (0.2187) loss: 0.7177 (0.7133) time: 0.1607 data: 0.0793 max mem: 9377 +Train: [76] [1400/6250] eta: 0:11:28 lr: 0.000018 grad: 0.1993 (0.2170) loss: 0.7194 (0.7139) time: 0.1430 data: 0.0593 max mem: 9377 +Train: [76] [1500/6250] eta: 0:11:13 lr: 0.000018 grad: 0.1960 (0.2156) loss: 0.7160 (0.7146) time: 0.1525 data: 0.0676 max mem: 9377 +Train: [76] [1600/6250] eta: 0:10:56 lr: 0.000018 grad: 0.1997 (0.2145) loss: 0.7232 (0.7152) time: 0.1191 data: 0.0351 max mem: 9377 +Train: [76] [1700/6250] eta: 0:10:44 lr: 0.000018 grad: 0.1978 (0.2136) loss: 0.7187 (0.7155) time: 0.1480 data: 0.0560 max mem: 9377 +Train: [76] [1800/6250] eta: 0:10:34 lr: 0.000018 grad: 0.1963 (0.2127) loss: 0.7094 (0.7158) time: 0.1598 data: 0.0720 max mem: 9377 +Train: [76] [1900/6250] eta: 0:10:21 lr: 0.000018 grad: 0.1942 (0.2120) loss: 0.7209 (0.7159) time: 0.1537 data: 0.0669 max mem: 9377 +Train: [76] [2000/6250] eta: 0:10:07 lr: 0.000018 grad: 0.1928 (0.2114) loss: 0.7219 (0.7159) time: 0.1448 data: 0.0527 max mem: 9377 +Train: [76] [2100/6250] eta: 0:09:52 lr: 0.000018 grad: 0.2000 (0.2109) loss: 0.7186 (0.7159) time: 0.1361 data: 0.0548 max mem: 9377 +Train: [76] [2200/6250] eta: 0:09:36 lr: 0.000018 grad: 0.1983 (0.2104) loss: 0.7204 (0.7159) time: 0.1428 data: 0.0567 max mem: 9377 +Train: [76] [2300/6250] eta: 0:09:22 lr: 0.000018 grad: 0.1978 (0.2099) loss: 0.7099 (0.7160) time: 0.1316 data: 0.0432 max mem: 9377 +Train: [76] [2400/6250] eta: 0:09:08 lr: 0.000018 grad: 0.1957 (0.2094) loss: 0.7198 (0.7162) time: 0.1649 data: 0.0794 max mem: 9377 +Train: [76] [2500/6250] eta: 0:08:53 lr: 0.000018 grad: 0.2016 (0.2090) loss: 0.7130 (0.7162) time: 0.1277 data: 0.0321 max mem: 9377 +Train: [76] [2600/6250] eta: 0:08:39 lr: 0.000018 grad: 0.2033 (0.2087) loss: 0.7233 (0.7162) time: 0.1410 data: 0.0514 max mem: 9377 +Train: [76] [2700/6250] eta: 0:08:24 lr: 0.000018 grad: 0.1967 (0.2084) loss: 0.7153 (0.7162) time: 0.1504 data: 0.0724 max mem: 9377 +Train: [76] [2800/6250] eta: 0:08:09 lr: 0.000018 grad: 0.1896 (0.2080) loss: 0.7272 (0.7164) time: 0.1521 data: 0.0726 max mem: 9377 +Train: [76] [2900/6250] eta: 0:07:54 lr: 0.000018 grad: 0.1919 (0.2076) loss: 0.7245 (0.7164) time: 0.1229 data: 0.0359 max mem: 9377 +Train: [76] [3000/6250] eta: 0:07:40 lr: 0.000018 grad: 0.2011 (0.2074) loss: 0.7054 (0.7162) time: 0.1474 data: 0.0622 max mem: 9377 +Train: [76] [3100/6250] eta: 0:07:27 lr: 0.000018 grad: 0.1963 (0.2071) loss: 0.7155 (0.7161) time: 0.1298 data: 0.0528 max mem: 9377 +Train: [76] [3200/6250] eta: 0:07:13 lr: 0.000018 grad: 0.1952 (0.2068) loss: 0.7110 (0.7159) time: 0.1471 data: 0.0684 max mem: 9377 +Train: [76] [3300/6250] eta: 0:07:00 lr: 0.000018 grad: 0.2006 (0.2065) loss: 0.7099 (0.7158) time: 0.1402 data: 0.0564 max mem: 9377 +Train: [76] [3400/6250] eta: 0:06:48 lr: 0.000018 grad: 0.2033 (0.2063) loss: 0.7152 (0.7157) time: 0.1761 data: 0.0967 max mem: 9377 +Train: [76] [3500/6250] eta: 0:06:34 lr: 0.000018 grad: 0.2033 (0.2060) loss: 0.7064 (0.7155) time: 0.1515 data: 0.0676 max mem: 9377 +Train: [76] [3600/6250] eta: 0:06:19 lr: 0.000018 grad: 0.1934 (0.2058) loss: 0.7135 (0.7154) time: 0.1406 data: 0.0554 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:05 lr: 0.000018 grad: 0.1968 (0.2057) loss: 0.7055 (0.7151) time: 0.1365 data: 0.0620 max mem: 9377 +Train: [76] [3800/6250] eta: 0:05:51 lr: 0.000018 grad: 0.1964 (0.2055) loss: 0.7040 (0.7149) time: 0.1485 data: 0.0674 max mem: 9377 +Train: [76] [3900/6250] eta: 0:05:37 lr: 0.000018 grad: 0.1986 (0.2055) loss: 0.6961 (0.7145) time: 0.1430 data: 0.0624 max mem: 9377 +Train: [76] [4000/6250] eta: 0:05:22 lr: 0.000018 grad: 0.2002 (0.2054) loss: 0.7107 (0.7141) time: 0.1168 data: 0.0317 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:08 lr: 0.000018 grad: 0.1936 (0.2053) loss: 0.7137 (0.7139) time: 0.1714 data: 0.0894 max mem: 9377 +Train: [76] [4200/6250] eta: 0:04:54 lr: 0.000018 grad: 0.1995 (0.2052) loss: 0.7132 (0.7137) time: 0.1547 data: 0.0720 max mem: 9377 +Train: [76] [4300/6250] eta: 0:04:39 lr: 0.000018 grad: 0.2038 (0.2052) loss: 0.7064 (0.7135) time: 0.1479 data: 0.0652 max mem: 9377 +Train: [76] [4400/6250] eta: 0:04:25 lr: 0.000018 grad: 0.1985 (0.2051) loss: 0.7068 (0.7133) time: 0.1418 data: 0.0602 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:10 lr: 0.000018 grad: 0.2002 (0.2050) loss: 0.7095 (0.7130) time: 0.1456 data: 0.0696 max mem: 9377 +Train: [76] [4600/6250] eta: 0:03:56 lr: 0.000018 grad: 0.2079 (0.2052) loss: 0.7025 (0.7129) time: 0.1370 data: 0.0511 max mem: 9377 +Train: [76] [4700/6250] eta: 0:03:41 lr: 0.000018 grad: 0.1971 (0.2052) loss: 0.6975 (0.7126) time: 0.1244 data: 0.0406 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:27 lr: 0.000018 grad: 0.1966 (0.2051) loss: 0.7003 (0.7125) time: 0.1361 data: 0.0534 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:12 lr: 0.000018 grad: 0.2064 (0.2051) loss: 0.7001 (0.7123) time: 0.1361 data: 0.0555 max mem: 9377 +Train: [76] [5000/6250] eta: 0:02:58 lr: 0.000018 grad: 0.2070 (0.2051) loss: 0.7032 (0.7122) time: 0.1542 data: 0.0677 max mem: 9377 +Train: [76] [5100/6250] eta: 0:02:44 lr: 0.000017 grad: 0.2026 (0.2051) loss: 0.7118 (0.7121) time: 0.1607 data: 0.0832 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:30 lr: 0.000017 grad: 0.2020 (0.2050) loss: 0.7016 (0.7120) time: 0.1465 data: 0.0662 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:16 lr: 0.000017 grad: 0.1861 (0.2049) loss: 0.7184 (0.7120) time: 0.1647 data: 0.0795 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:02 lr: 0.000017 grad: 0.2009 (0.2048) loss: 0.7169 (0.7120) time: 0.1404 data: 0.0596 max mem: 9377 +Train: [76] [5500/6250] eta: 0:01:47 lr: 0.000017 grad: 0.1983 (0.2048) loss: 0.7084 (0.7120) time: 0.1388 data: 0.0517 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:33 lr: 0.000017 grad: 0.2010 (0.2048) loss: 0.6999 (0.7119) time: 0.1320 data: 0.0480 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:18 lr: 0.000017 grad: 0.1966 (0.2047) loss: 0.7122 (0.7118) time: 0.1408 data: 0.0526 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:04 lr: 0.000017 grad: 0.2085 (0.2047) loss: 0.6973 (0.7117) time: 0.1367 data: 0.0537 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:50 lr: 0.000017 grad: 0.1987 (0.2046) loss: 0.7131 (0.7117) time: 0.1260 data: 0.0371 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:35 lr: 0.000017 grad: 0.2019 (0.2046) loss: 0.7140 (0.7117) time: 0.1341 data: 0.0550 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:21 lr: 0.000017 grad: 0.1980 (0.2045) loss: 0.7055 (0.7116) time: 0.1189 data: 0.0307 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:07 lr: 0.000017 grad: 0.1951 (0.2045) loss: 0.7089 (0.7116) time: 0.1120 data: 0.0290 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1954 (0.2045) loss: 0.7047 (0.7116) time: 0.1307 data: 0.0451 max mem: 9377 +Train: [76] Total time: 0:14:54 (0.1431 s / it) +Averaged stats: lr: 0.000017 grad: 0.1954 (0.2045) loss: 0.7047 (0.7116) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:05:55 loss: 0.8637 (0.8637) time: 5.7282 data: 5.6988 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8710 (0.8722) time: 0.1381 data: 0.1123 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:14 (0.2334 s / it) +Averaged stats (hcp-train-subset): loss: 0.8710 (0.8722) +Eval (hcp-val): [76] [ 0/62] eta: 0:07:01 loss: 0.8680 (0.8680) time: 6.7934 data: 6.7601 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8674 (0.8696) time: 0.1469 data: 0.1214 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:15 (0.2520 s / it) +Averaged stats (hcp-val): loss: 0.8674 (0.8696) +Eval (nsd-val): [76] [ 0/62] eta: 0:04:08 loss: 0.8303 (0.8303) time: 4.0161 data: 3.9385 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8400 (0.8433) time: 0.1409 data: 0.1158 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (nsd-val): loss: 0.8400 (0.8433) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 11:52:51 lr: 0.000017 grad: 0.5158 (0.5158) loss: 0.5320 (0.5320) time: 6.8434 data: 6.6770 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:22:23 lr: 0.000017 grad: 0.2655 (0.3050) loss: 0.7198 (0.7182) time: 0.1642 data: 0.0521 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:19:02 lr: 0.000017 grad: 0.2241 (0.2793) loss: 0.7039 (0.7121) time: 0.1654 data: 0.0710 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:17:49 lr: 0.000017 grad: 0.2037 (0.2575) loss: 0.7217 (0.7159) time: 0.1501 data: 0.0632 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:17:00 lr: 0.000017 grad: 0.2065 (0.2455) loss: 0.7164 (0.7172) time: 0.1516 data: 0.0528 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:16:12 lr: 0.000017 grad: 0.2058 (0.2383) loss: 0.6902 (0.7159) time: 0.1371 data: 0.0428 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:15:26 lr: 0.000017 grad: 0.2070 (0.2331) loss: 0.7025 (0.7146) time: 0.1396 data: 0.0489 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:14:50 lr: 0.000017 grad: 0.2070 (0.2294) loss: 0.7011 (0.7134) time: 0.1472 data: 0.0578 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:14:19 lr: 0.000017 grad: 0.2023 (0.2260) loss: 0.7146 (0.7128) time: 0.1266 data: 0.0386 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:14:03 lr: 0.000017 grad: 0.1961 (0.2232) loss: 0.7119 (0.7128) time: 0.1986 data: 0.0992 max mem: 9377 +Train: [77] [1000/6250] eta: 0:13:41 lr: 0.000017 grad: 0.1936 (0.2210) loss: 0.7073 (0.7127) time: 0.1507 data: 0.0653 max mem: 9377 +Train: [77] [1100/6250] eta: 0:13:22 lr: 0.000017 grad: 0.2004 (0.2191) loss: 0.7045 (0.7120) time: 0.1357 data: 0.0519 max mem: 9377 +Train: [77] [1200/6250] eta: 0:13:03 lr: 0.000017 grad: 0.2014 (0.2176) loss: 0.7039 (0.7117) time: 0.1409 data: 0.0592 max mem: 9377 +Train: [77] [1300/6250] eta: 0:12:45 lr: 0.000017 grad: 0.2004 (0.2161) loss: 0.6980 (0.7117) time: 0.1601 data: 0.0741 max mem: 9377 +Train: [77] [1400/6250] eta: 0:12:28 lr: 0.000017 grad: 0.1966 (0.2149) loss: 0.6963 (0.7115) time: 0.1490 data: 0.0662 max mem: 9377 +Train: [77] [1500/6250] eta: 0:12:10 lr: 0.000017 grad: 0.1995 (0.2140) loss: 0.7090 (0.7113) time: 0.1427 data: 0.0610 max mem: 9377 +Train: [77] [1600/6250] eta: 0:11:55 lr: 0.000017 grad: 0.2025 (0.2131) loss: 0.7014 (0.7110) time: 0.1578 data: 0.0747 max mem: 9377 +Train: [77] [1700/6250] eta: 0:11:38 lr: 0.000017 grad: 0.1999 (0.2124) loss: 0.7191 (0.7108) time: 0.1485 data: 0.0620 max mem: 9377 +Train: [77] [1800/6250] eta: 0:11:24 lr: 0.000017 grad: 0.2068 (0.2118) loss: 0.7068 (0.7108) time: 0.1693 data: 0.0803 max mem: 9377 +Train: [77] [1900/6250] eta: 0:11:07 lr: 0.000017 grad: 0.2018 (0.2113) loss: 0.6973 (0.7104) time: 0.1423 data: 0.0566 max mem: 9377 +Train: [77] [2000/6250] eta: 0:10:48 lr: 0.000017 grad: 0.2016 (0.2109) loss: 0.6975 (0.7102) time: 0.1686 data: 0.0859 max mem: 9377 +Train: [77] [2100/6250] eta: 0:10:32 lr: 0.000017 grad: 0.1991 (0.2103) loss: 0.7041 (0.7101) time: 0.1312 data: 0.0497 max mem: 9377 +Train: [77] [2200/6250] eta: 0:10:15 lr: 0.000017 grad: 0.1955 (0.2099) loss: 0.6973 (0.7100) time: 0.1391 data: 0.0543 max mem: 9377 +Train: [77] [2300/6250] eta: 0:09:56 lr: 0.000017 grad: 0.2009 (0.2095) loss: 0.6904 (0.7098) time: 0.1371 data: 0.0518 max mem: 9377 +Train: [77] [2400/6250] eta: 0:09:38 lr: 0.000017 grad: 0.1997 (0.2092) loss: 0.7082 (0.7098) time: 0.1338 data: 0.0496 max mem: 9377 +Train: [77] [2500/6250] eta: 0:09:19 lr: 0.000017 grad: 0.1979 (0.2089) loss: 0.7086 (0.7098) time: 0.1170 data: 0.0347 max mem: 9377 +Train: [77] [2600/6250] eta: 0:09:01 lr: 0.000017 grad: 0.2041 (0.2086) loss: 0.7054 (0.7098) time: 0.1227 data: 0.0297 max mem: 9377 +Train: [77] [2700/6250] eta: 0:08:45 lr: 0.000017 grad: 0.1979 (0.2084) loss: 0.7158 (0.7099) time: 0.1470 data: 0.0670 max mem: 9377 +Train: [77] [2800/6250] eta: 0:08:29 lr: 0.000017 grad: 0.2078 (0.2083) loss: 0.7093 (0.7098) time: 0.1557 data: 0.0735 max mem: 9377 +Train: [77] [2900/6250] eta: 0:08:15 lr: 0.000017 grad: 0.1938 (0.2080) loss: 0.7202 (0.7100) time: 0.1570 data: 0.0775 max mem: 9377 +Train: [77] [3000/6250] eta: 0:07:59 lr: 0.000017 grad: 0.1962 (0.2078) loss: 0.7178 (0.7101) time: 0.1364 data: 0.0578 max mem: 9377 +Train: [77] [3100/6250] eta: 0:07:43 lr: 0.000017 grad: 0.2011 (0.2077) loss: 0.7195 (0.7100) time: 0.1250 data: 0.0411 max mem: 9377 +Train: [77] [3200/6250] eta: 0:07:28 lr: 0.000017 grad: 0.2077 (0.2076) loss: 0.7016 (0.7100) time: 0.1357 data: 0.0512 max mem: 9377 +Train: [77] [3300/6250] eta: 0:07:12 lr: 0.000016 grad: 0.1995 (0.2074) loss: 0.7085 (0.7099) time: 0.1208 data: 0.0390 max mem: 9377 +Train: [77] [3400/6250] eta: 0:06:57 lr: 0.000016 grad: 0.1984 (0.2073) loss: 0.7143 (0.7099) time: 0.1393 data: 0.0559 max mem: 9377 +Train: [77] [3500/6250] eta: 0:06:42 lr: 0.000016 grad: 0.1971 (0.2071) loss: 0.7093 (0.7099) time: 0.1434 data: 0.0636 max mem: 9377 +Train: [77] [3600/6250] eta: 0:06:27 lr: 0.000016 grad: 0.1918 (0.2069) loss: 0.7112 (0.7100) time: 0.1391 data: 0.0587 max mem: 9377 +Train: [77] [3700/6250] eta: 0:06:12 lr: 0.000016 grad: 0.1958 (0.2068) loss: 0.7101 (0.7099) time: 0.1560 data: 0.0750 max mem: 9377 +Train: [77] [3800/6250] eta: 0:05:58 lr: 0.000016 grad: 0.2017 (0.2067) loss: 0.7092 (0.7100) time: 0.1617 data: 0.0804 max mem: 9377 +Train: [77] [3900/6250] eta: 0:05:42 lr: 0.000016 grad: 0.1937 (0.2065) loss: 0.7174 (0.7101) time: 0.0931 data: 0.0074 max mem: 9377 +Train: [77] [4000/6250] eta: 0:05:27 lr: 0.000016 grad: 0.1979 (0.2064) loss: 0.7177 (0.7101) time: 0.1427 data: 0.0622 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:12 lr: 0.000016 grad: 0.2017 (0.2063) loss: 0.7068 (0.7101) time: 0.1394 data: 0.0581 max mem: 9377 +Train: [77] [4200/6250] eta: 0:04:57 lr: 0.000016 grad: 0.1988 (0.2061) loss: 0.7184 (0.7102) time: 0.1571 data: 0.0782 max mem: 9377 +Train: [77] [4300/6250] eta: 0:04:42 lr: 0.000016 grad: 0.1977 (0.2060) loss: 0.7048 (0.7102) time: 0.1321 data: 0.0510 max mem: 9377 +Train: [77] [4400/6250] eta: 0:04:28 lr: 0.000016 grad: 0.1959 (0.2058) loss: 0.7294 (0.7103) time: 0.1424 data: 0.0618 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:13 lr: 0.000016 grad: 0.1974 (0.2057) loss: 0.7192 (0.7102) time: 0.1496 data: 0.0684 max mem: 9377 +Train: [77] [4600/6250] eta: 0:03:58 lr: 0.000016 grad: 0.1979 (0.2056) loss: 0.7149 (0.7103) time: 0.1319 data: 0.0388 max mem: 9377 +Train: [77] [4700/6250] eta: 0:03:44 lr: 0.000016 grad: 0.2013 (0.2055) loss: 0.7198 (0.7105) time: 0.1436 data: 0.0686 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:29 lr: 0.000016 grad: 0.1917 (0.2053) loss: 0.7178 (0.7106) time: 0.1288 data: 0.0458 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:15 lr: 0.000016 grad: 0.1984 (0.2052) loss: 0.7125 (0.7107) time: 0.1367 data: 0.0491 max mem: 9377 +Train: [77] [5000/6250] eta: 0:03:00 lr: 0.000016 grad: 0.1941 (0.2051) loss: 0.7090 (0.7108) time: 0.1339 data: 0.0493 max mem: 9377 +Train: [77] [5100/6250] eta: 0:02:45 lr: 0.000016 grad: 0.1977 (0.2049) loss: 0.7119 (0.7109) time: 0.1533 data: 0.0717 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:31 lr: 0.000016 grad: 0.1951 (0.2048) loss: 0.7106 (0.7109) time: 0.1964 data: 0.1202 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:17 lr: 0.000016 grad: 0.1984 (0.2047) loss: 0.7231 (0.7110) time: 0.1484 data: 0.0644 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:03 lr: 0.000016 grad: 0.1978 (0.2047) loss: 0.7071 (0.7110) time: 0.1807 data: 0.0952 max mem: 9377 +Train: [77] [5500/6250] eta: 0:01:49 lr: 0.000016 grad: 0.2019 (0.2046) loss: 0.7152 (0.7110) time: 0.1591 data: 0.0735 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:34 lr: 0.000016 grad: 0.2044 (0.2046) loss: 0.7047 (0.7110) time: 0.1536 data: 0.0608 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:20 lr: 0.000016 grad: 0.1943 (0.2045) loss: 0.7177 (0.7111) time: 0.1663 data: 0.0818 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:05 lr: 0.000016 grad: 0.1974 (0.2044) loss: 0.7167 (0.7112) time: 0.1402 data: 0.0556 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:51 lr: 0.000016 grad: 0.1974 (0.2042) loss: 0.7178 (0.7114) time: 0.1400 data: 0.0563 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:36 lr: 0.000016 grad: 0.1959 (0.2041) loss: 0.7098 (0.7115) time: 0.1322 data: 0.0419 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:21 lr: 0.000016 grad: 0.1916 (0.2040) loss: 0.7184 (0.7116) time: 0.1225 data: 0.0417 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:07 lr: 0.000016 grad: 0.1994 (0.2039) loss: 0.7164 (0.7117) time: 0.1533 data: 0.0712 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1929 (0.2039) loss: 0.7211 (0.7118) time: 0.1413 data: 0.0563 max mem: 9377 +Train: [77] Total time: 0:15:12 (0.1460 s / it) +Averaged stats: lr: 0.000016 grad: 0.1929 (0.2039) loss: 0.7211 (0.7118) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:05:39 loss: 0.8675 (0.8675) time: 5.4762 data: 5.4464 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8731 (0.8740) time: 0.1184 data: 0.0922 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:13 (0.2127 s / it) +Averaged stats (hcp-train-subset): loss: 0.8731 (0.8740) +Eval (hcp-val): [77] [ 0/62] eta: 0:05:20 loss: 0.8804 (0.8804) time: 5.1652 data: 5.1357 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8694 (0.8728) time: 0.0983 data: 0.0719 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:12 (0.2032 s / it) +Averaged stats (hcp-val): loss: 0.8694 (0.8728) +Eval (nsd-val): [77] [ 0/62] eta: 0:04:12 loss: 0.8300 (0.8300) time: 4.0707 data: 3.9961 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8434 (0.8438) time: 0.1136 data: 0.0887 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:12 (0.2001 s / it) +Averaged stats (nsd-val): loss: 0.8434 (0.8438) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 10:32:56 lr: 0.000016 grad: 0.2718 (0.2718) loss: 0.7411 (0.7411) time: 6.0762 data: 5.9784 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:19:06 lr: 0.000016 grad: 0.2276 (0.2650) loss: 0.7626 (0.7562) time: 0.1355 data: 0.0357 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:16:17 lr: 0.000016 grad: 0.2223 (0.2525) loss: 0.7338 (0.7465) time: 0.1198 data: 0.0212 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:15:32 lr: 0.000016 grad: 0.2120 (0.2423) loss: 0.7263 (0.7405) time: 0.1442 data: 0.0430 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:15:11 lr: 0.000016 grad: 0.1980 (0.2337) loss: 0.7193 (0.7384) time: 0.1692 data: 0.0790 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:14:36 lr: 0.000016 grad: 0.2006 (0.2287) loss: 0.7166 (0.7355) time: 0.1405 data: 0.0536 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:14:13 lr: 0.000016 grad: 0.2105 (0.2245) loss: 0.7123 (0.7339) time: 0.1357 data: 0.0381 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:13:54 lr: 0.000016 grad: 0.2119 (0.2220) loss: 0.7020 (0.7317) time: 0.1530 data: 0.0669 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:13:37 lr: 0.000016 grad: 0.2031 (0.2199) loss: 0.7134 (0.7297) time: 0.1655 data: 0.0877 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:13:16 lr: 0.000016 grad: 0.1938 (0.2176) loss: 0.7296 (0.7285) time: 0.1482 data: 0.0527 max mem: 9377 +Train: [78] [1000/6250] eta: 0:12:57 lr: 0.000016 grad: 0.1983 (0.2159) loss: 0.7031 (0.7270) time: 0.1340 data: 0.0484 max mem: 9377 +Train: [78] [1100/6250] eta: 0:12:36 lr: 0.000016 grad: 0.2026 (0.2148) loss: 0.7008 (0.7257) time: 0.1328 data: 0.0492 max mem: 9377 +Train: [78] [1200/6250] eta: 0:12:17 lr: 0.000016 grad: 0.2063 (0.2138) loss: 0.6971 (0.7243) time: 0.1187 data: 0.0358 max mem: 9377 +Train: [78] [1300/6250] eta: 0:11:59 lr: 0.000016 grad: 0.2057 (0.2128) loss: 0.7014 (0.7232) time: 0.1273 data: 0.0408 max mem: 9377 +Train: [78] [1400/6250] eta: 0:11:44 lr: 0.000016 grad: 0.1979 (0.2118) loss: 0.7166 (0.7222) time: 0.1745 data: 0.0927 max mem: 9377 +Train: [78] [1500/6250] eta: 0:11:26 lr: 0.000015 grad: 0.2080 (0.2113) loss: 0.7119 (0.7211) time: 0.1488 data: 0.0689 max mem: 9377 +Train: [78] [1600/6250] eta: 0:11:10 lr: 0.000015 grad: 0.2021 (0.2108) loss: 0.7059 (0.7202) time: 0.1431 data: 0.0642 max mem: 9377 +Train: [78] [1700/6250] eta: 0:10:58 lr: 0.000015 grad: 0.2031 (0.2103) loss: 0.7040 (0.7197) time: 0.1405 data: 0.0668 max mem: 9377 +Train: [78] [1800/6250] eta: 0:10:46 lr: 0.000015 grad: 0.1999 (0.2099) loss: 0.7050 (0.7191) time: 0.1606 data: 0.0747 max mem: 9377 +Train: [78] [1900/6250] eta: 0:10:32 lr: 0.000015 grad: 0.2020 (0.2094) loss: 0.7061 (0.7185) time: 0.1326 data: 0.0515 max mem: 9377 +Train: [78] [2000/6250] eta: 0:10:18 lr: 0.000015 grad: 0.1990 (0.2090) loss: 0.7113 (0.7182) time: 0.1418 data: 0.0643 max mem: 9377 +Train: [78] [2100/6250] eta: 0:10:03 lr: 0.000015 grad: 0.1986 (0.2087) loss: 0.7107 (0.7178) time: 0.1547 data: 0.0749 max mem: 9377 +Train: [78] [2200/6250] eta: 0:09:46 lr: 0.000015 grad: 0.1972 (0.2083) loss: 0.7133 (0.7176) time: 0.1300 data: 0.0456 max mem: 9377 +Train: [78] [2300/6250] eta: 0:09:30 lr: 0.000015 grad: 0.1959 (0.2080) loss: 0.7035 (0.7174) time: 0.1348 data: 0.0524 max mem: 9377 +Train: [78] [2400/6250] eta: 0:09:14 lr: 0.000015 grad: 0.2032 (0.2078) loss: 0.6970 (0.7171) time: 0.1393 data: 0.0602 max mem: 9377 +Train: [78] [2500/6250] eta: 0:08:57 lr: 0.000015 grad: 0.1989 (0.2076) loss: 0.7129 (0.7169) time: 0.1359 data: 0.0550 max mem: 9377 +Train: [78] [2600/6250] eta: 0:08:40 lr: 0.000015 grad: 0.2041 (0.2073) loss: 0.7164 (0.7169) time: 0.1322 data: 0.0501 max mem: 9377 +Train: [78] [2700/6250] eta: 0:08:24 lr: 0.000015 grad: 0.1920 (0.2071) loss: 0.7225 (0.7167) time: 0.1463 data: 0.0624 max mem: 9377 +Train: [78] [2800/6250] eta: 0:08:08 lr: 0.000015 grad: 0.1993 (0.2069) loss: 0.7134 (0.7167) time: 0.1294 data: 0.0490 max mem: 9377 +Train: [78] [2900/6250] eta: 0:07:52 lr: 0.000015 grad: 0.1936 (0.2067) loss: 0.7221 (0.7167) time: 0.1259 data: 0.0347 max mem: 9377 +Train: [78] [3000/6250] eta: 0:07:37 lr: 0.000015 grad: 0.2029 (0.2066) loss: 0.6970 (0.7166) time: 0.1291 data: 0.0433 max mem: 9377 +Train: [78] [3100/6250] eta: 0:07:22 lr: 0.000015 grad: 0.1994 (0.2065) loss: 0.7083 (0.7163) time: 0.1337 data: 0.0463 max mem: 9377 +Train: [78] [3200/6250] eta: 0:07:08 lr: 0.000015 grad: 0.1994 (0.2063) loss: 0.7150 (0.7161) time: 0.1225 data: 0.0372 max mem: 9377 +Train: [78] [3300/6250] eta: 0:06:54 lr: 0.000015 grad: 0.1996 (0.2062) loss: 0.7158 (0.7160) time: 0.1283 data: 0.0349 max mem: 9377 +Train: [78] [3400/6250] eta: 0:06:41 lr: 0.000015 grad: 0.2042 (0.2061) loss: 0.6983 (0.7158) time: 0.1447 data: 0.0644 max mem: 9377 +Train: [78] [3500/6250] eta: 0:06:26 lr: 0.000015 grad: 0.2021 (0.2061) loss: 0.7035 (0.7155) time: 0.1242 data: 0.0425 max mem: 9377 +Train: [78] [3600/6250] eta: 0:06:12 lr: 0.000015 grad: 0.1988 (0.2059) loss: 0.7042 (0.7154) time: 0.1421 data: 0.0569 max mem: 9377 +Train: [78] [3700/6250] eta: 0:05:58 lr: 0.000015 grad: 0.2054 (0.2059) loss: 0.7063 (0.7152) time: 0.1462 data: 0.0642 max mem: 9377 +Train: [78] [3800/6250] eta: 0:05:45 lr: 0.000015 grad: 0.2008 (0.2058) loss: 0.7115 (0.7152) time: 0.1726 data: 0.0938 max mem: 9377 +Train: [78] [3900/6250] eta: 0:05:31 lr: 0.000015 grad: 0.2047 (0.2058) loss: 0.7206 (0.7150) time: 0.1556 data: 0.0681 max mem: 9377 +Train: [78] [4000/6250] eta: 0:05:17 lr: 0.000015 grad: 0.2001 (0.2056) loss: 0.7095 (0.7150) time: 0.1192 data: 0.0392 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:03 lr: 0.000015 grad: 0.1974 (0.2055) loss: 0.7206 (0.7151) time: 0.1491 data: 0.0729 max mem: 9377 +Train: [78] [4200/6250] eta: 0:04:49 lr: 0.000015 grad: 0.2043 (0.2054) loss: 0.7093 (0.7151) time: 0.1441 data: 0.0654 max mem: 9377 +Train: [78] [4300/6250] eta: 0:04:35 lr: 0.000015 grad: 0.1975 (0.2053) loss: 0.7191 (0.7152) time: 0.1450 data: 0.0647 max mem: 9377 +Train: [78] [4400/6250] eta: 0:04:21 lr: 0.000015 grad: 0.1953 (0.2051) loss: 0.7206 (0.7152) time: 0.1358 data: 0.0527 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:06 lr: 0.000015 grad: 0.2020 (0.2051) loss: 0.7128 (0.7153) time: 0.1400 data: 0.0556 max mem: 9377 +Train: [78] [4600/6250] eta: 0:03:52 lr: 0.000015 grad: 0.2014 (0.2050) loss: 0.7115 (0.7153) time: 0.1486 data: 0.0660 max mem: 9377 +Train: [78] [4700/6250] eta: 0:03:38 lr: 0.000015 grad: 0.1971 (0.2049) loss: 0.7248 (0.7154) time: 0.1334 data: 0.0482 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:24 lr: 0.000015 grad: 0.1952 (0.2048) loss: 0.7155 (0.7155) time: 0.1663 data: 0.0818 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:10 lr: 0.000015 grad: 0.1982 (0.2046) loss: 0.7225 (0.7157) time: 0.1427 data: 0.0635 max mem: 9377 +Train: [78] [5000/6250] eta: 0:02:56 lr: 0.000015 grad: 0.1927 (0.2045) loss: 0.7168 (0.7158) time: 0.1444 data: 0.0642 max mem: 9377 +Train: [78] [5100/6250] eta: 0:02:42 lr: 0.000015 grad: 0.1927 (0.2043) loss: 0.7275 (0.7160) time: 0.1422 data: 0.0660 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:27 lr: 0.000015 grad: 0.1952 (0.2041) loss: 0.7243 (0.7161) time: 0.1217 data: 0.0411 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:13 lr: 0.000015 grad: 0.1947 (0.2040) loss: 0.7172 (0.7162) time: 0.1192 data: 0.0400 max mem: 9377 +Train: [78] [5400/6250] eta: 0:01:59 lr: 0.000015 grad: 0.1948 (0.2039) loss: 0.7153 (0.7163) time: 0.1211 data: 0.0394 max mem: 9377 +Train: [78] [5500/6250] eta: 0:01:45 lr: 0.000015 grad: 0.1981 (0.2039) loss: 0.7268 (0.7162) time: 0.1591 data: 0.0825 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:31 lr: 0.000015 grad: 0.2017 (0.2038) loss: 0.7120 (0.7162) time: 0.1318 data: 0.0542 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:17 lr: 0.000015 grad: 0.2064 (0.2038) loss: 0.7011 (0.7161) time: 0.1544 data: 0.0730 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:03 lr: 0.000015 grad: 0.1978 (0.2038) loss: 0.7212 (0.7161) time: 0.1356 data: 0.0480 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:49 lr: 0.000015 grad: 0.1967 (0.2037) loss: 0.7198 (0.7161) time: 0.1508 data: 0.0649 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:35 lr: 0.000015 grad: 0.1916 (0.2036) loss: 0.7213 (0.7161) time: 0.1396 data: 0.0544 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:21 lr: 0.000015 grad: 0.2017 (0.2036) loss: 0.7023 (0.7161) time: 0.1546 data: 0.0739 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:07 lr: 0.000014 grad: 0.2013 (0.2035) loss: 0.7115 (0.7161) time: 0.1563 data: 0.0617 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1959 (0.2035) loss: 0.7145 (0.7161) time: 0.1491 data: 0.0590 max mem: 9377 +Train: [78] Total time: 0:14:56 (0.1434 s / it) +Averaged stats: lr: 0.000014 grad: 0.1959 (0.2035) loss: 0.7145 (0.7161) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:04:15 loss: 0.8649 (0.8649) time: 4.1260 data: 4.0633 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8719 (0.8724) time: 0.1232 data: 0.0979 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:14 (0.2367 s / it) +Averaged stats (hcp-train-subset): loss: 0.8719 (0.8724) +Eval (hcp-val): [78] [ 0/62] eta: 0:05:45 loss: 0.8717 (0.8717) time: 5.5695 data: 5.5394 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8676 (0.8698) time: 0.1248 data: 0.0996 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:14 (0.2378 s / it) +Averaged stats (hcp-val): loss: 0.8676 (0.8698) +Eval (nsd-val): [78] [ 0/62] eta: 0:05:30 loss: 0.8353 (0.8353) time: 5.3291 data: 5.2974 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8427 (0.8450) time: 0.1454 data: 0.1200 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (nsd-val): loss: 0.8427 (0.8450) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 12:27:22 lr: 0.000014 grad: 0.5479 (0.5479) loss: 0.5343 (0.5343) time: 7.1748 data: 7.0743 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:21:55 lr: 0.000014 grad: 0.2381 (0.2724) loss: 0.7273 (0.7293) time: 0.1273 data: 0.0130 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:18:03 lr: 0.000014 grad: 0.2377 (0.2512) loss: 0.7044 (0.7293) time: 0.1322 data: 0.0354 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:16:22 lr: 0.000014 grad: 0.2114 (0.2395) loss: 0.7095 (0.7243) time: 0.1288 data: 0.0301 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:15:41 lr: 0.000014 grad: 0.2037 (0.2323) loss: 0.7168 (0.7226) time: 0.1523 data: 0.0580 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:15:26 lr: 0.000014 grad: 0.2203 (0.2291) loss: 0.6940 (0.7186) time: 0.1653 data: 0.0799 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:15:14 lr: 0.000014 grad: 0.2175 (0.2265) loss: 0.6878 (0.7157) time: 0.1807 data: 0.0978 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:15:09 lr: 0.000014 grad: 0.2005 (0.2244) loss: 0.7227 (0.7141) time: 0.1845 data: 0.1019 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:14:46 lr: 0.000014 grad: 0.2063 (0.2226) loss: 0.7079 (0.7127) time: 0.1418 data: 0.0583 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:14:21 lr: 0.000014 grad: 0.2028 (0.2207) loss: 0.7032 (0.7119) time: 0.1576 data: 0.0734 max mem: 9377 +Train: [79] [1000/6250] eta: 0:13:54 lr: 0.000014 grad: 0.1994 (0.2189) loss: 0.7089 (0.7114) time: 0.1496 data: 0.0694 max mem: 9377 +Train: [79] [1100/6250] eta: 0:13:32 lr: 0.000014 grad: 0.1999 (0.2173) loss: 0.7077 (0.7111) time: 0.1697 data: 0.0872 max mem: 9377 +Train: [79] [1200/6250] eta: 0:13:06 lr: 0.000014 grad: 0.2026 (0.2160) loss: 0.7121 (0.7109) time: 0.1215 data: 0.0337 max mem: 9377 +Train: [79] [1300/6250] eta: 0:12:45 lr: 0.000014 grad: 0.1988 (0.2150) loss: 0.7118 (0.7108) time: 0.1433 data: 0.0575 max mem: 9377 +Train: [79] [1400/6250] eta: 0:12:23 lr: 0.000014 grad: 0.1979 (0.2140) loss: 0.7164 (0.7110) time: 0.1193 data: 0.0298 max mem: 9377 +Train: [79] [1500/6250] eta: 0:12:04 lr: 0.000014 grad: 0.2023 (0.2131) loss: 0.7086 (0.7113) time: 0.1364 data: 0.0523 max mem: 9377 +Train: [79] [1600/6250] eta: 0:11:46 lr: 0.000014 grad: 0.2055 (0.2124) loss: 0.7054 (0.7115) time: 0.1478 data: 0.0671 max mem: 9377 +Train: [79] [1700/6250] eta: 0:11:35 lr: 0.000014 grad: 0.1988 (0.2118) loss: 0.7061 (0.7114) time: 0.1711 data: 0.0907 max mem: 9377 +Train: [79] [1800/6250] eta: 0:11:22 lr: 0.000014 grad: 0.1970 (0.2110) loss: 0.7198 (0.7117) time: 0.1754 data: 0.0883 max mem: 9377 +Train: [79] [1900/6250] eta: 0:11:04 lr: 0.000014 grad: 0.2003 (0.2105) loss: 0.7102 (0.7118) time: 0.1321 data: 0.0500 max mem: 9377 +Train: [79] [2000/6250] eta: 0:10:48 lr: 0.000014 grad: 0.1996 (0.2100) loss: 0.7053 (0.7118) time: 0.1550 data: 0.0684 max mem: 9377 +Train: [79] [2100/6250] eta: 0:10:31 lr: 0.000014 grad: 0.2019 (0.2095) loss: 0.7171 (0.7120) time: 0.1404 data: 0.0564 max mem: 9377 +Train: [79] [2200/6250] eta: 0:10:15 lr: 0.000014 grad: 0.1995 (0.2091) loss: 0.7163 (0.7121) time: 0.1449 data: 0.0561 max mem: 9377 +Train: [79] [2300/6250] eta: 0:09:59 lr: 0.000014 grad: 0.2031 (0.2088) loss: 0.7097 (0.7119) time: 0.1606 data: 0.0747 max mem: 9377 +Train: [79] [2400/6250] eta: 0:09:42 lr: 0.000014 grad: 0.2013 (0.2085) loss: 0.7205 (0.7118) time: 0.1399 data: 0.0569 max mem: 9377 +Train: [79] [2500/6250] eta: 0:09:25 lr: 0.000014 grad: 0.1892 (0.2083) loss: 0.7132 (0.7118) time: 0.1433 data: 0.0555 max mem: 9377 +Train: [79] [2600/6250] eta: 0:09:07 lr: 0.000014 grad: 0.2023 (0.2081) loss: 0.7101 (0.7117) time: 0.1397 data: 0.0491 max mem: 9377 +Train: [79] [2700/6250] eta: 0:08:50 lr: 0.000014 grad: 0.2053 (0.2080) loss: 0.7121 (0.7116) time: 0.1324 data: 0.0477 max mem: 9377 +Train: [79] [2800/6250] eta: 0:08:35 lr: 0.000014 grad: 0.1976 (0.2079) loss: 0.7092 (0.7116) time: 0.1423 data: 0.0614 max mem: 9377 +Train: [79] [2900/6250] eta: 0:08:21 lr: 0.000014 grad: 0.2011 (0.2078) loss: 0.7143 (0.7116) time: 0.1554 data: 0.0750 max mem: 9377 +Train: [79] [3000/6250] eta: 0:08:07 lr: 0.000014 grad: 0.1969 (0.2077) loss: 0.7035 (0.7115) time: 0.1694 data: 0.0911 max mem: 9377 +Train: [79] [3100/6250] eta: 0:07:54 lr: 0.000014 grad: 0.2020 (0.2075) loss: 0.7061 (0.7115) time: 0.1897 data: 0.1131 max mem: 9377 +Train: [79] [3200/6250] eta: 0:07:39 lr: 0.000014 grad: 0.2017 (0.2074) loss: 0.7181 (0.7116) time: 0.0966 data: 0.0083 max mem: 9377 +Train: [79] [3300/6250] eta: 0:07:24 lr: 0.000014 grad: 0.1957 (0.2072) loss: 0.7149 (0.7117) time: 0.1378 data: 0.0567 max mem: 9377 +Train: [79] [3400/6250] eta: 0:07:09 lr: 0.000014 grad: 0.2054 (0.2071) loss: 0.6972 (0.7117) time: 0.1545 data: 0.0762 max mem: 9377 +Train: [79] [3500/6250] eta: 0:06:53 lr: 0.000014 grad: 0.2013 (0.2070) loss: 0.7125 (0.7117) time: 0.1315 data: 0.0501 max mem: 9377 +Train: [79] [3600/6250] eta: 0:06:37 lr: 0.000014 grad: 0.1992 (0.2068) loss: 0.7127 (0.7117) time: 0.1276 data: 0.0456 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:21 lr: 0.000014 grad: 0.2029 (0.2067) loss: 0.7090 (0.7117) time: 0.1282 data: 0.0425 max mem: 9377 +Train: [79] [3800/6250] eta: 0:06:06 lr: 0.000014 grad: 0.2034 (0.2066) loss: 0.7120 (0.7118) time: 0.1564 data: 0.0730 max mem: 9377 +Train: [79] [3900/6250] eta: 0:05:50 lr: 0.000014 grad: 0.2074 (0.2066) loss: 0.7103 (0.7117) time: 0.1294 data: 0.0471 max mem: 9377 +Train: [79] [4000/6250] eta: 0:05:34 lr: 0.000014 grad: 0.1940 (0.2065) loss: 0.7137 (0.7116) time: 0.1323 data: 0.0508 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:19 lr: 0.000014 grad: 0.2000 (0.2063) loss: 0.7164 (0.7117) time: 0.1614 data: 0.0743 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:03 lr: 0.000014 grad: 0.2031 (0.2063) loss: 0.7017 (0.7117) time: 0.1499 data: 0.0676 max mem: 9377 +Train: [79] [4300/6250] eta: 0:04:47 lr: 0.000014 grad: 0.2004 (0.2062) loss: 0.7135 (0.7117) time: 0.1374 data: 0.0560 max mem: 9377 +Train: [79] [4400/6250] eta: 0:04:32 lr: 0.000014 grad: 0.2055 (0.2062) loss: 0.7022 (0.7117) time: 0.1624 data: 0.0827 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:17 lr: 0.000014 grad: 0.2012 (0.2061) loss: 0.7184 (0.7116) time: 0.1429 data: 0.0645 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:02 lr: 0.000014 grad: 0.2011 (0.2060) loss: 0.7146 (0.7117) time: 0.1577 data: 0.0746 max mem: 9377 +Train: [79] [4700/6250] eta: 0:03:48 lr: 0.000013 grad: 0.2049 (0.2060) loss: 0.7152 (0.7117) time: 0.1626 data: 0.0829 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:33 lr: 0.000013 grad: 0.2114 (0.2060) loss: 0.7137 (0.7118) time: 0.1459 data: 0.0638 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:18 lr: 0.000013 grad: 0.2015 (0.2059) loss: 0.7052 (0.7118) time: 0.1372 data: 0.0509 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:03 lr: 0.000013 grad: 0.2024 (0.2059) loss: 0.7155 (0.7120) time: 0.1450 data: 0.0648 max mem: 9377 +Train: [79] [5100/6250] eta: 0:02:48 lr: 0.000013 grad: 0.1964 (0.2058) loss: 0.7091 (0.7120) time: 0.1293 data: 0.0458 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:33 lr: 0.000013 grad: 0.2004 (0.2057) loss: 0.7115 (0.7121) time: 0.1221 data: 0.0403 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:18 lr: 0.000013 grad: 0.2077 (0.2057) loss: 0.7164 (0.7121) time: 0.1389 data: 0.0574 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:04 lr: 0.000013 grad: 0.2010 (0.2057) loss: 0.7148 (0.7122) time: 0.1317 data: 0.0538 max mem: 9377 +Train: [79] [5500/6250] eta: 0:01:49 lr: 0.000013 grad: 0.2030 (0.2057) loss: 0.7072 (0.7121) time: 0.1320 data: 0.0502 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:34 lr: 0.000013 grad: 0.2039 (0.2057) loss: 0.6995 (0.7120) time: 0.1587 data: 0.0744 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:20 lr: 0.000013 grad: 0.2050 (0.2058) loss: 0.7116 (0.7119) time: 0.1468 data: 0.0644 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:05 lr: 0.000013 grad: 0.2060 (0.2058) loss: 0.6969 (0.7119) time: 0.1386 data: 0.0582 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:51 lr: 0.000013 grad: 0.2032 (0.2058) loss: 0.7171 (0.7119) time: 0.1658 data: 0.0828 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:36 lr: 0.000013 grad: 0.2028 (0.2057) loss: 0.7150 (0.7119) time: 0.1556 data: 0.0778 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:21 lr: 0.000013 grad: 0.2021 (0.2057) loss: 0.7181 (0.7119) time: 0.1343 data: 0.0537 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:07 lr: 0.000013 grad: 0.1967 (0.2056) loss: 0.7101 (0.7119) time: 0.1454 data: 0.0673 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.2020 (0.2056) loss: 0.7160 (0.7120) time: 0.1430 data: 0.0613 max mem: 9377 +Train: [79] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000013 grad: 0.2020 (0.2056) loss: 0.7160 (0.7120) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:05:07 loss: 0.8694 (0.8694) time: 4.9656 data: 4.9358 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8741 (0.8747) time: 0.1196 data: 0.0922 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:13 (0.2131 s / it) +Averaged stats (hcp-train-subset): loss: 0.8741 (0.8747) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [79] [ 0/62] eta: 0:05:10 loss: 0.8812 (0.8812) time: 5.0121 data: 4.9822 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8691 (0.8716) time: 0.1039 data: 0.0792 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:12 (0.2063 s / it) +Averaged stats (hcp-val): loss: 0.8691 (0.8716) +Making plots (hcp-val): example=2 +Eval (nsd-val): [79] [ 0/62] eta: 0:04:23 loss: 0.8307 (0.8307) time: 4.2558 data: 4.1830 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8437 (0.8442) time: 0.1030 data: 0.0778 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:12 (0.1995 s / it) +Averaged stats (nsd-val): loss: 0.8437 (0.8442) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 9:25:28 lr: 0.000013 grad: 0.3111 (0.3111) loss: 0.6955 (0.6955) time: 5.4285 data: 5.2398 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:20:15 lr: 0.000013 grad: 0.2317 (0.2584) loss: 0.7210 (0.7381) time: 0.1631 data: 0.0773 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:17:02 lr: 0.000013 grad: 0.2196 (0.2436) loss: 0.7100 (0.7274) time: 0.1557 data: 0.0655 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:15:35 lr: 0.000013 grad: 0.2271 (0.2370) loss: 0.6952 (0.7200) time: 0.1432 data: 0.0564 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:14:48 lr: 0.000013 grad: 0.2108 (0.2324) loss: 0.7053 (0.7165) time: 0.1356 data: 0.0407 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:14:09 lr: 0.000013 grad: 0.2051 (0.2283) loss: 0.7083 (0.7147) time: 0.1308 data: 0.0368 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:13:44 lr: 0.000013 grad: 0.2114 (0.2253) loss: 0.6929 (0.7130) time: 0.1326 data: 0.0388 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:13:21 lr: 0.000013 grad: 0.2104 (0.2232) loss: 0.7065 (0.7125) time: 0.1364 data: 0.0397 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:13:02 lr: 0.000013 grad: 0.2020 (0.2215) loss: 0.7101 (0.7122) time: 0.1548 data: 0.0704 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:12:47 lr: 0.000013 grad: 0.2054 (0.2201) loss: 0.7111 (0.7121) time: 0.1279 data: 0.0389 max mem: 9377 +Train: [80] [1000/6250] eta: 0:12:29 lr: 0.000013 grad: 0.2048 (0.2188) loss: 0.7154 (0.7118) time: 0.1318 data: 0.0473 max mem: 9377 +Train: [80] [1100/6250] eta: 0:12:14 lr: 0.000013 grad: 0.2038 (0.2176) loss: 0.7103 (0.7120) time: 0.1560 data: 0.0758 max mem: 9377 +Train: [80] [1200/6250] eta: 0:12:00 lr: 0.000013 grad: 0.2011 (0.2166) loss: 0.7195 (0.7117) time: 0.1435 data: 0.0595 max mem: 9377 +Train: [80] [1300/6250] eta: 0:11:44 lr: 0.000013 grad: 0.2077 (0.2155) loss: 0.7100 (0.7118) time: 0.1346 data: 0.0567 max mem: 9377 +Train: [80] [1400/6250] eta: 0:11:29 lr: 0.000013 grad: 0.1973 (0.2148) loss: 0.7131 (0.7116) time: 0.1310 data: 0.0500 max mem: 9377 +Train: [80] [1500/6250] eta: 0:11:17 lr: 0.000013 grad: 0.2044 (0.2141) loss: 0.7152 (0.7115) time: 0.1652 data: 0.0852 max mem: 9377 +Train: [80] [1600/6250] eta: 0:11:05 lr: 0.000013 grad: 0.2066 (0.2135) loss: 0.6976 (0.7113) time: 0.1471 data: 0.0667 max mem: 9377 +Train: [80] [1700/6250] eta: 0:10:53 lr: 0.000013 grad: 0.1965 (0.2129) loss: 0.7082 (0.7112) time: 0.1494 data: 0.0598 max mem: 9377 +Train: [80] [1800/6250] eta: 0:10:39 lr: 0.000013 grad: 0.2013 (0.2125) loss: 0.7146 (0.7112) time: 0.1457 data: 0.0646 max mem: 9377 +Train: [80] [1900/6250] eta: 0:10:25 lr: 0.000013 grad: 0.2006 (0.2121) loss: 0.7058 (0.7111) time: 0.1208 data: 0.0395 max mem: 9377 +Train: [80] [2000/6250] eta: 0:10:09 lr: 0.000013 grad: 0.2044 (0.2118) loss: 0.7117 (0.7109) time: 0.1311 data: 0.0501 max mem: 9377 +Train: [80] [2100/6250] eta: 0:09:55 lr: 0.000013 grad: 0.2025 (0.2113) loss: 0.7197 (0.7109) time: 0.1377 data: 0.0651 max mem: 9377 +Train: [80] [2200/6250] eta: 0:09:39 lr: 0.000013 grad: 0.2023 (0.2109) loss: 0.7149 (0.7110) time: 0.1328 data: 0.0572 max mem: 9377 +Train: [80] [2300/6250] eta: 0:09:24 lr: 0.000013 grad: 0.1984 (0.2104) loss: 0.7075 (0.7112) time: 0.1312 data: 0.0487 max mem: 9377 +Train: [80] [2400/6250] eta: 0:09:09 lr: 0.000013 grad: 0.1959 (0.2101) loss: 0.7197 (0.7113) time: 0.0996 data: 0.0114 max mem: 9377 +Train: [80] [2500/6250] eta: 0:08:53 lr: 0.000013 grad: 0.1998 (0.2097) loss: 0.7194 (0.7114) time: 0.1348 data: 0.0373 max mem: 9377 +Train: [80] [2600/6250] eta: 0:08:39 lr: 0.000013 grad: 0.2069 (0.2094) loss: 0.7144 (0.7116) time: 0.1281 data: 0.0376 max mem: 9377 +Train: [80] [2700/6250] eta: 0:08:24 lr: 0.000013 grad: 0.2010 (0.2092) loss: 0.7161 (0.7116) time: 0.1148 data: 0.0299 max mem: 9377 +Train: [80] [2800/6250] eta: 0:08:10 lr: 0.000013 grad: 0.2023 (0.2090) loss: 0.7050 (0.7115) time: 0.1360 data: 0.0520 max mem: 9377 +Train: [80] [2900/6250] eta: 0:07:55 lr: 0.000013 grad: 0.1996 (0.2089) loss: 0.7075 (0.7114) time: 0.1468 data: 0.0655 max mem: 9377 +Train: [80] [3000/6250] eta: 0:07:40 lr: 0.000013 grad: 0.1968 (0.2087) loss: 0.7101 (0.7113) time: 0.1331 data: 0.0523 max mem: 9377 +Train: [80] [3100/6250] eta: 0:07:25 lr: 0.000013 grad: 0.1961 (0.2085) loss: 0.7208 (0.7114) time: 0.1344 data: 0.0475 max mem: 9377 +Train: [80] [3200/6250] eta: 0:07:10 lr: 0.000013 grad: 0.2160 (0.2084) loss: 0.7002 (0.7113) time: 0.1342 data: 0.0491 max mem: 9377 +Train: [80] [3300/6250] eta: 0:06:56 lr: 0.000013 grad: 0.1995 (0.2083) loss: 0.7188 (0.7113) time: 0.1548 data: 0.0750 max mem: 9377 +Train: [80] [3400/6250] eta: 0:06:42 lr: 0.000012 grad: 0.2018 (0.2082) loss: 0.7144 (0.7113) time: 0.1456 data: 0.0659 max mem: 9377 +Train: [80] [3500/6250] eta: 0:06:28 lr: 0.000012 grad: 0.2025 (0.2081) loss: 0.7113 (0.7113) time: 0.1411 data: 0.0571 max mem: 9377 +Train: [80] [3600/6250] eta: 0:06:14 lr: 0.000012 grad: 0.2009 (0.2080) loss: 0.7119 (0.7112) time: 0.1506 data: 0.0766 max mem: 9377 +Train: [80] [3700/6250] eta: 0:05:59 lr: 0.000012 grad: 0.2014 (0.2079) loss: 0.7066 (0.7110) time: 0.1390 data: 0.0591 max mem: 9377 +Train: [80] [3800/6250] eta: 0:05:45 lr: 0.000012 grad: 0.2025 (0.2078) loss: 0.7029 (0.7109) time: 0.1611 data: 0.0727 max mem: 9377 +Train: [80] [3900/6250] eta: 0:05:32 lr: 0.000012 grad: 0.1994 (0.2077) loss: 0.7113 (0.7108) time: 0.1609 data: 0.0835 max mem: 9377 +Train: [80] [4000/6250] eta: 0:05:18 lr: 0.000012 grad: 0.2053 (0.2076) loss: 0.7126 (0.7108) time: 0.1399 data: 0.0627 max mem: 9377 +Train: [80] [4100/6250] eta: 0:05:04 lr: 0.000012 grad: 0.2033 (0.2075) loss: 0.7155 (0.7109) time: 0.1467 data: 0.0645 max mem: 9377 +Train: [80] [4200/6250] eta: 0:04:50 lr: 0.000012 grad: 0.1981 (0.2074) loss: 0.7062 (0.7110) time: 0.1530 data: 0.0775 max mem: 9377 +Train: [80] [4300/6250] eta: 0:04:36 lr: 0.000012 grad: 0.2023 (0.2073) loss: 0.7171 (0.7111) time: 0.1410 data: 0.0567 max mem: 9377 +Train: [80] [4400/6250] eta: 0:04:21 lr: 0.000012 grad: 0.1951 (0.2071) loss: 0.7187 (0.7111) time: 0.1139 data: 0.0323 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:07 lr: 0.000012 grad: 0.2014 (0.2070) loss: 0.7165 (0.7110) time: 0.1418 data: 0.0662 max mem: 9377 +Train: [80] [4600/6250] eta: 0:03:53 lr: 0.000012 grad: 0.2061 (0.2070) loss: 0.7035 (0.7109) time: 0.1530 data: 0.0742 max mem: 9377 +Train: [80] [4700/6250] eta: 0:03:39 lr: 0.000012 grad: 0.1974 (0.2069) loss: 0.7123 (0.7110) time: 0.1272 data: 0.0479 max mem: 9377 +Train: [80] [4800/6250] eta: 0:03:24 lr: 0.000012 grad: 0.2014 (0.2068) loss: 0.7170 (0.7111) time: 0.1365 data: 0.0593 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:10 lr: 0.000012 grad: 0.2041 (0.2067) loss: 0.7047 (0.7111) time: 0.1463 data: 0.0644 max mem: 9377 +Train: [80] [5000/6250] eta: 0:02:56 lr: 0.000012 grad: 0.2041 (0.2067) loss: 0.7039 (0.7111) time: 0.1161 data: 0.0311 max mem: 9377 +Train: [80] [5100/6250] eta: 0:02:42 lr: 0.000012 grad: 0.2050 (0.2067) loss: 0.7116 (0.7111) time: 0.1299 data: 0.0543 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:27 lr: 0.000012 grad: 0.1973 (0.2067) loss: 0.7142 (0.7112) time: 0.1292 data: 0.0454 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:13 lr: 0.000012 grad: 0.2059 (0.2066) loss: 0.7052 (0.7112) time: 0.1448 data: 0.0626 max mem: 9377 +Train: [80] [5400/6250] eta: 0:01:59 lr: 0.000012 grad: 0.2093 (0.2066) loss: 0.7009 (0.7112) time: 0.1383 data: 0.0564 max mem: 9377 +Train: [80] [5500/6250] eta: 0:01:45 lr: 0.000012 grad: 0.2005 (0.2066) loss: 0.7142 (0.7112) time: 0.1407 data: 0.0547 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:31 lr: 0.000012 grad: 0.2000 (0.2066) loss: 0.7063 (0.7112) time: 0.1283 data: 0.0442 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:17 lr: 0.000012 grad: 0.2028 (0.2066) loss: 0.7068 (0.7112) time: 0.1558 data: 0.0782 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:03 lr: 0.000012 grad: 0.2041 (0.2066) loss: 0.7091 (0.7111) time: 0.1453 data: 0.0627 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:49 lr: 0.000012 grad: 0.2004 (0.2065) loss: 0.7052 (0.7112) time: 0.1270 data: 0.0421 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:35 lr: 0.000012 grad: 0.2057 (0.2066) loss: 0.7112 (0.7112) time: 0.1748 data: 0.0948 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:21 lr: 0.000012 grad: 0.2053 (0.2065) loss: 0.7067 (0.7111) time: 0.1344 data: 0.0536 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:07 lr: 0.000012 grad: 0.2084 (0.2065) loss: 0.7070 (0.7111) time: 0.1547 data: 0.0728 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.2026 (0.2065) loss: 0.7071 (0.7111) time: 0.1639 data: 0.0793 max mem: 9377 +Train: [80] Total time: 0:14:50 (0.1425 s / it) +Averaged stats: lr: 0.000012 grad: 0.2026 (0.2065) loss: 0.7071 (0.7111) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:05:47 loss: 0.8663 (0.8663) time: 5.6095 data: 5.5789 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8739 (0.8740) time: 0.1299 data: 0.1048 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:15 (0.2463 s / it) +Averaged stats (hcp-train-subset): loss: 0.8739 (0.8740) +Eval (hcp-val): [80] [ 0/62] eta: 0:05:01 loss: 0.8790 (0.8790) time: 4.8697 data: 4.7931 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8703 (0.8718) time: 0.1460 data: 0.1192 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:15 (0.2516 s / it) +Averaged stats (hcp-val): loss: 0.8703 (0.8718) +Eval (nsd-val): [80] [ 0/62] eta: 0:05:16 loss: 0.8260 (0.8260) time: 5.0993 data: 5.0301 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8437 (0.8437) time: 0.1460 data: 0.1193 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:14 (0.2385 s / it) +Averaged stats (nsd-val): loss: 0.8437 (0.8437) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 7:59:46 lr: 0.000012 grad: 0.3188 (0.3188) loss: 0.7436 (0.7436) time: 4.6059 data: 4.3649 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:20:27 lr: 0.000012 grad: 0.2522 (0.2820) loss: 0.6923 (0.7205) time: 0.1478 data: 0.0470 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:17:23 lr: 0.000012 grad: 0.2164 (0.2584) loss: 0.7215 (0.7185) time: 0.1474 data: 0.0447 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:16:04 lr: 0.000012 grad: 0.2172 (0.2451) loss: 0.7223 (0.7175) time: 0.1426 data: 0.0557 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:15:17 lr: 0.000012 grad: 0.2137 (0.2379) loss: 0.7078 (0.7165) time: 0.1421 data: 0.0498 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:14:34 lr: 0.000012 grad: 0.2049 (0.2321) loss: 0.7138 (0.7158) time: 0.1529 data: 0.0628 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:14:02 lr: 0.000012 grad: 0.2079 (0.2279) loss: 0.7116 (0.7166) time: 0.1363 data: 0.0440 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:13:39 lr: 0.000012 grad: 0.2055 (0.2251) loss: 0.7126 (0.7157) time: 0.1280 data: 0.0358 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:13:22 lr: 0.000012 grad: 0.2024 (0.2226) loss: 0.7226 (0.7162) time: 0.1334 data: 0.0458 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:13:02 lr: 0.000012 grad: 0.2004 (0.2205) loss: 0.7240 (0.7168) time: 0.1495 data: 0.0654 max mem: 9377 +Train: [81] [1000/6250] eta: 0:12:44 lr: 0.000012 grad: 0.1984 (0.2188) loss: 0.7218 (0.7170) time: 0.1406 data: 0.0541 max mem: 9377 +Train: [81] [1100/6250] eta: 0:12:26 lr: 0.000012 grad: 0.1985 (0.2172) loss: 0.7285 (0.7173) time: 0.1656 data: 0.0817 max mem: 9377 +Train: [81] [1200/6250] eta: 0:12:07 lr: 0.000012 grad: 0.2118 (0.2162) loss: 0.7153 (0.7177) time: 0.1494 data: 0.0648 max mem: 9377 +Train: [81] [1300/6250] eta: 0:11:52 lr: 0.000012 grad: 0.2024 (0.2150) loss: 0.7225 (0.7177) time: 0.1520 data: 0.0678 max mem: 9377 +Train: [81] [1400/6250] eta: 0:11:37 lr: 0.000012 grad: 0.1973 (0.2141) loss: 0.7144 (0.7178) time: 0.1263 data: 0.0458 max mem: 9377 +Train: [81] [1500/6250] eta: 0:11:23 lr: 0.000012 grad: 0.1966 (0.2134) loss: 0.7172 (0.7176) time: 0.1397 data: 0.0593 max mem: 9377 +Train: [81] [1600/6250] eta: 0:11:07 lr: 0.000012 grad: 0.2008 (0.2126) loss: 0.7263 (0.7177) time: 0.1327 data: 0.0520 max mem: 9377 +Train: [81] [1700/6250] eta: 0:10:53 lr: 0.000012 grad: 0.1983 (0.2118) loss: 0.7188 (0.7182) time: 0.1485 data: 0.0699 max mem: 9377 +Train: [81] [1800/6250] eta: 0:10:40 lr: 0.000012 grad: 0.1991 (0.2112) loss: 0.7185 (0.7183) time: 0.1453 data: 0.0563 max mem: 9377 +Train: [81] [1900/6250] eta: 0:10:31 lr: 0.000012 grad: 0.1981 (0.2105) loss: 0.7214 (0.7188) time: 0.1538 data: 0.0676 max mem: 9377 +Train: [81] [2000/6250] eta: 0:10:16 lr: 0.000012 grad: 0.2038 (0.2100) loss: 0.7189 (0.7187) time: 0.1521 data: 0.0750 max mem: 9377 +Train: [81] [2100/6250] eta: 0:10:01 lr: 0.000012 grad: 0.2008 (0.2096) loss: 0.7098 (0.7185) time: 0.1357 data: 0.0584 max mem: 9377 +Train: [81] [2200/6250] eta: 0:09:46 lr: 0.000012 grad: 0.1984 (0.2093) loss: 0.7173 (0.7181) time: 0.1506 data: 0.0697 max mem: 9377 +Train: [81] [2300/6250] eta: 0:09:30 lr: 0.000011 grad: 0.1982 (0.2091) loss: 0.7190 (0.7178) time: 0.1337 data: 0.0499 max mem: 9377 +Train: [81] [2400/6250] eta: 0:09:15 lr: 0.000011 grad: 0.2077 (0.2090) loss: 0.7093 (0.7175) time: 0.1326 data: 0.0432 max mem: 9377 +Train: [81] [2500/6250] eta: 0:08:59 lr: 0.000011 grad: 0.2004 (0.2089) loss: 0.7144 (0.7173) time: 0.1421 data: 0.0537 max mem: 9377 +Train: [81] [2600/6250] eta: 0:08:43 lr: 0.000011 grad: 0.2020 (0.2087) loss: 0.7090 (0.7172) time: 0.1514 data: 0.0666 max mem: 9377 +Train: [81] [2700/6250] eta: 0:08:26 lr: 0.000011 grad: 0.2046 (0.2084) loss: 0.7034 (0.7170) time: 0.1327 data: 0.0534 max mem: 9377 +Train: [81] [2800/6250] eta: 0:08:12 lr: 0.000011 grad: 0.2068 (0.2082) loss: 0.7173 (0.7170) time: 0.1456 data: 0.0693 max mem: 9377 +Train: [81] [2900/6250] eta: 0:07:56 lr: 0.000011 grad: 0.1983 (0.2080) loss: 0.7175 (0.7169) time: 0.1092 data: 0.0171 max mem: 9377 +Train: [81] [3000/6250] eta: 0:07:42 lr: 0.000011 grad: 0.1987 (0.2078) loss: 0.7121 (0.7169) time: 0.1433 data: 0.0596 max mem: 9377 +Train: [81] [3100/6250] eta: 0:07:26 lr: 0.000011 grad: 0.2044 (0.2077) loss: 0.6996 (0.7167) time: 0.1370 data: 0.0524 max mem: 9377 +Train: [81] [3200/6250] eta: 0:07:12 lr: 0.000011 grad: 0.2001 (0.2075) loss: 0.7096 (0.7166) time: 0.1169 data: 0.0351 max mem: 9377 +Train: [81] [3300/6250] eta: 0:06:57 lr: 0.000011 grad: 0.2044 (0.2075) loss: 0.7154 (0.7165) time: 0.1306 data: 0.0486 max mem: 9377 +Train: [81] [3400/6250] eta: 0:06:43 lr: 0.000011 grad: 0.2034 (0.2074) loss: 0.7114 (0.7163) time: 0.1401 data: 0.0607 max mem: 9377 +Train: [81] [3500/6250] eta: 0:06:29 lr: 0.000011 grad: 0.2008 (0.2072) loss: 0.7065 (0.7161) time: 0.1380 data: 0.0549 max mem: 9377 +Train: [81] [3600/6250] eta: 0:06:14 lr: 0.000011 grad: 0.1988 (0.2072) loss: 0.7143 (0.7160) time: 0.1300 data: 0.0483 max mem: 9377 +Train: [81] [3700/6250] eta: 0:06:00 lr: 0.000011 grad: 0.2018 (0.2071) loss: 0.7148 (0.7158) time: 0.1310 data: 0.0476 max mem: 9377 +Train: [81] [3800/6250] eta: 0:05:45 lr: 0.000011 grad: 0.1995 (0.2070) loss: 0.7100 (0.7157) time: 0.1327 data: 0.0509 max mem: 9377 +Train: [81] [3900/6250] eta: 0:05:31 lr: 0.000011 grad: 0.2050 (0.2069) loss: 0.7020 (0.7156) time: 0.1303 data: 0.0513 max mem: 9377 +Train: [81] [4000/6250] eta: 0:05:16 lr: 0.000011 grad: 0.2000 (0.2070) loss: 0.7188 (0.7155) time: 0.1218 data: 0.0401 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:02 lr: 0.000011 grad: 0.2084 (0.2070) loss: 0.7130 (0.7153) time: 0.1424 data: 0.0635 max mem: 9377 +Train: [81] [4200/6250] eta: 0:04:48 lr: 0.000011 grad: 0.2096 (0.2070) loss: 0.6811 (0.7150) time: 0.1304 data: 0.0502 max mem: 9377 +Train: [81] [4300/6250] eta: 0:04:34 lr: 0.000011 grad: 0.2025 (0.2070) loss: 0.7022 (0.7148) time: 0.1416 data: 0.0586 max mem: 9377 +Train: [81] [4400/6250] eta: 0:04:20 lr: 0.000011 grad: 0.2041 (0.2070) loss: 0.7073 (0.7145) time: 0.1609 data: 0.0832 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:06 lr: 0.000011 grad: 0.2019 (0.2069) loss: 0.7162 (0.7144) time: 0.1352 data: 0.0470 max mem: 9377 +Train: [81] [4600/6250] eta: 0:03:52 lr: 0.000011 grad: 0.2055 (0.2069) loss: 0.7065 (0.7144) time: 0.1105 data: 0.0239 max mem: 9377 +Train: [81] [4700/6250] eta: 0:03:37 lr: 0.000011 grad: 0.2057 (0.2068) loss: 0.7005 (0.7143) time: 0.1387 data: 0.0577 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:23 lr: 0.000011 grad: 0.2051 (0.2068) loss: 0.7012 (0.7143) time: 0.1335 data: 0.0558 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:09 lr: 0.000011 grad: 0.2023 (0.2067) loss: 0.7141 (0.7143) time: 0.1233 data: 0.0408 max mem: 9377 +Train: [81] [5000/6250] eta: 0:02:55 lr: 0.000011 grad: 0.2027 (0.2067) loss: 0.7111 (0.7142) time: 0.1333 data: 0.0478 max mem: 9377 +Train: [81] [5100/6250] eta: 0:02:41 lr: 0.000011 grad: 0.1998 (0.2067) loss: 0.7170 (0.7142) time: 0.1348 data: 0.0520 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:27 lr: 0.000011 grad: 0.2046 (0.2067) loss: 0.7030 (0.7141) time: 0.1505 data: 0.0692 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:13 lr: 0.000011 grad: 0.2047 (0.2067) loss: 0.6993 (0.7140) time: 0.1688 data: 0.0902 max mem: 9377 +Train: [81] [5400/6250] eta: 0:01:59 lr: 0.000011 grad: 0.2098 (0.2067) loss: 0.7054 (0.7139) time: 0.1362 data: 0.0552 max mem: 9377 +Train: [81] [5500/6250] eta: 0:01:45 lr: 0.000011 grad: 0.2021 (0.2067) loss: 0.7091 (0.7138) time: 0.1238 data: 0.0465 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:31 lr: 0.000011 grad: 0.1997 (0.2066) loss: 0.7159 (0.7138) time: 0.1422 data: 0.0648 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:17 lr: 0.000011 grad: 0.2009 (0.2065) loss: 0.7121 (0.7138) time: 0.1339 data: 0.0546 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:03 lr: 0.000011 grad: 0.2029 (0.2065) loss: 0.7140 (0.7138) time: 0.1337 data: 0.0584 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:49 lr: 0.000011 grad: 0.2023 (0.2064) loss: 0.7175 (0.7138) time: 0.1339 data: 0.0496 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:35 lr: 0.000011 grad: 0.2000 (0.2064) loss: 0.7134 (0.7139) time: 0.1373 data: 0.0575 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:21 lr: 0.000011 grad: 0.2060 (0.2063) loss: 0.7145 (0.7139) time: 0.1614 data: 0.0836 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:07 lr: 0.000011 grad: 0.2087 (0.2063) loss: 0.7035 (0.7138) time: 0.1491 data: 0.0706 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.2046 (0.2063) loss: 0.7140 (0.7138) time: 0.1466 data: 0.0648 max mem: 9377 +Train: [81] Total time: 0:14:41 (0.1411 s / it) +Averaged stats: lr: 0.000011 grad: 0.2046 (0.2063) loss: 0.7140 (0.7138) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:05:11 loss: 0.8676 (0.8676) time: 5.0259 data: 4.9924 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8718 (0.8744) time: 0.1277 data: 0.1024 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:14 (0.2387 s / it) +Averaged stats (hcp-train-subset): loss: 0.8718 (0.8744) +Eval (hcp-val): [81] [ 0/62] eta: 0:04:12 loss: 0.8723 (0.8723) time: 4.0699 data: 3.9901 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8684 (0.8711) time: 0.1374 data: 0.1125 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:14 (0.2353 s / it) +Averaged stats (hcp-val): loss: 0.8684 (0.8711) +Eval (nsd-val): [81] [ 0/62] eta: 0:05:38 loss: 0.8371 (0.8371) time: 5.4535 data: 5.4239 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8438 (0.8447) time: 0.1394 data: 0.1145 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:13 (0.2172 s / it) +Averaged stats (nsd-val): loss: 0.8438 (0.8447) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 10:52:10 lr: 0.000011 grad: 0.3182 (0.3182) loss: 0.6266 (0.6266) time: 6.2610 data: 6.1351 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:19:48 lr: 0.000011 grad: 0.2492 (0.2767) loss: 0.7095 (0.7176) time: 0.1404 data: 0.0349 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:17:05 lr: 0.000011 grad: 0.2203 (0.2563) loss: 0.7150 (0.7161) time: 0.1269 data: 0.0254 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:16:00 lr: 0.000011 grad: 0.2240 (0.2459) loss: 0.6821 (0.7112) time: 0.1456 data: 0.0540 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:15:41 lr: 0.000011 grad: 0.2108 (0.2389) loss: 0.7044 (0.7104) time: 0.1518 data: 0.0527 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:15:27 lr: 0.000011 grad: 0.2056 (0.2331) loss: 0.7245 (0.7112) time: 0.1527 data: 0.0546 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:15:06 lr: 0.000011 grad: 0.2025 (0.2283) loss: 0.7237 (0.7135) time: 0.1581 data: 0.0573 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:14:42 lr: 0.000011 grad: 0.2064 (0.2249) loss: 0.7113 (0.7143) time: 0.1599 data: 0.0640 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:14:17 lr: 0.000011 grad: 0.2040 (0.2228) loss: 0.7054 (0.7140) time: 0.1539 data: 0.0692 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:14:05 lr: 0.000011 grad: 0.2006 (0.2210) loss: 0.7184 (0.7139) time: 0.1672 data: 0.0877 max mem: 9377 +Train: [82] [1000/6250] eta: 0:13:51 lr: 0.000011 grad: 0.2146 (0.2196) loss: 0.6978 (0.7137) time: 0.1978 data: 0.1142 max mem: 9377 +Train: [82] [1100/6250] eta: 0:13:32 lr: 0.000011 grad: 0.2091 (0.2184) loss: 0.7093 (0.7135) time: 0.1292 data: 0.0446 max mem: 9377 +Train: [82] [1200/6250] eta: 0:13:06 lr: 0.000011 grad: 0.2020 (0.2174) loss: 0.7111 (0.7136) time: 0.1377 data: 0.0534 max mem: 9377 +Train: [82] [1300/6250] eta: 0:12:46 lr: 0.000011 grad: 0.2109 (0.2166) loss: 0.7034 (0.7132) time: 0.1446 data: 0.0603 max mem: 9377 +Train: [82] [1400/6250] eta: 0:12:27 lr: 0.000010 grad: 0.2055 (0.2159) loss: 0.7047 (0.7131) time: 0.1429 data: 0.0554 max mem: 9377 +Train: [82] [1500/6250] eta: 0:12:09 lr: 0.000010 grad: 0.2011 (0.2151) loss: 0.7128 (0.7128) time: 0.1350 data: 0.0556 max mem: 9377 +Train: [82] [1600/6250] eta: 0:11:51 lr: 0.000010 grad: 0.1984 (0.2144) loss: 0.7116 (0.7128) time: 0.1311 data: 0.0495 max mem: 9377 +Train: [82] [1700/6250] eta: 0:11:34 lr: 0.000010 grad: 0.1990 (0.2139) loss: 0.7109 (0.7126) time: 0.1378 data: 0.0451 max mem: 9377 +Train: [82] [1800/6250] eta: 0:11:20 lr: 0.000010 grad: 0.2027 (0.2134) loss: 0.6996 (0.7124) time: 0.1688 data: 0.0880 max mem: 9377 +Train: [82] [1900/6250] eta: 0:11:04 lr: 0.000010 grad: 0.1984 (0.2130) loss: 0.7049 (0.7122) time: 0.1602 data: 0.0792 max mem: 9377 +Train: [82] [2000/6250] eta: 0:10:48 lr: 0.000010 grad: 0.2001 (0.2125) loss: 0.7044 (0.7118) time: 0.1565 data: 0.0776 max mem: 9377 +Train: [82] [2100/6250] eta: 0:10:33 lr: 0.000010 grad: 0.2004 (0.2122) loss: 0.7138 (0.7114) time: 0.1320 data: 0.0454 max mem: 9377 +Train: [82] [2200/6250] eta: 0:10:18 lr: 0.000010 grad: 0.2045 (0.2120) loss: 0.7004 (0.7109) time: 0.1512 data: 0.0659 max mem: 9377 +Train: [82] [2300/6250] eta: 0:10:03 lr: 0.000010 grad: 0.2039 (0.2118) loss: 0.7011 (0.7106) time: 0.1282 data: 0.0481 max mem: 9377 +Train: [82] [2400/6250] eta: 0:09:49 lr: 0.000010 grad: 0.1957 (0.2116) loss: 0.7114 (0.7104) time: 0.1699 data: 0.0842 max mem: 9377 +Train: [82] [2500/6250] eta: 0:09:32 lr: 0.000010 grad: 0.2069 (0.2114) loss: 0.7015 (0.7101) time: 0.1437 data: 0.0570 max mem: 9377 +Train: [82] [2600/6250] eta: 0:09:15 lr: 0.000010 grad: 0.2107 (0.2113) loss: 0.6956 (0.7096) time: 0.1337 data: 0.0441 max mem: 9377 +Train: [82] [2700/6250] eta: 0:08:58 lr: 0.000010 grad: 0.2073 (0.2111) loss: 0.6957 (0.7093) time: 0.1405 data: 0.0561 max mem: 9377 +Train: [82] [2800/6250] eta: 0:08:41 lr: 0.000010 grad: 0.2103 (0.2111) loss: 0.7059 (0.7090) time: 0.1361 data: 0.0549 max mem: 9377 +Train: [82] [2900/6250] eta: 0:08:25 lr: 0.000010 grad: 0.2050 (0.2110) loss: 0.7014 (0.7089) time: 0.1581 data: 0.0766 max mem: 9377 +Train: [82] [3000/6250] eta: 0:08:10 lr: 0.000010 grad: 0.2024 (0.2107) loss: 0.7015 (0.7090) time: 0.1406 data: 0.0636 max mem: 9377 +Train: [82] [3100/6250] eta: 0:07:55 lr: 0.000010 grad: 0.2010 (0.2106) loss: 0.7119 (0.7091) time: 0.1346 data: 0.0546 max mem: 9377 +Train: [82] [3200/6250] eta: 0:07:41 lr: 0.000010 grad: 0.2000 (0.2104) loss: 0.7180 (0.7092) time: 0.1355 data: 0.0566 max mem: 9377 +Train: [82] [3300/6250] eta: 0:07:26 lr: 0.000010 grad: 0.2066 (0.2103) loss: 0.7051 (0.7093) time: 0.1499 data: 0.0695 max mem: 9377 +Train: [82] [3400/6250] eta: 0:07:11 lr: 0.000010 grad: 0.2065 (0.2102) loss: 0.7099 (0.7092) time: 0.1421 data: 0.0641 max mem: 9377 +Train: [82] [3500/6250] eta: 0:06:55 lr: 0.000010 grad: 0.2067 (0.2101) loss: 0.7117 (0.7094) time: 0.1310 data: 0.0500 max mem: 9377 +Train: [82] [3600/6250] eta: 0:06:40 lr: 0.000010 grad: 0.2036 (0.2099) loss: 0.7070 (0.7095) time: 0.1094 data: 0.0283 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:24 lr: 0.000010 grad: 0.2055 (0.2098) loss: 0.7237 (0.7097) time: 0.1465 data: 0.0672 max mem: 9377 +Train: [82] [3800/6250] eta: 0:06:08 lr: 0.000010 grad: 0.2043 (0.2097) loss: 0.7100 (0.7098) time: 0.1527 data: 0.0755 max mem: 9377 +Train: [82] [3900/6250] eta: 0:05:52 lr: 0.000010 grad: 0.1990 (0.2095) loss: 0.7148 (0.7100) time: 0.1388 data: 0.0574 max mem: 9377 +Train: [82] [4000/6250] eta: 0:05:36 lr: 0.000010 grad: 0.1965 (0.2093) loss: 0.7074 (0.7101) time: 0.1379 data: 0.0467 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:21 lr: 0.000010 grad: 0.2003 (0.2091) loss: 0.7173 (0.7103) time: 0.1182 data: 0.0319 max mem: 9377 +Train: [82] [4200/6250] eta: 0:05:05 lr: 0.000010 grad: 0.2068 (0.2090) loss: 0.7171 (0.7105) time: 0.1206 data: 0.0416 max mem: 9377 +Train: [82] [4300/6250] eta: 0:04:50 lr: 0.000010 grad: 0.2035 (0.2089) loss: 0.7196 (0.7106) time: 0.1621 data: 0.0809 max mem: 9377 +Train: [82] [4400/6250] eta: 0:04:34 lr: 0.000010 grad: 0.2031 (0.2087) loss: 0.7120 (0.7107) time: 0.1034 data: 0.0170 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:19 lr: 0.000010 grad: 0.2020 (0.2086) loss: 0.7145 (0.7108) time: 0.1343 data: 0.0523 max mem: 9377 +Train: [82] [4600/6250] eta: 0:04:03 lr: 0.000010 grad: 0.2026 (0.2086) loss: 0.7149 (0.7108) time: 0.1425 data: 0.0599 max mem: 9377 +Train: [82] [4700/6250] eta: 0:03:48 lr: 0.000010 grad: 0.2040 (0.2084) loss: 0.7122 (0.7109) time: 0.1433 data: 0.0581 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:33 lr: 0.000010 grad: 0.2027 (0.2084) loss: 0.7027 (0.7109) time: 0.1217 data: 0.0422 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:18 lr: 0.000010 grad: 0.2004 (0.2083) loss: 0.7091 (0.7110) time: 0.1160 data: 0.0295 max mem: 9377 +Train: [82] [5000/6250] eta: 0:03:03 lr: 0.000010 grad: 0.2018 (0.2082) loss: 0.7124 (0.7110) time: 0.1268 data: 0.0385 max mem: 9377 +Train: [82] [5100/6250] eta: 0:02:48 lr: 0.000010 grad: 0.2005 (0.2081) loss: 0.7236 (0.7111) time: 0.1298 data: 0.0426 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:33 lr: 0.000010 grad: 0.1988 (0.2080) loss: 0.7097 (0.7112) time: 0.1366 data: 0.0552 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:19 lr: 0.000010 grad: 0.2066 (0.2079) loss: 0.7048 (0.7112) time: 0.1372 data: 0.0545 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:04 lr: 0.000010 grad: 0.2040 (0.2078) loss: 0.7135 (0.7112) time: 0.1303 data: 0.0495 max mem: 9377 +Train: [82] [5500/6250] eta: 0:01:49 lr: 0.000010 grad: 0.2045 (0.2078) loss: 0.7036 (0.7112) time: 0.1312 data: 0.0507 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:34 lr: 0.000010 grad: 0.2135 (0.2078) loss: 0.7066 (0.7111) time: 0.1307 data: 0.0488 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:20 lr: 0.000010 grad: 0.2046 (0.2077) loss: 0.7193 (0.7111) time: 0.1458 data: 0.0696 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:05 lr: 0.000010 grad: 0.2071 (0.2077) loss: 0.7129 (0.7111) time: 0.1512 data: 0.0690 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:51 lr: 0.000010 grad: 0.2033 (0.2077) loss: 0.7077 (0.7111) time: 0.1446 data: 0.0635 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:36 lr: 0.000010 grad: 0.2063 (0.2076) loss: 0.7100 (0.7112) time: 0.1606 data: 0.0797 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:21 lr: 0.000010 grad: 0.2056 (0.2076) loss: 0.7147 (0.7112) time: 0.1506 data: 0.0678 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:07 lr: 0.000010 grad: 0.2047 (0.2076) loss: 0.7150 (0.7112) time: 0.1458 data: 0.0638 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.2018 (0.2076) loss: 0.7067 (0.7112) time: 0.1544 data: 0.0768 max mem: 9377 +Train: [82] Total time: 0:15:15 (0.1464 s / it) +Averaged stats: lr: 0.000010 grad: 0.2018 (0.2076) loss: 0.7067 (0.7112) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:04:27 loss: 0.8714 (0.8714) time: 4.3087 data: 4.2434 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8731 (0.8750) time: 0.1315 data: 0.1063 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:12 (0.2076 s / it) +Averaged stats (hcp-train-subset): loss: 0.8731 (0.8750) +Eval (hcp-val): [82] [ 0/62] eta: 0:05:32 loss: 0.8639 (0.8639) time: 5.3579 data: 5.3279 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8721 (0.8725) time: 0.1242 data: 0.0978 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:12 (0.2049 s / it) +Averaged stats (hcp-val): loss: 0.8721 (0.8725) +Eval (nsd-val): [82] [ 0/62] eta: 0:04:27 loss: 0.8319 (0.8319) time: 4.3070 data: 4.2433 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8451 (0.8451) time: 0.1861 data: 0.1612 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (nsd-val): loss: 0.8451 (0.8451) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 12:05:57 lr: 0.000010 grad: 0.2558 (0.2558) loss: 0.7418 (0.7418) time: 6.9693 data: 6.8827 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:21:34 lr: 0.000010 grad: 0.2422 (0.2684) loss: 0.7302 (0.7142) time: 0.1703 data: 0.0777 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:17:57 lr: 0.000010 grad: 0.2413 (0.2555) loss: 0.7010 (0.7091) time: 0.1467 data: 0.0664 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:16:12 lr: 0.000010 grad: 0.2201 (0.2452) loss: 0.6963 (0.7081) time: 0.1357 data: 0.0553 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:15:15 lr: 0.000010 grad: 0.2014 (0.2378) loss: 0.7263 (0.7101) time: 0.1399 data: 0.0475 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:14:43 lr: 0.000010 grad: 0.2053 (0.2320) loss: 0.7149 (0.7119) time: 0.1455 data: 0.0544 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:14:12 lr: 0.000010 grad: 0.2024 (0.2284) loss: 0.7271 (0.7128) time: 0.1389 data: 0.0531 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:13:44 lr: 0.000009 grad: 0.2017 (0.2250) loss: 0.7189 (0.7146) time: 0.1271 data: 0.0321 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:13:22 lr: 0.000009 grad: 0.2084 (0.2222) loss: 0.7150 (0.7158) time: 0.1472 data: 0.0518 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:12:58 lr: 0.000009 grad: 0.1981 (0.2203) loss: 0.7096 (0.7157) time: 0.1259 data: 0.0277 max mem: 9377 +Train: [83] [1000/6250] eta: 0:12:36 lr: 0.000009 grad: 0.2049 (0.2187) loss: 0.7020 (0.7157) time: 0.1296 data: 0.0450 max mem: 9377 +Train: [83] [1100/6250] eta: 0:12:17 lr: 0.000009 grad: 0.2045 (0.2175) loss: 0.6950 (0.7157) time: 0.1271 data: 0.0447 max mem: 9377 +Train: [83] [1200/6250] eta: 0:12:00 lr: 0.000009 grad: 0.2005 (0.2164) loss: 0.7102 (0.7153) time: 0.1471 data: 0.0592 max mem: 9377 +Train: [83] [1300/6250] eta: 0:11:43 lr: 0.000009 grad: 0.2066 (0.2155) loss: 0.7216 (0.7153) time: 0.1391 data: 0.0543 max mem: 9377 +Train: [83] [1400/6250] eta: 0:11:29 lr: 0.000009 grad: 0.2077 (0.2148) loss: 0.7097 (0.7149) time: 0.1594 data: 0.0797 max mem: 9377 +Train: [83] [1500/6250] eta: 0:11:14 lr: 0.000009 grad: 0.2030 (0.2142) loss: 0.7157 (0.7144) time: 0.1206 data: 0.0389 max mem: 9377 +Train: [83] [1600/6250] eta: 0:11:01 lr: 0.000009 grad: 0.2052 (0.2137) loss: 0.7123 (0.7138) time: 0.1514 data: 0.0702 max mem: 9377 +Train: [83] [1700/6250] eta: 0:10:45 lr: 0.000009 grad: 0.2018 (0.2133) loss: 0.7171 (0.7134) time: 0.1392 data: 0.0575 max mem: 9377 +Train: [83] [1800/6250] eta: 0:10:37 lr: 0.000009 grad: 0.2005 (0.2129) loss: 0.7061 (0.7132) time: 0.1427 data: 0.0607 max mem: 9377 +Train: [83] [1900/6250] eta: 0:10:25 lr: 0.000009 grad: 0.2038 (0.2126) loss: 0.7157 (0.7131) time: 0.1496 data: 0.0614 max mem: 9377 +Train: [83] [2000/6250] eta: 0:10:12 lr: 0.000009 grad: 0.1983 (0.2123) loss: 0.7108 (0.7131) time: 0.1425 data: 0.0634 max mem: 9377 +Train: [83] [2100/6250] eta: 0:09:58 lr: 0.000009 grad: 0.2014 (0.2121) loss: 0.7130 (0.7133) time: 0.1407 data: 0.0553 max mem: 9377 +Train: [83] [2200/6250] eta: 0:09:43 lr: 0.000009 grad: 0.2059 (0.2117) loss: 0.7131 (0.7134) time: 0.1511 data: 0.0695 max mem: 9377 +Train: [83] [2300/6250] eta: 0:09:28 lr: 0.000009 grad: 0.2066 (0.2115) loss: 0.7093 (0.7134) time: 0.1458 data: 0.0627 max mem: 9377 +Train: [83] [2400/6250] eta: 0:09:12 lr: 0.000009 grad: 0.2074 (0.2113) loss: 0.7028 (0.7132) time: 0.1286 data: 0.0415 max mem: 9377 +Train: [83] [2500/6250] eta: 0:08:56 lr: 0.000009 grad: 0.2028 (0.2110) loss: 0.7119 (0.7133) time: 0.1246 data: 0.0343 max mem: 9377 +Train: [83] [2600/6250] eta: 0:08:40 lr: 0.000009 grad: 0.2046 (0.2109) loss: 0.7130 (0.7132) time: 0.1313 data: 0.0468 max mem: 9377 +Train: [83] [2700/6250] eta: 0:08:24 lr: 0.000009 grad: 0.2034 (0.2107) loss: 0.7184 (0.7132) time: 0.1395 data: 0.0497 max mem: 9377 +Train: [83] [2800/6250] eta: 0:08:09 lr: 0.000009 grad: 0.2042 (0.2106) loss: 0.7072 (0.7132) time: 0.1339 data: 0.0538 max mem: 9377 +Train: [83] [2900/6250] eta: 0:07:53 lr: 0.000009 grad: 0.2073 (0.2105) loss: 0.7095 (0.7132) time: 0.1332 data: 0.0507 max mem: 9377 +Train: [83] [3000/6250] eta: 0:07:38 lr: 0.000009 grad: 0.2146 (0.2105) loss: 0.6986 (0.7129) time: 0.1412 data: 0.0572 max mem: 9377 +Train: [83] [3100/6250] eta: 0:07:23 lr: 0.000009 grad: 0.2142 (0.2104) loss: 0.6968 (0.7128) time: 0.1502 data: 0.0631 max mem: 9377 +Train: [83] [3200/6250] eta: 0:07:09 lr: 0.000009 grad: 0.2056 (0.2103) loss: 0.6980 (0.7127) time: 0.1313 data: 0.0512 max mem: 9377 +Train: [83] [3300/6250] eta: 0:06:56 lr: 0.000009 grad: 0.2063 (0.2102) loss: 0.7010 (0.7124) time: 0.1724 data: 0.0969 max mem: 9377 +Train: [83] [3400/6250] eta: 0:06:41 lr: 0.000009 grad: 0.2044 (0.2102) loss: 0.7103 (0.7121) time: 0.1343 data: 0.0463 max mem: 9377 +Train: [83] [3500/6250] eta: 0:06:27 lr: 0.000009 grad: 0.2056 (0.2102) loss: 0.7007 (0.7118) time: 0.1361 data: 0.0518 max mem: 9377 +Train: [83] [3600/6250] eta: 0:06:13 lr: 0.000009 grad: 0.2025 (0.2101) loss: 0.7067 (0.7116) time: 0.1343 data: 0.0521 max mem: 9377 +Train: [83] [3700/6250] eta: 0:05:59 lr: 0.000009 grad: 0.2044 (0.2101) loss: 0.7009 (0.7112) time: 0.1412 data: 0.0543 max mem: 9377 +Train: [83] [3800/6250] eta: 0:05:45 lr: 0.000009 grad: 0.2094 (0.2101) loss: 0.7093 (0.7110) time: 0.1517 data: 0.0714 max mem: 9377 +Train: [83] [3900/6250] eta: 0:05:30 lr: 0.000009 grad: 0.2037 (0.2100) loss: 0.7074 (0.7108) time: 0.1354 data: 0.0506 max mem: 9377 +Train: [83] [4000/6250] eta: 0:05:16 lr: 0.000009 grad: 0.2047 (0.2100) loss: 0.7068 (0.7106) time: 0.1241 data: 0.0429 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:02 lr: 0.000009 grad: 0.2060 (0.2099) loss: 0.7049 (0.7104) time: 0.1269 data: 0.0435 max mem: 9377 +Train: [83] [4200/6250] eta: 0:04:47 lr: 0.000009 grad: 0.2084 (0.2099) loss: 0.7075 (0.7102) time: 0.1359 data: 0.0583 max mem: 9377 +Train: [83] [4300/6250] eta: 0:04:33 lr: 0.000009 grad: 0.2090 (0.2098) loss: 0.6937 (0.7100) time: 0.1424 data: 0.0663 max mem: 9377 +Train: [83] [4400/6250] eta: 0:04:19 lr: 0.000009 grad: 0.2099 (0.2098) loss: 0.7047 (0.7098) time: 0.1298 data: 0.0472 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:05 lr: 0.000009 grad: 0.2062 (0.2098) loss: 0.7070 (0.7095) time: 0.1382 data: 0.0609 max mem: 9377 +Train: [83] [4600/6250] eta: 0:03:51 lr: 0.000009 grad: 0.2110 (0.2097) loss: 0.6970 (0.7094) time: 0.1427 data: 0.0675 max mem: 9377 +Train: [83] [4700/6250] eta: 0:03:37 lr: 0.000009 grad: 0.2033 (0.2097) loss: 0.7020 (0.7091) time: 0.1220 data: 0.0455 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:23 lr: 0.000009 grad: 0.2129 (0.2097) loss: 0.6979 (0.7089) time: 0.1421 data: 0.0572 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:09 lr: 0.000009 grad: 0.2114 (0.2097) loss: 0.7027 (0.7088) time: 0.1339 data: 0.0537 max mem: 9377 +Train: [83] [5000/6250] eta: 0:02:55 lr: 0.000009 grad: 0.1999 (0.2096) loss: 0.7084 (0.7088) time: 0.1412 data: 0.0604 max mem: 9377 +Train: [83] [5100/6250] eta: 0:02:41 lr: 0.000009 grad: 0.2025 (0.2095) loss: 0.7116 (0.7088) time: 0.1329 data: 0.0521 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:27 lr: 0.000009 grad: 0.2034 (0.2094) loss: 0.7060 (0.7088) time: 0.1329 data: 0.0524 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:13 lr: 0.000009 grad: 0.2033 (0.2093) loss: 0.7039 (0.7088) time: 0.1429 data: 0.0577 max mem: 9377 +Train: [83] [5400/6250] eta: 0:01:59 lr: 0.000009 grad: 0.2014 (0.2093) loss: 0.7105 (0.7088) time: 0.1484 data: 0.0674 max mem: 9377 +Train: [83] [5500/6250] eta: 0:01:45 lr: 0.000009 grad: 0.2022 (0.2092) loss: 0.7074 (0.7089) time: 0.1490 data: 0.0685 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:31 lr: 0.000009 grad: 0.2041 (0.2092) loss: 0.7152 (0.7089) time: 0.1208 data: 0.0431 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:17 lr: 0.000009 grad: 0.2071 (0.2091) loss: 0.7104 (0.7090) time: 0.1482 data: 0.0639 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:03 lr: 0.000009 grad: 0.2088 (0.2091) loss: 0.7135 (0.7090) time: 0.1356 data: 0.0550 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:49 lr: 0.000009 grad: 0.2054 (0.2091) loss: 0.7153 (0.7090) time: 0.1498 data: 0.0683 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:35 lr: 0.000009 grad: 0.2062 (0.2090) loss: 0.7081 (0.7091) time: 0.1392 data: 0.0611 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:21 lr: 0.000009 grad: 0.2116 (0.2091) loss: 0.6926 (0.7090) time: 0.1250 data: 0.0466 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:07 lr: 0.000009 grad: 0.1980 (0.2090) loss: 0.7159 (0.7091) time: 0.1233 data: 0.0458 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.2133 (0.2090) loss: 0.7089 (0.7091) time: 0.1295 data: 0.0472 max mem: 9377 +Train: [83] Total time: 0:14:40 (0.1409 s / it) +Averaged stats: lr: 0.000009 grad: 0.2133 (0.2090) loss: 0.7089 (0.7091) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:05:35 loss: 0.8659 (0.8659) time: 5.4098 data: 5.3800 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8741 (0.8746) time: 0.1265 data: 0.0995 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:13 (0.2243 s / it) +Averaged stats (hcp-train-subset): loss: 0.8741 (0.8746) +Eval (hcp-val): [83] [ 0/62] eta: 0:04:45 loss: 0.8760 (0.8760) time: 4.6041 data: 4.5097 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8689 (0.8719) time: 0.1384 data: 0.1133 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:14 (0.2263 s / it) +Averaged stats (hcp-val): loss: 0.8689 (0.8719) +Eval (nsd-val): [83] [ 0/62] eta: 0:04:20 loss: 0.8337 (0.8337) time: 4.1945 data: 4.1122 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8436 (0.8440) time: 0.1144 data: 0.0874 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (nsd-val): loss: 0.8436 (0.8440) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 9:14:29 lr: 0.000009 grad: 0.3318 (0.3318) loss: 0.6917 (0.6917) time: 5.3231 data: 5.2079 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:19:07 lr: 0.000009 grad: 0.2251 (0.2482) loss: 0.7203 (0.7364) time: 0.1229 data: 0.0178 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:17:18 lr: 0.000009 grad: 0.2154 (0.2355) loss: 0.7199 (0.7323) time: 0.1102 data: 0.0199 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:16:03 lr: 0.000008 grad: 0.2081 (0.2303) loss: 0.7197 (0.7294) time: 0.1413 data: 0.0532 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:15:14 lr: 0.000008 grad: 0.2095 (0.2259) loss: 0.7288 (0.7276) time: 0.1465 data: 0.0509 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:14:34 lr: 0.000008 grad: 0.2118 (0.2231) loss: 0.7238 (0.7265) time: 0.1278 data: 0.0430 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:14:08 lr: 0.000008 grad: 0.2052 (0.2211) loss: 0.7126 (0.7259) time: 0.1279 data: 0.0473 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:13:45 lr: 0.000008 grad: 0.2052 (0.2194) loss: 0.7124 (0.7249) time: 0.1287 data: 0.0355 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:13:20 lr: 0.000008 grad: 0.2035 (0.2183) loss: 0.7160 (0.7241) time: 0.1138 data: 0.0169 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:12:58 lr: 0.000008 grad: 0.1979 (0.2167) loss: 0.7169 (0.7235) time: 0.1486 data: 0.0632 max mem: 9377 +Train: [84] [1000/6250] eta: 0:12:34 lr: 0.000008 grad: 0.2029 (0.2158) loss: 0.7132 (0.7224) time: 0.1304 data: 0.0477 max mem: 9377 +Train: [84] [1100/6250] eta: 0:12:10 lr: 0.000008 grad: 0.2048 (0.2149) loss: 0.7231 (0.7218) time: 0.1196 data: 0.0357 max mem: 9377 +Train: [84] [1200/6250] eta: 0:11:49 lr: 0.000008 grad: 0.2055 (0.2142) loss: 0.7171 (0.7213) time: 0.1272 data: 0.0392 max mem: 9377 +Train: [84] [1300/6250] eta: 0:11:33 lr: 0.000008 grad: 0.2005 (0.2136) loss: 0.7190 (0.7209) time: 0.1058 data: 0.0144 max mem: 9377 +Train: [84] [1400/6250] eta: 0:11:24 lr: 0.000008 grad: 0.2053 (0.2129) loss: 0.7130 (0.7206) time: 0.1714 data: 0.0929 max mem: 9377 +Train: [84] [1500/6250] eta: 0:11:12 lr: 0.000008 grad: 0.1970 (0.2123) loss: 0.7197 (0.7202) time: 0.1464 data: 0.0687 max mem: 9377 +Train: [84] [1600/6250] eta: 0:11:03 lr: 0.000008 grad: 0.2059 (0.2118) loss: 0.7073 (0.7197) time: 0.1645 data: 0.0875 max mem: 9377 +Train: [84] [1700/6250] eta: 0:10:53 lr: 0.000008 grad: 0.2061 (0.2113) loss: 0.7046 (0.7193) time: 0.1579 data: 0.0823 max mem: 9377 +Train: [84] [1800/6250] eta: 0:10:41 lr: 0.000008 grad: 0.2106 (0.2110) loss: 0.6979 (0.7189) time: 0.1576 data: 0.0729 max mem: 9377 +Train: [84] [1900/6250] eta: 0:10:29 lr: 0.000008 grad: 0.2094 (0.2108) loss: 0.7019 (0.7185) time: 0.1631 data: 0.0793 max mem: 9377 +Train: [84] [2000/6250] eta: 0:10:16 lr: 0.000008 grad: 0.2060 (0.2105) loss: 0.7086 (0.7181) time: 0.1591 data: 0.0776 max mem: 9377 +Train: [84] [2100/6250] eta: 0:10:02 lr: 0.000008 grad: 0.2061 (0.2103) loss: 0.7119 (0.7179) time: 0.1237 data: 0.0386 max mem: 9377 +Train: [84] [2200/6250] eta: 0:09:50 lr: 0.000008 grad: 0.2051 (0.2101) loss: 0.7148 (0.7175) time: 0.1513 data: 0.0663 max mem: 9377 +Train: [84] [2300/6250] eta: 0:09:37 lr: 0.000008 grad: 0.2082 (0.2100) loss: 0.6982 (0.7171) time: 0.1447 data: 0.0588 max mem: 9377 +Train: [84] [2400/6250] eta: 0:09:22 lr: 0.000008 grad: 0.2049 (0.2099) loss: 0.6974 (0.7165) time: 0.1596 data: 0.0751 max mem: 9377 +Train: [84] [2500/6250] eta: 0:09:06 lr: 0.000008 grad: 0.2034 (0.2098) loss: 0.7115 (0.7161) time: 0.1597 data: 0.0742 max mem: 9377 +Train: [84] [2600/6250] eta: 0:08:50 lr: 0.000008 grad: 0.2079 (0.2097) loss: 0.7217 (0.7159) time: 0.1389 data: 0.0605 max mem: 9377 +Train: [84] [2700/6250] eta: 0:08:35 lr: 0.000008 grad: 0.2006 (0.2097) loss: 0.7135 (0.7157) time: 0.1529 data: 0.0740 max mem: 9377 +Train: [84] [2800/6250] eta: 0:08:19 lr: 0.000008 grad: 0.2079 (0.2096) loss: 0.7165 (0.7156) time: 0.1122 data: 0.0257 max mem: 9377 +Train: [84] [2900/6250] eta: 0:08:06 lr: 0.000008 grad: 0.2061 (0.2095) loss: 0.7155 (0.7156) time: 0.1596 data: 0.0809 max mem: 9377 +Train: [84] [3000/6250] eta: 0:07:53 lr: 0.000008 grad: 0.2111 (0.2095) loss: 0.7072 (0.7153) time: 0.1677 data: 0.0866 max mem: 9377 +Train: [84] [3100/6250] eta: 0:07:40 lr: 0.000008 grad: 0.2045 (0.2094) loss: 0.7115 (0.7151) time: 0.1614 data: 0.0840 max mem: 9377 +Train: [84] [3200/6250] eta: 0:07:28 lr: 0.000008 grad: 0.2076 (0.2094) loss: 0.7123 (0.7149) time: 0.1680 data: 0.0879 max mem: 9377 +Train: [84] [3300/6250] eta: 0:07:14 lr: 0.000008 grad: 0.2047 (0.2094) loss: 0.7091 (0.7147) time: 0.1674 data: 0.0886 max mem: 9377 +Train: [84] [3400/6250] eta: 0:07:02 lr: 0.000008 grad: 0.2010 (0.2093) loss: 0.7147 (0.7146) time: 0.1820 data: 0.1049 max mem: 9377 +Train: [84] [3500/6250] eta: 0:06:49 lr: 0.000008 grad: 0.2129 (0.2092) loss: 0.7045 (0.7145) time: 0.1744 data: 0.0950 max mem: 9377 +Train: [84] [3600/6250] eta: 0:06:34 lr: 0.000008 grad: 0.2103 (0.2092) loss: 0.7072 (0.7143) time: 0.1599 data: 0.0842 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:19 lr: 0.000008 grad: 0.2132 (0.2092) loss: 0.6965 (0.7141) time: 0.1494 data: 0.0668 max mem: 9377 +Train: [84] [3800/6250] eta: 0:06:05 lr: 0.000008 grad: 0.2031 (0.2091) loss: 0.7106 (0.7140) time: 0.1448 data: 0.0619 max mem: 9377 +Train: [84] [3900/6250] eta: 0:05:49 lr: 0.000008 grad: 0.2056 (0.2090) loss: 0.6999 (0.7139) time: 0.1471 data: 0.0635 max mem: 9377 +Train: [84] [4000/6250] eta: 0:05:33 lr: 0.000008 grad: 0.2035 (0.2089) loss: 0.7223 (0.7140) time: 0.1296 data: 0.0468 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:18 lr: 0.000008 grad: 0.1997 (0.2088) loss: 0.7267 (0.7140) time: 0.1496 data: 0.0667 max mem: 9377 +Train: [84] [4200/6250] eta: 0:05:03 lr: 0.000008 grad: 0.2026 (0.2087) loss: 0.7098 (0.7141) time: 0.1290 data: 0.0494 max mem: 9377 +Train: [84] [4300/6250] eta: 0:04:47 lr: 0.000008 grad: 0.2010 (0.2086) loss: 0.7231 (0.7142) time: 0.1302 data: 0.0503 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:32 lr: 0.000008 grad: 0.2018 (0.2085) loss: 0.7184 (0.7143) time: 0.1354 data: 0.0554 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:17 lr: 0.000008 grad: 0.2090 (0.2085) loss: 0.7149 (0.7143) time: 0.1340 data: 0.0495 max mem: 9377 +Train: [84] [4600/6250] eta: 0:04:02 lr: 0.000008 grad: 0.2077 (0.2085) loss: 0.7019 (0.7142) time: 0.1295 data: 0.0390 max mem: 9377 +Train: [84] [4700/6250] eta: 0:03:47 lr: 0.000008 grad: 0.2033 (0.2084) loss: 0.7146 (0.7143) time: 0.1237 data: 0.0329 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:32 lr: 0.000008 grad: 0.2076 (0.2084) loss: 0.7078 (0.7141) time: 0.1285 data: 0.0465 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:17 lr: 0.000008 grad: 0.2049 (0.2084) loss: 0.7159 (0.7141) time: 0.1378 data: 0.0563 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:02 lr: 0.000008 grad: 0.2076 (0.2084) loss: 0.7062 (0.7139) time: 0.1180 data: 0.0241 max mem: 9377 +Train: [84] [5100/6250] eta: 0:02:47 lr: 0.000008 grad: 0.2040 (0.2084) loss: 0.7139 (0.7140) time: 0.1265 data: 0.0397 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:32 lr: 0.000008 grad: 0.2013 (0.2083) loss: 0.7160 (0.7140) time: 0.1306 data: 0.0463 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:18 lr: 0.000008 grad: 0.2070 (0.2083) loss: 0.7048 (0.7138) time: 0.1388 data: 0.0603 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:03 lr: 0.000008 grad: 0.2075 (0.2083) loss: 0.7077 (0.7137) time: 0.1634 data: 0.0843 max mem: 9377 +Train: [84] [5500/6250] eta: 0:01:49 lr: 0.000008 grad: 0.2077 (0.2084) loss: 0.7055 (0.7135) time: 0.1416 data: 0.0592 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:34 lr: 0.000008 grad: 0.2112 (0.2084) loss: 0.6968 (0.7134) time: 0.1329 data: 0.0516 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:20 lr: 0.000008 grad: 0.2056 (0.2084) loss: 0.7137 (0.7134) time: 0.1423 data: 0.0698 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:05 lr: 0.000008 grad: 0.2035 (0.2084) loss: 0.7224 (0.7133) time: 0.1481 data: 0.0663 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:51 lr: 0.000008 grad: 0.2101 (0.2084) loss: 0.7008 (0.7132) time: 0.1560 data: 0.0769 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:36 lr: 0.000008 grad: 0.2110 (0.2084) loss: 0.7035 (0.7132) time: 0.1487 data: 0.0638 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:21 lr: 0.000008 grad: 0.2009 (0.2084) loss: 0.7109 (0.7132) time: 0.1317 data: 0.0503 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:07 lr: 0.000008 grad: 0.2016 (0.2084) loss: 0.7122 (0.7132) time: 0.1224 data: 0.0420 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.2055 (0.2084) loss: 0.7158 (0.7131) time: 0.1327 data: 0.0503 max mem: 9377 +Train: [84] Total time: 0:15:16 (0.1466 s / it) +Averaged stats: lr: 0.000008 grad: 0.2055 (0.2084) loss: 0.7158 (0.7131) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:04:12 loss: 0.8621 (0.8621) time: 4.0668 data: 3.9859 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8700 (0.8734) time: 0.1567 data: 0.1299 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (hcp-train-subset): loss: 0.8700 (0.8734) +Making plots (hcp-train-subset): example=49 +Eval (hcp-val): [84] [ 0/62] eta: 0:04:27 loss: 0.8757 (0.8757) time: 4.3204 data: 4.2279 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8679 (0.8722) time: 0.1569 data: 0.1293 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:15 (0.2440 s / it) +Averaged stats (hcp-val): loss: 0.8679 (0.8722) +Making plots (hcp-val): example=57 +Eval (nsd-val): [84] [ 0/62] eta: 0:06:06 loss: 0.8360 (0.8360) time: 5.9100 data: 5.8791 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8428 (0.8446) time: 0.1442 data: 0.1187 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:15 (0.2442 s / it) +Averaged stats (nsd-val): loss: 0.8428 (0.8446) +Making plots (nsd-val): example=40 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 8:56:54 lr: 0.000008 grad: 0.1890 (0.1890) loss: 0.8140 (0.8140) time: 5.1543 data: 4.9212 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:22:46 lr: 0.000008 grad: 0.2255 (0.2464) loss: 0.6967 (0.7166) time: 0.1679 data: 0.0694 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:18:58 lr: 0.000008 grad: 0.2198 (0.2386) loss: 0.7070 (0.7086) time: 0.1515 data: 0.0551 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:17:38 lr: 0.000007 grad: 0.2205 (0.2345) loss: 0.6937 (0.7039) time: 0.1497 data: 0.0586 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:17:13 lr: 0.000007 grad: 0.2045 (0.2290) loss: 0.7130 (0.7055) time: 0.1749 data: 0.0777 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:16:33 lr: 0.000007 grad: 0.2087 (0.2259) loss: 0.7181 (0.7056) time: 0.1433 data: 0.0538 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:15:46 lr: 0.000007 grad: 0.2074 (0.2235) loss: 0.7247 (0.7073) time: 0.1293 data: 0.0520 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:15:07 lr: 0.000007 grad: 0.2053 (0.2214) loss: 0.7224 (0.7083) time: 0.1366 data: 0.0459 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:14:40 lr: 0.000007 grad: 0.2047 (0.2199) loss: 0.7118 (0.7087) time: 0.1416 data: 0.0563 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:14:13 lr: 0.000007 grad: 0.2071 (0.2187) loss: 0.7178 (0.7093) time: 0.1618 data: 0.0727 max mem: 9377 +Train: [85] [1000/6250] eta: 0:13:42 lr: 0.000007 grad: 0.2110 (0.2176) loss: 0.6998 (0.7095) time: 0.1308 data: 0.0470 max mem: 9377 +Train: [85] [1100/6250] eta: 0:13:15 lr: 0.000007 grad: 0.2088 (0.2168) loss: 0.7175 (0.7099) time: 0.1363 data: 0.0456 max mem: 9377 +Train: [85] [1200/6250] eta: 0:12:51 lr: 0.000007 grad: 0.2060 (0.2160) loss: 0.7119 (0.7103) time: 0.1380 data: 0.0542 max mem: 9377 +Train: [85] [1300/6250] eta: 0:12:29 lr: 0.000007 grad: 0.2067 (0.2154) loss: 0.7212 (0.7105) time: 0.1331 data: 0.0558 max mem: 9377 +Train: [85] [1400/6250] eta: 0:12:06 lr: 0.000007 grad: 0.2058 (0.2148) loss: 0.7111 (0.7109) time: 0.1278 data: 0.0440 max mem: 9377 +Train: [85] [1500/6250] eta: 0:11:49 lr: 0.000007 grad: 0.2008 (0.2146) loss: 0.7154 (0.7110) time: 0.1383 data: 0.0543 max mem: 9377 +Train: [85] [1600/6250] eta: 0:11:31 lr: 0.000007 grad: 0.2085 (0.2142) loss: 0.6983 (0.7111) time: 0.1231 data: 0.0360 max mem: 9377 +Train: [85] [1700/6250] eta: 0:11:15 lr: 0.000007 grad: 0.2059 (0.2139) loss: 0.7074 (0.7112) time: 0.1739 data: 0.0958 max mem: 9377 +Train: [85] [1800/6250] eta: 0:11:02 lr: 0.000007 grad: 0.2058 (0.2135) loss: 0.7087 (0.7112) time: 0.1602 data: 0.0781 max mem: 9377 +Train: [85] [1900/6250] eta: 0:10:52 lr: 0.000007 grad: 0.2061 (0.2133) loss: 0.7117 (0.7112) time: 0.1497 data: 0.0636 max mem: 9377 +Train: [85] [2000/6250] eta: 0:10:37 lr: 0.000007 grad: 0.2022 (0.2130) loss: 0.7117 (0.7112) time: 0.1355 data: 0.0539 max mem: 9377 +Train: [85] [2100/6250] eta: 0:10:20 lr: 0.000007 grad: 0.2083 (0.2128) loss: 0.7191 (0.7113) time: 0.1457 data: 0.0642 max mem: 9377 +Train: [85] [2200/6250] eta: 0:10:03 lr: 0.000007 grad: 0.2066 (0.2125) loss: 0.7135 (0.7113) time: 0.1298 data: 0.0441 max mem: 9377 +Train: [85] [2300/6250] eta: 0:09:46 lr: 0.000007 grad: 0.2010 (0.2123) loss: 0.7123 (0.7113) time: 0.1307 data: 0.0493 max mem: 9377 +Train: [85] [2400/6250] eta: 0:09:29 lr: 0.000007 grad: 0.2127 (0.2121) loss: 0.7172 (0.7113) time: 0.1247 data: 0.0390 max mem: 9377 +Train: [85] [2500/6250] eta: 0:09:13 lr: 0.000007 grad: 0.2065 (0.2120) loss: 0.7154 (0.7112) time: 0.1245 data: 0.0398 max mem: 9377 +Train: [85] [2600/6250] eta: 0:08:57 lr: 0.000007 grad: 0.2000 (0.2118) loss: 0.7199 (0.7112) time: 0.1415 data: 0.0584 max mem: 9377 +Train: [85] [2700/6250] eta: 0:08:39 lr: 0.000007 grad: 0.2092 (0.2116) loss: 0.7136 (0.7113) time: 0.1141 data: 0.0243 max mem: 9377 +Train: [85] [2800/6250] eta: 0:08:22 lr: 0.000007 grad: 0.1959 (0.2114) loss: 0.7160 (0.7114) time: 0.1266 data: 0.0452 max mem: 9377 +Train: [85] [2900/6250] eta: 0:08:06 lr: 0.000007 grad: 0.2133 (0.2113) loss: 0.7116 (0.7113) time: 0.1319 data: 0.0510 max mem: 9377 +Train: [85] [3000/6250] eta: 0:07:51 lr: 0.000007 grad: 0.2020 (0.2111) loss: 0.7210 (0.7114) time: 0.1400 data: 0.0568 max mem: 9377 +Train: [85] [3100/6250] eta: 0:07:35 lr: 0.000007 grad: 0.2058 (0.2110) loss: 0.7078 (0.7114) time: 0.1334 data: 0.0522 max mem: 9377 +Train: [85] [3200/6250] eta: 0:07:19 lr: 0.000007 grad: 0.1993 (0.2108) loss: 0.7151 (0.7114) time: 0.1193 data: 0.0384 max mem: 9377 +Train: [85] [3300/6250] eta: 0:07:04 lr: 0.000007 grad: 0.2033 (0.2107) loss: 0.7138 (0.7115) time: 0.1369 data: 0.0523 max mem: 9377 +Train: [85] [3400/6250] eta: 0:06:50 lr: 0.000007 grad: 0.2074 (0.2106) loss: 0.7101 (0.7115) time: 0.1552 data: 0.0739 max mem: 9377 +Train: [85] [3500/6250] eta: 0:06:35 lr: 0.000007 grad: 0.1995 (0.2105) loss: 0.7151 (0.7116) time: 0.1253 data: 0.0402 max mem: 9377 +Train: [85] [3600/6250] eta: 0:06:20 lr: 0.000007 grad: 0.2124 (0.2103) loss: 0.7093 (0.7116) time: 0.1269 data: 0.0401 max mem: 9377 +Train: [85] [3700/6250] eta: 0:06:06 lr: 0.000007 grad: 0.2049 (0.2102) loss: 0.7173 (0.7117) time: 0.1278 data: 0.0461 max mem: 9377 +Train: [85] [3800/6250] eta: 0:05:51 lr: 0.000007 grad: 0.2089 (0.2101) loss: 0.7061 (0.7117) time: 0.1330 data: 0.0509 max mem: 9377 +Train: [85] [3900/6250] eta: 0:05:36 lr: 0.000007 grad: 0.2107 (0.2101) loss: 0.7118 (0.7116) time: 0.1364 data: 0.0516 max mem: 9377 +Train: [85] [4000/6250] eta: 0:05:21 lr: 0.000007 grad: 0.2073 (0.2100) loss: 0.7093 (0.7116) time: 0.1404 data: 0.0589 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:07 lr: 0.000007 grad: 0.2056 (0.2100) loss: 0.7047 (0.7115) time: 0.1470 data: 0.0663 max mem: 9377 +Train: [85] [4200/6250] eta: 0:04:53 lr: 0.000007 grad: 0.2046 (0.2099) loss: 0.6967 (0.7115) time: 0.1427 data: 0.0634 max mem: 9377 +Train: [85] [4300/6250] eta: 0:04:38 lr: 0.000007 grad: 0.2018 (0.2099) loss: 0.7068 (0.7114) time: 0.1511 data: 0.0700 max mem: 9377 +Train: [85] [4400/6250] eta: 0:04:24 lr: 0.000007 grad: 0.2029 (0.2098) loss: 0.7087 (0.7114) time: 0.1583 data: 0.0824 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:09 lr: 0.000007 grad: 0.2013 (0.2097) loss: 0.7197 (0.7114) time: 0.1440 data: 0.0597 max mem: 9377 +Train: [85] [4600/6250] eta: 0:03:55 lr: 0.000007 grad: 0.2048 (0.2097) loss: 0.7212 (0.7114) time: 0.1262 data: 0.0437 max mem: 9377 +Train: [85] [4700/6250] eta: 0:03:41 lr: 0.000007 grad: 0.2025 (0.2096) loss: 0.7107 (0.7114) time: 0.1470 data: 0.0723 max mem: 9377 +Train: [85] [4800/6250] eta: 0:03:26 lr: 0.000007 grad: 0.2028 (0.2095) loss: 0.7224 (0.7115) time: 0.1309 data: 0.0513 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:12 lr: 0.000007 grad: 0.1983 (0.2095) loss: 0.7101 (0.7115) time: 0.1415 data: 0.0575 max mem: 9377 +Train: [85] [5000/6250] eta: 0:02:58 lr: 0.000007 grad: 0.2052 (0.2094) loss: 0.7076 (0.7115) time: 0.1487 data: 0.0696 max mem: 9377 +Train: [85] [5100/6250] eta: 0:02:43 lr: 0.000007 grad: 0.2132 (0.2094) loss: 0.7030 (0.7115) time: 0.1215 data: 0.0437 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:29 lr: 0.000007 grad: 0.2104 (0.2093) loss: 0.6997 (0.7116) time: 0.1344 data: 0.0538 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:15 lr: 0.000007 grad: 0.2095 (0.2093) loss: 0.7085 (0.7115) time: 0.1529 data: 0.0740 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:00 lr: 0.000007 grad: 0.2034 (0.2092) loss: 0.7189 (0.7116) time: 0.1476 data: 0.0640 max mem: 9377 +Train: [85] [5500/6250] eta: 0:01:46 lr: 0.000007 grad: 0.2117 (0.2092) loss: 0.6989 (0.7116) time: 0.1321 data: 0.0524 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:32 lr: 0.000007 grad: 0.1987 (0.2092) loss: 0.7328 (0.7117) time: 0.1357 data: 0.0517 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:18 lr: 0.000007 grad: 0.2043 (0.2091) loss: 0.7089 (0.7117) time: 0.1422 data: 0.0559 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:03 lr: 0.000007 grad: 0.2048 (0.2091) loss: 0.7136 (0.7118) time: 0.1446 data: 0.0650 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:49 lr: 0.000007 grad: 0.2038 (0.2091) loss: 0.7070 (0.7118) time: 0.1414 data: 0.0600 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:35 lr: 0.000007 grad: 0.2077 (0.2090) loss: 0.7023 (0.7118) time: 0.1406 data: 0.0606 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:21 lr: 0.000007 grad: 0.2107 (0.2090) loss: 0.7220 (0.7118) time: 0.1537 data: 0.0814 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:07 lr: 0.000007 grad: 0.2042 (0.2090) loss: 0.7135 (0.7118) time: 0.1228 data: 0.0385 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.2096 (0.2089) loss: 0.7040 (0.7118) time: 0.1674 data: 0.0816 max mem: 9377 +Train: [85] Total time: 0:14:50 (0.1425 s / it) +Averaged stats: lr: 0.000007 grad: 0.2096 (0.2089) loss: 0.7040 (0.7118) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:05:12 loss: 0.8683 (0.8683) time: 5.0371 data: 4.9654 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8716 (0.8750) time: 0.1404 data: 0.1152 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:13 (0.2254 s / it) +Averaged stats (hcp-train-subset): loss: 0.8716 (0.8750) +Eval (hcp-val): [85] [ 0/62] eta: 0:05:11 loss: 0.8780 (0.8780) time: 5.0166 data: 4.9748 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8731 (0.8721) time: 0.1183 data: 0.0934 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:14 (0.2361 s / it) +Averaged stats (hcp-val): loss: 0.8731 (0.8721) +Eval (nsd-val): [85] [ 0/62] eta: 0:04:01 loss: 0.8308 (0.8308) time: 3.8975 data: 3.8124 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8439 (0.8438) time: 0.1173 data: 0.0922 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:13 (0.2173 s / it) +Averaged stats (nsd-val): loss: 0.8439 (0.8438) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 10:06:25 lr: 0.000007 grad: 0.3240 (0.3240) loss: 0.6960 (0.6960) time: 5.8217 data: 5.6594 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:20:17 lr: 0.000007 grad: 0.2328 (0.2473) loss: 0.7187 (0.7253) time: 0.1461 data: 0.0531 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:17:09 lr: 0.000007 grad: 0.2278 (0.2380) loss: 0.6886 (0.7139) time: 0.1411 data: 0.0485 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:15:56 lr: 0.000007 grad: 0.2130 (0.2331) loss: 0.7201 (0.7098) time: 0.1391 data: 0.0527 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:15:18 lr: 0.000007 grad: 0.2078 (0.2284) loss: 0.7097 (0.7084) time: 0.1287 data: 0.0383 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:14:58 lr: 0.000007 grad: 0.2057 (0.2257) loss: 0.7201 (0.7083) time: 0.1604 data: 0.0661 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:14:54 lr: 0.000006 grad: 0.2018 (0.2225) loss: 0.7083 (0.7097) time: 0.1714 data: 0.0720 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:14:41 lr: 0.000006 grad: 0.2140 (0.2209) loss: 0.7147 (0.7101) time: 0.1554 data: 0.0662 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:14:28 lr: 0.000006 grad: 0.2103 (0.2198) loss: 0.6908 (0.7092) time: 0.1830 data: 0.1019 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:14:11 lr: 0.000006 grad: 0.2089 (0.2189) loss: 0.7014 (0.7091) time: 0.1760 data: 0.0881 max mem: 9377 +Train: [86] [1000/6250] eta: 0:13:54 lr: 0.000006 grad: 0.2053 (0.2178) loss: 0.7081 (0.7094) time: 0.1274 data: 0.0319 max mem: 9377 +Train: [86] [1100/6250] eta: 0:13:41 lr: 0.000006 grad: 0.2065 (0.2171) loss: 0.7074 (0.7093) time: 0.1577 data: 0.0656 max mem: 9377 +Train: [86] [1200/6250] eta: 0:13:22 lr: 0.000006 grad: 0.2002 (0.2164) loss: 0.7047 (0.7094) time: 0.1539 data: 0.0650 max mem: 9377 +Train: [86] [1300/6250] eta: 0:12:57 lr: 0.000006 grad: 0.2038 (0.2157) loss: 0.7120 (0.7095) time: 0.1329 data: 0.0360 max mem: 9377 +Train: [86] [1400/6250] eta: 0:12:35 lr: 0.000006 grad: 0.2041 (0.2152) loss: 0.6974 (0.7096) time: 0.1536 data: 0.0704 max mem: 9377 +Train: [86] [1500/6250] eta: 0:12:15 lr: 0.000006 grad: 0.2105 (0.2148) loss: 0.6850 (0.7094) time: 0.1479 data: 0.0598 max mem: 9377 +Train: [86] [1600/6250] eta: 0:12:00 lr: 0.000006 grad: 0.2088 (0.2145) loss: 0.7080 (0.7094) time: 0.1585 data: 0.0829 max mem: 9377 +Train: [86] [1700/6250] eta: 0:11:46 lr: 0.000006 grad: 0.2072 (0.2141) loss: 0.7155 (0.7096) time: 0.1636 data: 0.0861 max mem: 9377 +Train: [86] [1800/6250] eta: 0:11:38 lr: 0.000006 grad: 0.2051 (0.2139) loss: 0.7181 (0.7096) time: 0.1701 data: 0.0966 max mem: 9377 +Train: [86] [1900/6250] eta: 0:11:22 lr: 0.000006 grad: 0.2175 (0.2136) loss: 0.7080 (0.7097) time: 0.1522 data: 0.0658 max mem: 9377 +Train: [86] [2000/6250] eta: 0:11:05 lr: 0.000006 grad: 0.2062 (0.2133) loss: 0.7106 (0.7100) time: 0.1405 data: 0.0609 max mem: 9377 +Train: [86] [2100/6250] eta: 0:10:48 lr: 0.000006 grad: 0.2094 (0.2132) loss: 0.6916 (0.7099) time: 0.1471 data: 0.0736 max mem: 9377 +Train: [86] [2200/6250] eta: 0:10:29 lr: 0.000006 grad: 0.2058 (0.2130) loss: 0.7195 (0.7100) time: 0.1324 data: 0.0537 max mem: 9377 +Train: [86] [2300/6250] eta: 0:10:11 lr: 0.000006 grad: 0.2161 (0.2127) loss: 0.6970 (0.7103) time: 0.1239 data: 0.0345 max mem: 9377 +Train: [86] [2400/6250] eta: 0:09:54 lr: 0.000006 grad: 0.2047 (0.2125) loss: 0.7133 (0.7104) time: 0.1340 data: 0.0405 max mem: 9377 +Train: [86] [2500/6250] eta: 0:09:38 lr: 0.000006 grad: 0.2126 (0.2124) loss: 0.7009 (0.7105) time: 0.1460 data: 0.0510 max mem: 9377 +Train: [86] [2600/6250] eta: 0:09:21 lr: 0.000006 grad: 0.2152 (0.2123) loss: 0.7132 (0.7105) time: 0.1446 data: 0.0620 max mem: 9377 +Train: [86] [2700/6250] eta: 0:09:03 lr: 0.000006 grad: 0.2159 (0.2123) loss: 0.7017 (0.7103) time: 0.1184 data: 0.0360 max mem: 9377 +Train: [86] [2800/6250] eta: 0:08:47 lr: 0.000006 grad: 0.2123 (0.2122) loss: 0.7021 (0.7103) time: 0.1556 data: 0.0795 max mem: 9377 +Train: [86] [2900/6250] eta: 0:08:32 lr: 0.000006 grad: 0.2133 (0.2123) loss: 0.7010 (0.7100) time: 0.1748 data: 0.0916 max mem: 9377 +Train: [86] [3000/6250] eta: 0:08:16 lr: 0.000006 grad: 0.2104 (0.2123) loss: 0.7030 (0.7099) time: 0.1441 data: 0.0610 max mem: 9377 +Train: [86] [3100/6250] eta: 0:08:01 lr: 0.000006 grad: 0.2051 (0.2123) loss: 0.7063 (0.7098) time: 0.1249 data: 0.0481 max mem: 9377 +Train: [86] [3200/6250] eta: 0:07:46 lr: 0.000006 grad: 0.2097 (0.2123) loss: 0.7087 (0.7095) time: 0.1580 data: 0.0821 max mem: 9377 +Train: [86] [3300/6250] eta: 0:07:30 lr: 0.000006 grad: 0.2107 (0.2122) loss: 0.7020 (0.7094) time: 0.1452 data: 0.0657 max mem: 9377 +Train: [86] [3400/6250] eta: 0:07:16 lr: 0.000006 grad: 0.2110 (0.2122) loss: 0.6966 (0.7092) time: 0.1957 data: 0.1216 max mem: 9377 +Train: [86] [3500/6250] eta: 0:06:59 lr: 0.000006 grad: 0.2100 (0.2122) loss: 0.7016 (0.7091) time: 0.1314 data: 0.0470 max mem: 9377 +Train: [86] [3600/6250] eta: 0:06:43 lr: 0.000006 grad: 0.2084 (0.2121) loss: 0.7045 (0.7090) time: 0.1405 data: 0.0516 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:26 lr: 0.000006 grad: 0.2079 (0.2121) loss: 0.7011 (0.7089) time: 0.1481 data: 0.0647 max mem: 9377 +Train: [86] [3800/6250] eta: 0:06:10 lr: 0.000006 grad: 0.2081 (0.2120) loss: 0.7073 (0.7088) time: 0.1211 data: 0.0399 max mem: 9377 +Train: [86] [3900/6250] eta: 0:05:55 lr: 0.000006 grad: 0.2074 (0.2120) loss: 0.7172 (0.7088) time: 0.1576 data: 0.0803 max mem: 9377 +Train: [86] [4000/6250] eta: 0:05:39 lr: 0.000006 grad: 0.2084 (0.2119) loss: 0.7091 (0.7088) time: 0.1407 data: 0.0590 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:23 lr: 0.000006 grad: 0.2122 (0.2119) loss: 0.6999 (0.7088) time: 0.1446 data: 0.0604 max mem: 9377 +Train: [86] [4200/6250] eta: 0:05:07 lr: 0.000006 grad: 0.2111 (0.2119) loss: 0.7159 (0.7089) time: 0.1420 data: 0.0623 max mem: 9377 +Train: [86] [4300/6250] eta: 0:04:52 lr: 0.000006 grad: 0.2077 (0.2118) loss: 0.7063 (0.7089) time: 0.1533 data: 0.0763 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:36 lr: 0.000006 grad: 0.2063 (0.2117) loss: 0.7105 (0.7090) time: 0.1477 data: 0.0668 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:21 lr: 0.000006 grad: 0.2070 (0.2117) loss: 0.6980 (0.7091) time: 0.1326 data: 0.0483 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:05 lr: 0.000006 grad: 0.2036 (0.2116) loss: 0.7186 (0.7092) time: 0.1269 data: 0.0438 max mem: 9377 +Train: [86] [4700/6250] eta: 0:03:50 lr: 0.000006 grad: 0.2103 (0.2116) loss: 0.7085 (0.7093) time: 0.1392 data: 0.0571 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:35 lr: 0.000006 grad: 0.2016 (0.2115) loss: 0.7245 (0.7094) time: 0.1367 data: 0.0542 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:20 lr: 0.000006 grad: 0.2061 (0.2114) loss: 0.7085 (0.7095) time: 0.1373 data: 0.0527 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:05 lr: 0.000006 grad: 0.2120 (0.2113) loss: 0.7136 (0.7096) time: 0.1465 data: 0.0655 max mem: 9377 +Train: [86] [5100/6250] eta: 0:02:50 lr: 0.000006 grad: 0.2069 (0.2112) loss: 0.7160 (0.7097) time: 0.1466 data: 0.0646 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:35 lr: 0.000006 grad: 0.2134 (0.2112) loss: 0.6960 (0.7098) time: 0.1367 data: 0.0548 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:20 lr: 0.000006 grad: 0.2091 (0.2111) loss: 0.7032 (0.7098) time: 0.1496 data: 0.0731 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:05 lr: 0.000006 grad: 0.2016 (0.2110) loss: 0.7073 (0.7099) time: 0.1614 data: 0.0800 max mem: 9377 +Train: [86] [5500/6250] eta: 0:01:50 lr: 0.000006 grad: 0.1993 (0.2109) loss: 0.7224 (0.7100) time: 0.1503 data: 0.0690 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:35 lr: 0.000006 grad: 0.2134 (0.2109) loss: 0.7052 (0.7100) time: 0.1289 data: 0.0476 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:21 lr: 0.000006 grad: 0.2099 (0.2109) loss: 0.7060 (0.7100) time: 0.1094 data: 0.0201 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:06 lr: 0.000006 grad: 0.2048 (0.2108) loss: 0.7124 (0.7100) time: 0.1352 data: 0.0506 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:51 lr: 0.000006 grad: 0.2098 (0.2108) loss: 0.7091 (0.7100) time: 0.1421 data: 0.0554 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:36 lr: 0.000006 grad: 0.2080 (0.2108) loss: 0.7110 (0.7101) time: 0.1192 data: 0.0354 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:22 lr: 0.000006 grad: 0.2101 (0.2108) loss: 0.7010 (0.7100) time: 0.1446 data: 0.0675 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:07 lr: 0.000006 grad: 0.2039 (0.2108) loss: 0.7136 (0.7101) time: 0.1379 data: 0.0566 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.2095 (0.2108) loss: 0.7110 (0.7101) time: 0.1384 data: 0.0576 max mem: 9377 +Train: [86] Total time: 0:15:21 (0.1474 s / it) +Averaged stats: lr: 0.000006 grad: 0.2095 (0.2108) loss: 0.7110 (0.7101) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:06:00 loss: 0.8695 (0.8695) time: 5.8194 data: 5.7868 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8703 (0.8739) time: 0.1385 data: 0.1115 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:14 (0.2331 s / it) +Averaged stats (hcp-train-subset): loss: 0.8703 (0.8739) +Eval (hcp-val): [86] [ 0/62] eta: 0:03:59 loss: 0.8736 (0.8736) time: 3.8684 data: 3.7893 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8686 (0.8719) time: 0.1267 data: 0.1015 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (hcp-val): loss: 0.8686 (0.8719) +Eval (nsd-val): [86] [ 0/62] eta: 0:05:34 loss: 0.8348 (0.8348) time: 5.3918 data: 5.3615 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8449 (0.8461) time: 0.1426 data: 0.1170 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (nsd-val): loss: 0.8449 (0.8461) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [87] [ 0/6250] eta: 10:16:53 lr: 0.000006 grad: 0.2395 (0.2395) loss: 0.8209 (0.8209) time: 5.9222 data: 5.7754 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:21:18 lr: 0.000006 grad: 0.2240 (0.2506) loss: 0.7132 (0.7266) time: 0.1592 data: 0.0721 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:17:39 lr: 0.000006 grad: 0.2168 (0.2405) loss: 0.7231 (0.7174) time: 0.1561 data: 0.0592 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:16:23 lr: 0.000006 grad: 0.2208 (0.2354) loss: 0.7047 (0.7125) time: 0.1425 data: 0.0609 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:15:27 lr: 0.000006 grad: 0.2166 (0.2311) loss: 0.6967 (0.7099) time: 0.1432 data: 0.0503 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:14:51 lr: 0.000006 grad: 0.2118 (0.2274) loss: 0.6987 (0.7079) time: 0.1494 data: 0.0605 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:14:19 lr: 0.000006 grad: 0.2156 (0.2246) loss: 0.6911 (0.7075) time: 0.1355 data: 0.0466 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:13:58 lr: 0.000006 grad: 0.2139 (0.2228) loss: 0.6894 (0.7071) time: 0.1994 data: 0.0998 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:13:40 lr: 0.000006 grad: 0.2065 (0.2215) loss: 0.7025 (0.7065) time: 0.1523 data: 0.0720 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:13:21 lr: 0.000006 grad: 0.2081 (0.2200) loss: 0.7048 (0.7064) time: 0.1524 data: 0.0664 max mem: 9377 +Train: [87] [1000/6250] eta: 0:12:59 lr: 0.000006 grad: 0.2115 (0.2191) loss: 0.7040 (0.7061) time: 0.1460 data: 0.0714 max mem: 9377 +Train: [87] [1100/6250] eta: 0:12:40 lr: 0.000006 grad: 0.2101 (0.2184) loss: 0.7001 (0.7057) time: 0.1454 data: 0.0636 max mem: 9377 +Train: [87] [1200/6250] eta: 0:12:19 lr: 0.000006 grad: 0.2062 (0.2176) loss: 0.7101 (0.7057) time: 0.1342 data: 0.0479 max mem: 9377 +Train: [87] [1300/6250] eta: 0:12:02 lr: 0.000006 grad: 0.2114 (0.2173) loss: 0.7005 (0.7055) time: 0.1313 data: 0.0533 max mem: 9377 +Train: [87] [1400/6250] eta: 0:11:44 lr: 0.000005 grad: 0.2138 (0.2168) loss: 0.6971 (0.7053) time: 0.1508 data: 0.0695 max mem: 9377 +Train: [87] [1500/6250] eta: 0:11:28 lr: 0.000005 grad: 0.2097 (0.2163) loss: 0.7046 (0.7055) time: 0.1540 data: 0.0691 max mem: 9377 +Train: [87] [1600/6250] eta: 0:11:11 lr: 0.000005 grad: 0.2068 (0.2157) loss: 0.7114 (0.7059) time: 0.1259 data: 0.0349 max mem: 9377 +Train: [87] [1700/6250] eta: 0:10:54 lr: 0.000005 grad: 0.2057 (0.2152) loss: 0.7141 (0.7063) time: 0.1538 data: 0.0707 max mem: 9377 +Train: [87] [1800/6250] eta: 0:10:36 lr: 0.000005 grad: 0.2044 (0.2147) loss: 0.7100 (0.7067) time: 0.1162 data: 0.0330 max mem: 9377 +Train: [87] [1900/6250] eta: 0:10:23 lr: 0.000005 grad: 0.2027 (0.2144) loss: 0.7273 (0.7070) time: 0.1418 data: 0.0586 max mem: 9377 +Train: [87] [2000/6250] eta: 0:10:11 lr: 0.000005 grad: 0.2053 (0.2140) loss: 0.7095 (0.7073) time: 0.1787 data: 0.0984 max mem: 9377 +Train: [87] [2100/6250] eta: 0:09:59 lr: 0.000005 grad: 0.2048 (0.2138) loss: 0.7128 (0.7075) time: 0.1299 data: 0.0452 max mem: 9377 +Train: [87] [2200/6250] eta: 0:09:45 lr: 0.000005 grad: 0.2112 (0.2136) loss: 0.7162 (0.7077) time: 0.1384 data: 0.0552 max mem: 9377 +Train: [87] [2300/6250] eta: 0:09:29 lr: 0.000005 grad: 0.2044 (0.2133) loss: 0.7197 (0.7080) time: 0.1345 data: 0.0508 max mem: 9377 +Train: [87] [2400/6250] eta: 0:09:14 lr: 0.000005 grad: 0.2082 (0.2130) loss: 0.7072 (0.7082) time: 0.1418 data: 0.0573 max mem: 9377 +Train: [87] [2500/6250] eta: 0:08:58 lr: 0.000005 grad: 0.2059 (0.2129) loss: 0.7194 (0.7085) time: 0.1293 data: 0.0480 max mem: 9377 +Train: [87] [2600/6250] eta: 0:08:42 lr: 0.000005 grad: 0.2128 (0.2127) loss: 0.7089 (0.7088) time: 0.1325 data: 0.0520 max mem: 9377 +Train: [87] [2700/6250] eta: 0:08:26 lr: 0.000005 grad: 0.1983 (0.2126) loss: 0.7120 (0.7089) time: 0.1339 data: 0.0503 max mem: 9377 +Train: [87] [2800/6250] eta: 0:08:10 lr: 0.000005 grad: 0.2035 (0.2124) loss: 0.7239 (0.7092) time: 0.1228 data: 0.0401 max mem: 9377 +Train: [87] [2900/6250] eta: 0:07:55 lr: 0.000005 grad: 0.2068 (0.2123) loss: 0.7111 (0.7092) time: 0.1262 data: 0.0461 max mem: 9377 +Train: [87] [3000/6250] eta: 0:07:41 lr: 0.000005 grad: 0.2088 (0.2122) loss: 0.7101 (0.7093) time: 0.1450 data: 0.0674 max mem: 9377 +Train: [87] [3100/6250] eta: 0:07:27 lr: 0.000005 grad: 0.2022 (0.2121) loss: 0.7091 (0.7093) time: 0.1351 data: 0.0541 max mem: 9377 +Train: [87] [3200/6250] eta: 0:07:14 lr: 0.000005 grad: 0.2111 (0.2120) loss: 0.7182 (0.7093) time: 0.1343 data: 0.0529 max mem: 9377 +Train: [87] [3300/6250] eta: 0:06:59 lr: 0.000005 grad: 0.2116 (0.2119) loss: 0.7161 (0.7095) time: 0.1326 data: 0.0510 max mem: 9377 +Train: [87] [3400/6250] eta: 0:06:44 lr: 0.000005 grad: 0.2162 (0.2118) loss: 0.6966 (0.7096) time: 0.1027 data: 0.0181 max mem: 9377 +Train: [87] [3500/6250] eta: 0:06:29 lr: 0.000005 grad: 0.2134 (0.2118) loss: 0.7187 (0.7096) time: 0.1346 data: 0.0528 max mem: 9377 +Train: [87] [3600/6250] eta: 0:06:14 lr: 0.000005 grad: 0.2042 (0.2117) loss: 0.7104 (0.7098) time: 0.1249 data: 0.0390 max mem: 9377 +Train: [87] [3700/6250] eta: 0:06:00 lr: 0.000005 grad: 0.2059 (0.2116) loss: 0.7206 (0.7099) time: 0.1369 data: 0.0539 max mem: 9377 +Train: [87] [3800/6250] eta: 0:05:46 lr: 0.000005 grad: 0.2079 (0.2114) loss: 0.7169 (0.7101) time: 0.1610 data: 0.0791 max mem: 9377 +Train: [87] [3900/6250] eta: 0:05:31 lr: 0.000005 grad: 0.2134 (0.2114) loss: 0.7140 (0.7101) time: 0.1266 data: 0.0537 max mem: 9377 +Train: [87] [4000/6250] eta: 0:05:17 lr: 0.000005 grad: 0.2039 (0.2114) loss: 0.7130 (0.7101) time: 0.1480 data: 0.0716 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:02 lr: 0.000005 grad: 0.2080 (0.2113) loss: 0.7147 (0.7103) time: 0.1345 data: 0.0559 max mem: 9377 +Train: [87] [4200/6250] eta: 0:04:48 lr: 0.000005 grad: 0.1960 (0.2111) loss: 0.7170 (0.7104) time: 0.1307 data: 0.0498 max mem: 9377 +Train: [87] [4300/6250] eta: 0:04:34 lr: 0.000005 grad: 0.2019 (0.2110) loss: 0.7094 (0.7105) time: 0.1218 data: 0.0417 max mem: 9377 +Train: [87] [4400/6250] eta: 0:04:20 lr: 0.000005 grad: 0.2007 (0.2109) loss: 0.7274 (0.7107) time: 0.1180 data: 0.0357 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:06 lr: 0.000005 grad: 0.2061 (0.2108) loss: 0.7256 (0.7109) time: 0.1512 data: 0.0751 max mem: 9377 +Train: [87] [4600/6250] eta: 0:03:52 lr: 0.000005 grad: 0.1995 (0.2107) loss: 0.7137 (0.7110) time: 0.1249 data: 0.0399 max mem: 9377 +Train: [87] [4700/6250] eta: 0:03:38 lr: 0.000005 grad: 0.2038 (0.2106) loss: 0.7150 (0.7112) time: 0.1533 data: 0.0804 max mem: 9377 +Train: [87] [4800/6250] eta: 0:03:24 lr: 0.000005 grad: 0.1989 (0.2105) loss: 0.7242 (0.7113) time: 0.1204 data: 0.0368 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:10 lr: 0.000005 grad: 0.2043 (0.2104) loss: 0.7097 (0.7114) time: 0.1370 data: 0.0564 max mem: 9377 +Train: [87] [5000/6250] eta: 0:02:55 lr: 0.000005 grad: 0.1942 (0.2104) loss: 0.7270 (0.7115) time: 0.1404 data: 0.0572 max mem: 9377 +Train: [87] [5100/6250] eta: 0:02:41 lr: 0.000005 grad: 0.2072 (0.2103) loss: 0.7039 (0.7115) time: 0.1304 data: 0.0494 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:27 lr: 0.000005 grad: 0.1968 (0.2103) loss: 0.7273 (0.7117) time: 0.1522 data: 0.0705 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:13 lr: 0.000005 grad: 0.2049 (0.2102) loss: 0.7116 (0.7118) time: 0.1337 data: 0.0462 max mem: 9377 +Train: [87] [5400/6250] eta: 0:01:59 lr: 0.000005 grad: 0.2082 (0.2102) loss: 0.7189 (0.7118) time: 0.1435 data: 0.0587 max mem: 9377 +Train: [87] [5500/6250] eta: 0:01:45 lr: 0.000005 grad: 0.2062 (0.2101) loss: 0.7206 (0.7118) time: 0.1381 data: 0.0546 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:31 lr: 0.000005 grad: 0.2125 (0.2101) loss: 0.7201 (0.7117) time: 0.1395 data: 0.0521 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:17 lr: 0.000005 grad: 0.2075 (0.2101) loss: 0.7126 (0.7117) time: 0.1334 data: 0.0505 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:03 lr: 0.000005 grad: 0.2101 (0.2101) loss: 0.7054 (0.7117) time: 0.1317 data: 0.0456 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:49 lr: 0.000005 grad: 0.2022 (0.2101) loss: 0.7165 (0.7116) time: 0.1472 data: 0.0676 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:35 lr: 0.000005 grad: 0.2059 (0.2100) loss: 0.7122 (0.7116) time: 0.1390 data: 0.0556 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:21 lr: 0.000005 grad: 0.2066 (0.2100) loss: 0.7033 (0.7115) time: 0.1287 data: 0.0473 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:07 lr: 0.000005 grad: 0.2063 (0.2100) loss: 0.7030 (0.7115) time: 0.1530 data: 0.0749 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.2067 (0.2100) loss: 0.6947 (0.7114) time: 0.1456 data: 0.0657 max mem: 9377 +Train: [87] Total time: 0:14:40 (0.1409 s / it) +Averaged stats: lr: 0.000005 grad: 0.2067 (0.2100) loss: 0.6947 (0.7114) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:05:50 loss: 0.8666 (0.8666) time: 5.6548 data: 5.6249 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8718 (0.8741) time: 0.1144 data: 0.0890 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (hcp-train-subset): loss: 0.8718 (0.8741) +Eval (hcp-val): [87] [ 0/62] eta: 0:04:14 loss: 0.8822 (0.8822) time: 4.1087 data: 4.0370 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8703 (0.8712) time: 0.1302 data: 0.1050 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (hcp-val): loss: 0.8703 (0.8712) +Eval (nsd-val): [87] [ 0/62] eta: 0:03:51 loss: 0.8350 (0.8350) time: 3.7265 data: 3.6393 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8450 (0.8460) time: 0.1166 data: 0.0913 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:13 (0.2152 s / it) +Averaged stats (nsd-val): loss: 0.8450 (0.8460) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 10:34:20 lr: 0.000005 grad: 0.2343 (0.2343) loss: 0.7558 (0.7558) time: 6.0897 data: 5.9771 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:19:48 lr: 0.000005 grad: 0.2316 (0.2463) loss: 0.7055 (0.7101) time: 0.1550 data: 0.0606 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:17:05 lr: 0.000005 grad: 0.2122 (0.2332) loss: 0.7040 (0.7122) time: 0.1593 data: 0.0645 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:15:57 lr: 0.000005 grad: 0.2096 (0.2283) loss: 0.7110 (0.7102) time: 0.1457 data: 0.0448 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:15:43 lr: 0.000005 grad: 0.2120 (0.2252) loss: 0.6968 (0.7089) time: 0.1866 data: 0.0859 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:15:05 lr: 0.000005 grad: 0.2095 (0.2227) loss: 0.7066 (0.7089) time: 0.1404 data: 0.0528 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:14:27 lr: 0.000005 grad: 0.2182 (0.2210) loss: 0.7065 (0.7097) time: 0.1386 data: 0.0488 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:14:12 lr: 0.000005 grad: 0.2077 (0.2193) loss: 0.7286 (0.7107) time: 0.1614 data: 0.0716 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:13:48 lr: 0.000005 grad: 0.2161 (0.2186) loss: 0.7147 (0.7110) time: 0.1419 data: 0.0441 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:13:30 lr: 0.000005 grad: 0.2110 (0.2184) loss: 0.7128 (0.7109) time: 0.1287 data: 0.0399 max mem: 9377 +Train: [88] [1000/6250] eta: 0:13:20 lr: 0.000005 grad: 0.2041 (0.2177) loss: 0.7133 (0.7112) time: 0.1563 data: 0.0642 max mem: 9377 +Train: [88] [1100/6250] eta: 0:13:05 lr: 0.000005 grad: 0.2136 (0.2171) loss: 0.7010 (0.7111) time: 0.1407 data: 0.0585 max mem: 9377 +Train: [88] [1200/6250] eta: 0:12:51 lr: 0.000005 grad: 0.2094 (0.2165) loss: 0.7090 (0.7110) time: 0.1497 data: 0.0607 max mem: 9377 +Train: [88] [1300/6250] eta: 0:12:32 lr: 0.000005 grad: 0.2126 (0.2161) loss: 0.7131 (0.7110) time: 0.1337 data: 0.0553 max mem: 9377 +Train: [88] [1400/6250] eta: 0:12:12 lr: 0.000005 grad: 0.2151 (0.2159) loss: 0.7047 (0.7110) time: 0.1368 data: 0.0585 max mem: 9377 +Train: [88] [1500/6250] eta: 0:11:53 lr: 0.000005 grad: 0.2082 (0.2155) loss: 0.7138 (0.7111) time: 0.1613 data: 0.0795 max mem: 9377 +Train: [88] [1600/6250] eta: 0:11:36 lr: 0.000005 grad: 0.2085 (0.2150) loss: 0.7112 (0.7111) time: 0.1477 data: 0.0676 max mem: 9377 +Train: [88] [1700/6250] eta: 0:11:17 lr: 0.000005 grad: 0.2056 (0.2146) loss: 0.7211 (0.7112) time: 0.1380 data: 0.0510 max mem: 9377 +Train: [88] [1800/6250] eta: 0:10:58 lr: 0.000005 grad: 0.2059 (0.2143) loss: 0.7186 (0.7113) time: 0.1336 data: 0.0484 max mem: 9377 +Train: [88] [1900/6250] eta: 0:10:41 lr: 0.000005 grad: 0.2052 (0.2141) loss: 0.7213 (0.7112) time: 0.1539 data: 0.0673 max mem: 9377 +Train: [88] [2000/6250] eta: 0:10:27 lr: 0.000005 grad: 0.2062 (0.2139) loss: 0.7115 (0.7111) time: 0.1422 data: 0.0587 max mem: 9377 +Train: [88] [2100/6250] eta: 0:10:11 lr: 0.000005 grad: 0.2078 (0.2137) loss: 0.7081 (0.7110) time: 0.1406 data: 0.0612 max mem: 9377 +Train: [88] [2200/6250] eta: 0:09:54 lr: 0.000005 grad: 0.2056 (0.2133) loss: 0.7079 (0.7110) time: 0.1129 data: 0.0327 max mem: 9377 +Train: [88] [2300/6250] eta: 0:09:39 lr: 0.000005 grad: 0.2018 (0.2130) loss: 0.7172 (0.7111) time: 0.1420 data: 0.0485 max mem: 9377 +Train: [88] [2400/6250] eta: 0:09:24 lr: 0.000005 grad: 0.2051 (0.2128) loss: 0.7113 (0.7110) time: 0.1683 data: 0.0841 max mem: 9377 +Train: [88] [2500/6250] eta: 0:09:10 lr: 0.000005 grad: 0.2056 (0.2125) loss: 0.7143 (0.7110) time: 0.1427 data: 0.0518 max mem: 9377 +Train: [88] [2600/6250] eta: 0:08:55 lr: 0.000005 grad: 0.2083 (0.2123) loss: 0.7079 (0.7112) time: 0.1461 data: 0.0620 max mem: 9377 +Train: [88] [2700/6250] eta: 0:08:38 lr: 0.000005 grad: 0.2056 (0.2123) loss: 0.7149 (0.7112) time: 0.1224 data: 0.0319 max mem: 9377 +Train: [88] [2800/6250] eta: 0:08:22 lr: 0.000005 grad: 0.2041 (0.2120) loss: 0.7159 (0.7114) time: 0.1101 data: 0.0251 max mem: 9377 +Train: [88] [2900/6250] eta: 0:08:06 lr: 0.000004 grad: 0.2023 (0.2119) loss: 0.7138 (0.7116) time: 0.1193 data: 0.0337 max mem: 9377 +Train: [88] [3000/6250] eta: 0:07:51 lr: 0.000004 grad: 0.2003 (0.2116) loss: 0.7072 (0.7118) time: 0.1379 data: 0.0603 max mem: 9377 +Train: [88] [3100/6250] eta: 0:07:36 lr: 0.000004 grad: 0.2021 (0.2113) loss: 0.7185 (0.7119) time: 0.1476 data: 0.0680 max mem: 9377 +Train: [88] [3200/6250] eta: 0:07:23 lr: 0.000004 grad: 0.2033 (0.2111) loss: 0.7105 (0.7120) time: 0.1967 data: 0.1172 max mem: 9377 +Train: [88] [3300/6250] eta: 0:07:10 lr: 0.000004 grad: 0.2095 (0.2111) loss: 0.7084 (0.7121) time: 0.1473 data: 0.0717 max mem: 9377 +Train: [88] [3400/6250] eta: 0:06:56 lr: 0.000004 grad: 0.2067 (0.2109) loss: 0.7138 (0.7122) time: 0.1452 data: 0.0601 max mem: 9377 +Train: [88] [3500/6250] eta: 0:06:43 lr: 0.000004 grad: 0.2025 (0.2109) loss: 0.7130 (0.7122) time: 0.1490 data: 0.0715 max mem: 9377 +Train: [88] [3600/6250] eta: 0:06:29 lr: 0.000004 grad: 0.2077 (0.2109) loss: 0.6977 (0.7121) time: 0.1814 data: 0.1001 max mem: 9377 +Train: [88] [3700/6250] eta: 0:06:16 lr: 0.000004 grad: 0.2091 (0.2109) loss: 0.7038 (0.7120) time: 0.1464 data: 0.0657 max mem: 9377 +Train: [88] [3800/6250] eta: 0:06:01 lr: 0.000004 grad: 0.2016 (0.2108) loss: 0.7097 (0.7119) time: 0.1654 data: 0.0839 max mem: 9377 +Train: [88] [3900/6250] eta: 0:05:48 lr: 0.000004 grad: 0.2016 (0.2107) loss: 0.7086 (0.7119) time: 0.1731 data: 0.0898 max mem: 9377 +Train: [88] [4000/6250] eta: 0:05:33 lr: 0.000004 grad: 0.2014 (0.2106) loss: 0.7141 (0.7120) time: 0.1624 data: 0.0800 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:18 lr: 0.000004 grad: 0.2078 (0.2106) loss: 0.7082 (0.7120) time: 0.1427 data: 0.0571 max mem: 9377 +Train: [88] [4200/6250] eta: 0:05:03 lr: 0.000004 grad: 0.2032 (0.2105) loss: 0.7080 (0.7120) time: 0.1364 data: 0.0550 max mem: 9377 +Train: [88] [4300/6250] eta: 0:04:48 lr: 0.000004 grad: 0.2083 (0.2105) loss: 0.7201 (0.7120) time: 0.1400 data: 0.0566 max mem: 9377 +Train: [88] [4400/6250] eta: 0:04:32 lr: 0.000004 grad: 0.2063 (0.2104) loss: 0.6968 (0.7119) time: 0.1374 data: 0.0543 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:17 lr: 0.000004 grad: 0.2112 (0.2104) loss: 0.7033 (0.7118) time: 0.1510 data: 0.0627 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:02 lr: 0.000004 grad: 0.2009 (0.2103) loss: 0.7251 (0.7117) time: 0.1554 data: 0.0744 max mem: 9377 +Train: [88] [4700/6250] eta: 0:03:47 lr: 0.000004 grad: 0.1983 (0.2103) loss: 0.7103 (0.7117) time: 0.1403 data: 0.0589 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:32 lr: 0.000004 grad: 0.2001 (0.2102) loss: 0.7188 (0.7117) time: 0.1256 data: 0.0434 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:17 lr: 0.000004 grad: 0.2049 (0.2101) loss: 0.7176 (0.7117) time: 0.1504 data: 0.0668 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:02 lr: 0.000004 grad: 0.1975 (0.2099) loss: 0.7242 (0.7118) time: 0.1615 data: 0.0856 max mem: 9377 +Train: [88] [5100/6250] eta: 0:02:48 lr: 0.000004 grad: 0.2044 (0.2098) loss: 0.7274 (0.7120) time: 0.1368 data: 0.0549 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:33 lr: 0.000004 grad: 0.2107 (0.2098) loss: 0.7207 (0.7122) time: 0.1558 data: 0.0723 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:18 lr: 0.000004 grad: 0.2021 (0.2097) loss: 0.7172 (0.7123) time: 0.1443 data: 0.0622 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:03 lr: 0.000004 grad: 0.1962 (0.2096) loss: 0.7235 (0.7125) time: 0.1445 data: 0.0622 max mem: 9377 +Train: [88] [5500/6250] eta: 0:01:49 lr: 0.000004 grad: 0.2025 (0.2095) loss: 0.7254 (0.7126) time: 0.1344 data: 0.0515 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:34 lr: 0.000004 grad: 0.2034 (0.2094) loss: 0.7123 (0.7128) time: 0.1325 data: 0.0459 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:19 lr: 0.000004 grad: 0.1946 (0.2094) loss: 0.7228 (0.7129) time: 0.1297 data: 0.0386 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:05 lr: 0.000004 grad: 0.2055 (0.2093) loss: 0.7202 (0.7130) time: 0.1258 data: 0.0362 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:50 lr: 0.000004 grad: 0.2061 (0.2092) loss: 0.7058 (0.7131) time: 0.1207 data: 0.0420 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:36 lr: 0.000004 grad: 0.2042 (0.2092) loss: 0.7138 (0.7131) time: 0.1205 data: 0.0378 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:21 lr: 0.000004 grad: 0.1988 (0.2091) loss: 0.7119 (0.7131) time: 0.1474 data: 0.0627 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:07 lr: 0.000004 grad: 0.2023 (0.2091) loss: 0.7079 (0.7131) time: 0.1106 data: 0.0266 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.1994 (0.2090) loss: 0.7076 (0.7131) time: 0.1324 data: 0.0539 max mem: 9377 +Train: [88] Total time: 0:15:12 (0.1460 s / it) +Averaged stats: lr: 0.000004 grad: 0.1994 (0.2090) loss: 0.7076 (0.7131) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:03:54 loss: 0.8683 (0.8683) time: 3.7818 data: 3.6957 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8715 (0.8760) time: 0.1415 data: 0.1156 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:13 (0.2194 s / it) +Averaged stats (hcp-train-subset): loss: 0.8715 (0.8760) +Eval (hcp-val): [88] [ 0/62] eta: 0:05:31 loss: 0.8752 (0.8752) time: 5.3542 data: 5.3244 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8729 (0.8730) time: 0.1502 data: 0.1245 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-val): loss: 0.8729 (0.8730) +Eval (nsd-val): [88] [ 0/62] eta: 0:04:15 loss: 0.8370 (0.8370) time: 4.1140 data: 4.0456 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8450 (0.8468) time: 0.1308 data: 0.1037 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (nsd-val): loss: 0.8450 (0.8468) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 10:39:41 lr: 0.000004 grad: 0.3582 (0.3582) loss: 0.6239 (0.6239) time: 6.1411 data: 6.0468 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:19:43 lr: 0.000004 grad: 0.2291 (0.2362) loss: 0.7182 (0.7194) time: 0.1388 data: 0.0455 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:17:15 lr: 0.000004 grad: 0.2253 (0.2308) loss: 0.6849 (0.7095) time: 0.1463 data: 0.0450 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:16:07 lr: 0.000004 grad: 0.2232 (0.2304) loss: 0.6856 (0.7020) time: 0.1746 data: 0.0770 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:15:17 lr: 0.000004 grad: 0.2213 (0.2298) loss: 0.6870 (0.6982) time: 0.1577 data: 0.0740 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:14:32 lr: 0.000004 grad: 0.2069 (0.2278) loss: 0.7009 (0.6984) time: 0.1299 data: 0.0306 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:13:59 lr: 0.000004 grad: 0.2089 (0.2258) loss: 0.7144 (0.7000) time: 0.1323 data: 0.0356 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:13:33 lr: 0.000004 grad: 0.2053 (0.2239) loss: 0.7164 (0.7017) time: 0.1230 data: 0.0215 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:13:13 lr: 0.000004 grad: 0.2087 (0.2224) loss: 0.6988 (0.7026) time: 0.1526 data: 0.0604 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:12:50 lr: 0.000004 grad: 0.2093 (0.2209) loss: 0.7173 (0.7039) time: 0.1499 data: 0.0602 max mem: 9377 +Train: [89] [1000/6250] eta: 0:12:31 lr: 0.000004 grad: 0.2087 (0.2195) loss: 0.7190 (0.7049) time: 0.1186 data: 0.0193 max mem: 9377 +Train: [89] [1100/6250] eta: 0:12:12 lr: 0.000004 grad: 0.2082 (0.2186) loss: 0.7001 (0.7058) time: 0.1251 data: 0.0342 max mem: 9377 +Train: [89] [1200/6250] eta: 0:11:54 lr: 0.000004 grad: 0.2058 (0.2179) loss: 0.7046 (0.7061) time: 0.1376 data: 0.0525 max mem: 9377 +Train: [89] [1300/6250] eta: 0:11:38 lr: 0.000004 grad: 0.2060 (0.2172) loss: 0.7025 (0.7063) time: 0.1339 data: 0.0434 max mem: 9377 +Train: [89] [1400/6250] eta: 0:11:33 lr: 0.000004 grad: 0.2096 (0.2164) loss: 0.6964 (0.7066) time: 0.1610 data: 0.0680 max mem: 9377 +Train: [89] [1500/6250] eta: 0:11:24 lr: 0.000004 grad: 0.2069 (0.2160) loss: 0.7009 (0.7067) time: 0.1863 data: 0.0926 max mem: 9377 +Train: [89] [1600/6250] eta: 0:11:14 lr: 0.000004 grad: 0.2103 (0.2157) loss: 0.7027 (0.7066) time: 0.1531 data: 0.0733 max mem: 9377 +Train: [89] [1700/6250] eta: 0:11:03 lr: 0.000004 grad: 0.2065 (0.2155) loss: 0.7039 (0.7066) time: 0.1832 data: 0.0969 max mem: 9377 +Train: [89] [1800/6250] eta: 0:10:50 lr: 0.000004 grad: 0.2099 (0.2153) loss: 0.7082 (0.7068) time: 0.1577 data: 0.0724 max mem: 9377 +Train: [89] [1900/6250] eta: 0:10:37 lr: 0.000004 grad: 0.2135 (0.2150) loss: 0.6913 (0.7068) time: 0.1501 data: 0.0623 max mem: 9377 +Train: [89] [2000/6250] eta: 0:10:27 lr: 0.000004 grad: 0.2107 (0.2147) loss: 0.7098 (0.7070) time: 0.1546 data: 0.0685 max mem: 9377 +Train: [89] [2100/6250] eta: 0:10:11 lr: 0.000004 grad: 0.2041 (0.2144) loss: 0.7175 (0.7071) time: 0.1504 data: 0.0715 max mem: 9377 +Train: [89] [2200/6250] eta: 0:09:54 lr: 0.000004 grad: 0.2053 (0.2141) loss: 0.7127 (0.7071) time: 0.1299 data: 0.0484 max mem: 9377 +Train: [89] [2300/6250] eta: 0:09:37 lr: 0.000004 grad: 0.2105 (0.2140) loss: 0.6947 (0.7070) time: 0.1406 data: 0.0583 max mem: 9377 +Train: [89] [2400/6250] eta: 0:09:22 lr: 0.000004 grad: 0.2097 (0.2137) loss: 0.7038 (0.7071) time: 0.1429 data: 0.0591 max mem: 9377 +Train: [89] [2500/6250] eta: 0:09:06 lr: 0.000004 grad: 0.2060 (0.2136) loss: 0.7132 (0.7071) time: 0.1282 data: 0.0447 max mem: 9377 +Train: [89] [2600/6250] eta: 0:08:51 lr: 0.000004 grad: 0.2078 (0.2135) loss: 0.7115 (0.7072) time: 0.1486 data: 0.0627 max mem: 9377 +Train: [89] [2700/6250] eta: 0:08:34 lr: 0.000004 grad: 0.2141 (0.2133) loss: 0.7091 (0.7074) time: 0.1341 data: 0.0443 max mem: 9377 +Train: [89] [2800/6250] eta: 0:08:17 lr: 0.000004 grad: 0.2011 (0.2131) loss: 0.7097 (0.7074) time: 0.1236 data: 0.0373 max mem: 9377 +Train: [89] [2900/6250] eta: 0:08:01 lr: 0.000004 grad: 0.2153 (0.2131) loss: 0.6967 (0.7074) time: 0.1255 data: 0.0403 max mem: 9377 +Train: [89] [3000/6250] eta: 0:07:45 lr: 0.000004 grad: 0.2099 (0.2130) loss: 0.7060 (0.7073) time: 0.1327 data: 0.0477 max mem: 9377 +Train: [89] [3100/6250] eta: 0:07:30 lr: 0.000004 grad: 0.2056 (0.2129) loss: 0.7190 (0.7073) time: 0.1301 data: 0.0458 max mem: 9377 +Train: [89] [3200/6250] eta: 0:07:16 lr: 0.000004 grad: 0.2043 (0.2128) loss: 0.7223 (0.7074) time: 0.1260 data: 0.0414 max mem: 9377 +Train: [89] [3300/6250] eta: 0:07:01 lr: 0.000004 grad: 0.2034 (0.2128) loss: 0.7097 (0.7074) time: 0.1537 data: 0.0698 max mem: 9377 +Train: [89] [3400/6250] eta: 0:06:47 lr: 0.000004 grad: 0.2094 (0.2127) loss: 0.7157 (0.7074) time: 0.1389 data: 0.0487 max mem: 9377 +Train: [89] [3500/6250] eta: 0:06:32 lr: 0.000004 grad: 0.2032 (0.2126) loss: 0.7170 (0.7073) time: 0.1218 data: 0.0394 max mem: 9377 +Train: [89] [3600/6250] eta: 0:06:18 lr: 0.000004 grad: 0.2030 (0.2125) loss: 0.7098 (0.7074) time: 0.1378 data: 0.0580 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:03 lr: 0.000004 grad: 0.2125 (0.2125) loss: 0.7004 (0.7073) time: 0.1334 data: 0.0477 max mem: 9377 +Train: [89] [3800/6250] eta: 0:05:48 lr: 0.000004 grad: 0.2088 (0.2124) loss: 0.7120 (0.7072) time: 0.1449 data: 0.0615 max mem: 9377 +Train: [89] [3900/6250] eta: 0:05:34 lr: 0.000004 grad: 0.2109 (0.2124) loss: 0.6969 (0.7071) time: 0.1399 data: 0.0549 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:20 lr: 0.000004 grad: 0.2035 (0.2123) loss: 0.7150 (0.7071) time: 0.1425 data: 0.0586 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:06 lr: 0.000004 grad: 0.2082 (0.2122) loss: 0.7188 (0.7072) time: 0.1447 data: 0.0636 max mem: 9377 +Train: [89] [4200/6250] eta: 0:04:51 lr: 0.000004 grad: 0.2065 (0.2121) loss: 0.7167 (0.7072) time: 0.1256 data: 0.0413 max mem: 9377 +Train: [89] [4300/6250] eta: 0:04:36 lr: 0.000004 grad: 0.2080 (0.2121) loss: 0.6918 (0.7072) time: 0.1269 data: 0.0471 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:22 lr: 0.000004 grad: 0.2031 (0.2120) loss: 0.7062 (0.7072) time: 0.1413 data: 0.0619 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:08 lr: 0.000004 grad: 0.2092 (0.2120) loss: 0.6948 (0.7072) time: 0.1275 data: 0.0424 max mem: 9377 +Train: [89] [4600/6250] eta: 0:03:53 lr: 0.000004 grad: 0.2120 (0.2120) loss: 0.7040 (0.7071) time: 0.1356 data: 0.0532 max mem: 9377 +Train: [89] [4700/6250] eta: 0:03:39 lr: 0.000004 grad: 0.2087 (0.2119) loss: 0.7091 (0.7071) time: 0.1271 data: 0.0428 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:25 lr: 0.000004 grad: 0.2062 (0.2119) loss: 0.7099 (0.7072) time: 0.1410 data: 0.0593 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:11 lr: 0.000004 grad: 0.2079 (0.2118) loss: 0.7066 (0.7073) time: 0.1380 data: 0.0562 max mem: 9377 +Train: [89] [5000/6250] eta: 0:02:57 lr: 0.000004 grad: 0.1999 (0.2116) loss: 0.7186 (0.7074) time: 0.1427 data: 0.0608 max mem: 9377 +Train: [89] [5100/6250] eta: 0:02:43 lr: 0.000004 grad: 0.2072 (0.2115) loss: 0.7099 (0.7075) time: 0.1500 data: 0.0659 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:28 lr: 0.000003 grad: 0.2061 (0.2114) loss: 0.7133 (0.7076) time: 0.1434 data: 0.0616 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:14 lr: 0.000003 grad: 0.2085 (0.2114) loss: 0.7166 (0.7077) time: 0.1687 data: 0.0910 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:00 lr: 0.000003 grad: 0.2050 (0.2114) loss: 0.7192 (0.7078) time: 0.1249 data: 0.0342 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:46 lr: 0.000003 grad: 0.2068 (0.2114) loss: 0.7075 (0.7078) time: 0.1352 data: 0.0521 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:32 lr: 0.000003 grad: 0.2084 (0.2114) loss: 0.7045 (0.7079) time: 0.1691 data: 0.0879 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:18 lr: 0.000003 grad: 0.2004 (0.2113) loss: 0.7133 (0.7080) time: 0.1620 data: 0.0812 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:03 lr: 0.000003 grad: 0.2128 (0.2113) loss: 0.7019 (0.7080) time: 0.1209 data: 0.0399 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:49 lr: 0.000003 grad: 0.2037 (0.2112) loss: 0.7239 (0.7081) time: 0.1673 data: 0.0817 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:35 lr: 0.000003 grad: 0.2008 (0.2112) loss: 0.7072 (0.7081) time: 0.1573 data: 0.0781 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:21 lr: 0.000003 grad: 0.2079 (0.2111) loss: 0.7154 (0.7082) time: 0.1351 data: 0.0506 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.2092 (0.2110) loss: 0.6971 (0.7082) time: 0.1365 data: 0.0504 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2095 (0.2110) loss: 0.7129 (0.7083) time: 0.1529 data: 0.0697 max mem: 9377 +Train: [89] Total time: 0:14:54 (0.1431 s / it) +Averaged stats: lr: 0.000003 grad: 0.2095 (0.2110) loss: 0.7129 (0.7083) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:04:03 loss: 0.8694 (0.8694) time: 3.9316 data: 3.8578 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8736 (0.8762) time: 0.1385 data: 0.1115 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:14 (0.2276 s / it) +Averaged stats (hcp-train-subset): loss: 0.8736 (0.8762) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [89] [ 0/62] eta: 0:03:57 loss: 0.8768 (0.8768) time: 3.8229 data: 3.7097 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8710 (0.8725) time: 0.1223 data: 0.0974 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:13 (0.2200 s / it) +Averaged stats (hcp-val): loss: 0.8710 (0.8725) +Making plots (hcp-val): example=26 +Eval (nsd-val): [89] [ 0/62] eta: 0:04:09 loss: 0.8332 (0.8332) time: 4.0227 data: 3.9131 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8448 (0.8461) time: 0.1247 data: 0.0979 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (nsd-val): loss: 0.8448 (0.8461) +Making plots (nsd-val): example=61 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 9:28:35 lr: 0.000003 grad: 0.3482 (0.3482) loss: 0.5913 (0.5913) time: 5.4585 data: 5.1610 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:23:12 lr: 0.000003 grad: 0.2131 (0.2426) loss: 0.7372 (0.7076) time: 0.1652 data: 0.0602 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:20:14 lr: 0.000003 grad: 0.2207 (0.2320) loss: 0.7167 (0.7123) time: 0.1693 data: 0.0608 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:18:40 lr: 0.000003 grad: 0.2131 (0.2265) loss: 0.7014 (0.7142) time: 0.1419 data: 0.0389 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:17:28 lr: 0.000003 grad: 0.2162 (0.2246) loss: 0.7114 (0.7146) time: 0.1391 data: 0.0423 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:16:35 lr: 0.000003 grad: 0.2151 (0.2225) loss: 0.7020 (0.7144) time: 0.1438 data: 0.0512 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:16:04 lr: 0.000003 grad: 0.2131 (0.2213) loss: 0.7025 (0.7140) time: 0.1419 data: 0.0622 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:15:34 lr: 0.000003 grad: 0.2105 (0.2199) loss: 0.7015 (0.7132) time: 0.1778 data: 0.0903 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:15:10 lr: 0.000003 grad: 0.2144 (0.2189) loss: 0.6876 (0.7123) time: 0.1448 data: 0.0659 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:15:00 lr: 0.000003 grad: 0.2162 (0.2181) loss: 0.7056 (0.7114) time: 0.1645 data: 0.0832 max mem: 9377 +Train: [90] [1000/6250] eta: 0:14:40 lr: 0.000003 grad: 0.2137 (0.2176) loss: 0.7023 (0.7107) time: 0.1834 data: 0.0994 max mem: 9377 +Train: [90] [1100/6250] eta: 0:14:27 lr: 0.000003 grad: 0.2075 (0.2173) loss: 0.7124 (0.7099) time: 0.1792 data: 0.0982 max mem: 9377 +Train: [90] [1200/6250] eta: 0:14:08 lr: 0.000003 grad: 0.2099 (0.2169) loss: 0.6890 (0.7094) time: 0.1708 data: 0.0888 max mem: 9377 +Train: [90] [1300/6250] eta: 0:13:46 lr: 0.000003 grad: 0.2100 (0.2164) loss: 0.7029 (0.7089) time: 0.1355 data: 0.0559 max mem: 9377 +Train: [90] [1400/6250] eta: 0:13:23 lr: 0.000003 grad: 0.2121 (0.2163) loss: 0.7053 (0.7085) time: 0.1513 data: 0.0635 max mem: 9377 +Train: [90] [1500/6250] eta: 0:13:05 lr: 0.000003 grad: 0.2065 (0.2159) loss: 0.7108 (0.7084) time: 0.1608 data: 0.0717 max mem: 9377 +Train: [90] [1600/6250] eta: 0:12:47 lr: 0.000003 grad: 0.2116 (0.2155) loss: 0.6959 (0.7083) time: 0.1567 data: 0.0755 max mem: 9377 +Train: [90] [1700/6250] eta: 0:12:27 lr: 0.000003 grad: 0.2071 (0.2153) loss: 0.7159 (0.7080) time: 0.1583 data: 0.0672 max mem: 9377 +Train: [90] [1800/6250] eta: 0:12:12 lr: 0.000003 grad: 0.2061 (0.2150) loss: 0.6934 (0.7077) time: 0.1535 data: 0.0728 max mem: 9377 +Train: [90] [1900/6250] eta: 0:11:57 lr: 0.000003 grad: 0.2117 (0.2148) loss: 0.7065 (0.7076) time: 0.1795 data: 0.0895 max mem: 9377 +Train: [90] [2000/6250] eta: 0:11:38 lr: 0.000003 grad: 0.2064 (0.2145) loss: 0.6968 (0.7076) time: 0.1572 data: 0.0743 max mem: 9377 +Train: [90] [2100/6250] eta: 0:11:21 lr: 0.000003 grad: 0.2117 (0.2143) loss: 0.7028 (0.7075) time: 0.1441 data: 0.0665 max mem: 9377 +Train: [90] [2200/6250] eta: 0:11:05 lr: 0.000003 grad: 0.2055 (0.2141) loss: 0.7049 (0.7075) time: 0.1404 data: 0.0622 max mem: 9377 +Train: [90] [2300/6250] eta: 0:10:45 lr: 0.000003 grad: 0.2099 (0.2141) loss: 0.7002 (0.7074) time: 0.1620 data: 0.0699 max mem: 9377 +Train: [90] [2400/6250] eta: 0:10:28 lr: 0.000003 grad: 0.2029 (0.2140) loss: 0.6988 (0.7072) time: 0.1700 data: 0.0737 max mem: 9377 +Train: [90] [2500/6250] eta: 0:10:11 lr: 0.000003 grad: 0.2157 (0.2140) loss: 0.7097 (0.7073) time: 0.1646 data: 0.0739 max mem: 9377 +Train: [90] [2600/6250] eta: 0:09:53 lr: 0.000003 grad: 0.2113 (0.2139) loss: 0.7172 (0.7074) time: 0.1429 data: 0.0616 max mem: 9377 +Train: [90] [2700/6250] eta: 0:09:35 lr: 0.000003 grad: 0.2082 (0.2139) loss: 0.7045 (0.7073) time: 0.1560 data: 0.0800 max mem: 9377 +Train: [90] [2800/6250] eta: 0:09:18 lr: 0.000003 grad: 0.2159 (0.2139) loss: 0.7037 (0.7070) time: 0.1602 data: 0.0798 max mem: 9377 +Train: [90] [2900/6250] eta: 0:09:01 lr: 0.000003 grad: 0.2124 (0.2139) loss: 0.6914 (0.7069) time: 0.1730 data: 0.0979 max mem: 9377 +Train: [90] [3000/6250] eta: 0:08:45 lr: 0.000003 grad: 0.2128 (0.2139) loss: 0.7001 (0.7067) time: 0.1569 data: 0.0755 max mem: 9377 +Train: [90] [3100/6250] eta: 0:08:29 lr: 0.000003 grad: 0.2119 (0.2139) loss: 0.7049 (0.7066) time: 0.1763 data: 0.1017 max mem: 9377 +Train: [90] [3200/6250] eta: 0:08:15 lr: 0.000003 grad: 0.2111 (0.2139) loss: 0.7051 (0.7066) time: 0.2094 data: 0.1327 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:58 lr: 0.000003 grad: 0.2116 (0.2138) loss: 0.7094 (0.7065) time: 0.1428 data: 0.0655 max mem: 9377 +Train: [90] [3400/6250] eta: 0:07:40 lr: 0.000003 grad: 0.2107 (0.2138) loss: 0.7000 (0.7065) time: 0.1440 data: 0.0584 max mem: 9377 +Train: [90] [3500/6250] eta: 0:07:22 lr: 0.000003 grad: 0.2089 (0.2138) loss: 0.7029 (0.7063) time: 0.1432 data: 0.0692 max mem: 9377 +Train: [90] [3600/6250] eta: 0:07:04 lr: 0.000003 grad: 0.2119 (0.2138) loss: 0.7060 (0.7061) time: 0.1394 data: 0.0578 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:47 lr: 0.000003 grad: 0.2082 (0.2138) loss: 0.6979 (0.7060) time: 0.1042 data: 0.0182 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:29 lr: 0.000003 grad: 0.2069 (0.2138) loss: 0.6976 (0.7059) time: 0.1328 data: 0.0477 max mem: 9377 +Train: [90] [3900/6250] eta: 0:06:12 lr: 0.000003 grad: 0.2086 (0.2138) loss: 0.7024 (0.7057) time: 0.1392 data: 0.0590 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:55 lr: 0.000003 grad: 0.2091 (0.2137) loss: 0.7047 (0.7057) time: 0.1298 data: 0.0474 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:39 lr: 0.000003 grad: 0.2020 (0.2137) loss: 0.7064 (0.7056) time: 0.1514 data: 0.0703 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:23 lr: 0.000003 grad: 0.2120 (0.2137) loss: 0.7081 (0.7056) time: 0.1694 data: 0.0941 max mem: 9377 +Train: [90] [4300/6250] eta: 0:05:06 lr: 0.000003 grad: 0.2143 (0.2137) loss: 0.6890 (0.7055) time: 0.1453 data: 0.0627 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:50 lr: 0.000003 grad: 0.2133 (0.2137) loss: 0.7106 (0.7056) time: 0.1263 data: 0.0437 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:33 lr: 0.000003 grad: 0.2087 (0.2137) loss: 0.7046 (0.7055) time: 0.1340 data: 0.0493 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:17 lr: 0.000003 grad: 0.2110 (0.2137) loss: 0.7025 (0.7055) time: 0.1341 data: 0.0523 max mem: 9377 +Train: [90] [4700/6250] eta: 0:04:01 lr: 0.000003 grad: 0.2100 (0.2136) loss: 0.7080 (0.7056) time: 0.1357 data: 0.0566 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:45 lr: 0.000003 grad: 0.2123 (0.2135) loss: 0.6971 (0.7057) time: 0.1306 data: 0.0502 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:29 lr: 0.000003 grad: 0.2093 (0.2135) loss: 0.7044 (0.7057) time: 0.1415 data: 0.0619 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:13 lr: 0.000003 grad: 0.2068 (0.2135) loss: 0.7054 (0.7057) time: 0.1613 data: 0.0783 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:58 lr: 0.000003 grad: 0.2073 (0.2134) loss: 0.7155 (0.7058) time: 0.1440 data: 0.0617 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:42 lr: 0.000003 grad: 0.2103 (0.2134) loss: 0.7019 (0.7058) time: 0.1472 data: 0.0674 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:26 lr: 0.000003 grad: 0.2094 (0.2133) loss: 0.7037 (0.7058) time: 0.1498 data: 0.0683 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:11 lr: 0.000003 grad: 0.2029 (0.2132) loss: 0.7105 (0.7058) time: 0.1479 data: 0.0682 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:55 lr: 0.000003 grad: 0.2091 (0.2131) loss: 0.7044 (0.7058) time: 0.1363 data: 0.0567 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:39 lr: 0.000003 grad: 0.2106 (0.2131) loss: 0.7031 (0.7059) time: 0.1147 data: 0.0262 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:24 lr: 0.000003 grad: 0.2104 (0.2130) loss: 0.7007 (0.7058) time: 0.1580 data: 0.0807 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:09 lr: 0.000003 grad: 0.2020 (0.2130) loss: 0.7129 (0.7058) time: 0.1661 data: 0.0842 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:53 lr: 0.000003 grad: 0.2172 (0.2130) loss: 0.6964 (0.7057) time: 0.1335 data: 0.0469 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:38 lr: 0.000003 grad: 0.2109 (0.2130) loss: 0.6980 (0.7057) time: 0.1468 data: 0.0688 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.2092 (0.2130) loss: 0.6982 (0.7055) time: 0.1399 data: 0.0590 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.2123 (0.2130) loss: 0.6973 (0.7055) time: 0.1522 data: 0.0750 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2102 (0.2130) loss: 0.6969 (0.7054) time: 0.1637 data: 0.0804 max mem: 9377 +Train: [90] Total time: 0:15:59 (0.1536 s / it) +Averaged stats: lr: 0.000003 grad: 0.2102 (0.2130) loss: 0.6969 (0.7054) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:04:10 loss: 0.8699 (0.8699) time: 4.0383 data: 3.9789 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8736 (0.8761) time: 0.1435 data: 0.1181 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:13 (0.2245 s / it) +Averaged stats (hcp-train-subset): loss: 0.8736 (0.8761) +Eval (hcp-val): [90] [ 0/62] eta: 0:03:59 loss: 0.8781 (0.8781) time: 3.8622 data: 3.7712 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8710 (0.8739) time: 0.1306 data: 0.1054 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:13 (0.2234 s / it) +Averaged stats (hcp-val): loss: 0.8710 (0.8739) +Eval (nsd-val): [90] [ 0/62] eta: 0:03:51 loss: 0.8305 (0.8305) time: 3.7334 data: 3.6662 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8458 (0.8470) time: 0.1197 data: 0.0942 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:13 (0.2185 s / it) +Averaged stats (nsd-val): loss: 0.8458 (0.8470) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 11:06:16 lr: 0.000003 grad: 0.4123 (0.4123) loss: 0.7054 (0.7054) time: 6.3963 data: 6.2822 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:20:06 lr: 0.000003 grad: 0.2309 (0.2442) loss: 0.7116 (0.7151) time: 0.1347 data: 0.0315 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:16:59 lr: 0.000003 grad: 0.2171 (0.2369) loss: 0.7051 (0.7127) time: 0.1397 data: 0.0458 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:15:53 lr: 0.000003 grad: 0.2290 (0.2342) loss: 0.6962 (0.7085) time: 0.1302 data: 0.0348 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:15:13 lr: 0.000003 grad: 0.2168 (0.2309) loss: 0.7032 (0.7073) time: 0.1548 data: 0.0656 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:14:36 lr: 0.000003 grad: 0.2104 (0.2279) loss: 0.7204 (0.7070) time: 0.1471 data: 0.0594 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:14:09 lr: 0.000003 grad: 0.2152 (0.2261) loss: 0.7078 (0.7067) time: 0.1393 data: 0.0478 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:13:54 lr: 0.000003 grad: 0.2158 (0.2244) loss: 0.7004 (0.7059) time: 0.1361 data: 0.0477 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:13:33 lr: 0.000003 grad: 0.2114 (0.2231) loss: 0.7053 (0.7059) time: 0.1521 data: 0.0583 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:13:13 lr: 0.000003 grad: 0.2169 (0.2221) loss: 0.7029 (0.7062) time: 0.1238 data: 0.0343 max mem: 9377 +Train: [91] [1000/6250] eta: 0:12:53 lr: 0.000003 grad: 0.2011 (0.2208) loss: 0.7072 (0.7062) time: 0.1283 data: 0.0408 max mem: 9377 +Train: [91] [1100/6250] eta: 0:12:34 lr: 0.000003 grad: 0.2093 (0.2197) loss: 0.7065 (0.7066) time: 0.1458 data: 0.0577 max mem: 9377 +Train: [91] [1200/6250] eta: 0:12:14 lr: 0.000003 grad: 0.2055 (0.2189) loss: 0.7076 (0.7070) time: 0.1425 data: 0.0590 max mem: 9377 +Train: [91] [1300/6250] eta: 0:11:57 lr: 0.000003 grad: 0.2092 (0.2181) loss: 0.7034 (0.7072) time: 0.1371 data: 0.0431 max mem: 9377 +Train: [91] [1400/6250] eta: 0:11:40 lr: 0.000003 grad: 0.2020 (0.2173) loss: 0.7299 (0.7077) time: 0.1520 data: 0.0644 max mem: 9377 +Train: [91] [1500/6250] eta: 0:11:30 lr: 0.000003 grad: 0.2115 (0.2169) loss: 0.7160 (0.7079) time: 0.1209 data: 0.0341 max mem: 9377 +Train: [91] [1600/6250] eta: 0:11:18 lr: 0.000003 grad: 0.2081 (0.2164) loss: 0.7162 (0.7082) time: 0.1439 data: 0.0608 max mem: 9377 +Train: [91] [1700/6250] eta: 0:11:04 lr: 0.000003 grad: 0.2085 (0.2161) loss: 0.7072 (0.7084) time: 0.1467 data: 0.0679 max mem: 9377 +Train: [91] [1800/6250] eta: 0:10:50 lr: 0.000003 grad: 0.2036 (0.2159) loss: 0.7218 (0.7086) time: 0.1563 data: 0.0771 max mem: 9377 +Train: [91] [1900/6250] eta: 0:10:33 lr: 0.000003 grad: 0.2125 (0.2157) loss: 0.7008 (0.7088) time: 0.1343 data: 0.0469 max mem: 9377 +Train: [91] [2000/6250] eta: 0:10:20 lr: 0.000003 grad: 0.2066 (0.2153) loss: 0.7117 (0.7091) time: 0.1376 data: 0.0508 max mem: 9377 +Train: [91] [2100/6250] eta: 0:10:05 lr: 0.000003 grad: 0.2055 (0.2150) loss: 0.7090 (0.7093) time: 0.1530 data: 0.0644 max mem: 9377 +Train: [91] [2200/6250] eta: 0:09:48 lr: 0.000003 grad: 0.2061 (0.2147) loss: 0.7085 (0.7095) time: 0.1384 data: 0.0580 max mem: 9377 +Train: [91] [2300/6250] eta: 0:09:32 lr: 0.000003 grad: 0.2091 (0.2146) loss: 0.7032 (0.7096) time: 0.1444 data: 0.0660 max mem: 9377 +Train: [91] [2400/6250] eta: 0:09:16 lr: 0.000003 grad: 0.2033 (0.2144) loss: 0.7158 (0.7098) time: 0.1470 data: 0.0651 max mem: 9377 +Train: [91] [2500/6250] eta: 0:09:01 lr: 0.000003 grad: 0.2116 (0.2142) loss: 0.7091 (0.7099) time: 0.1382 data: 0.0533 max mem: 9377 +Train: [91] [2600/6250] eta: 0:08:44 lr: 0.000003 grad: 0.2054 (0.2142) loss: 0.7113 (0.7099) time: 0.1276 data: 0.0430 max mem: 9377 +Train: [91] [2700/6250] eta: 0:08:28 lr: 0.000002 grad: 0.2170 (0.2142) loss: 0.7099 (0.7098) time: 0.1216 data: 0.0339 max mem: 9377 +Train: [91] [2800/6250] eta: 0:08:12 lr: 0.000002 grad: 0.2084 (0.2141) loss: 0.7169 (0.7098) time: 0.1292 data: 0.0492 max mem: 9377 +Train: [91] [2900/6250] eta: 0:07:57 lr: 0.000002 grad: 0.2066 (0.2139) loss: 0.7030 (0.7097) time: 0.1247 data: 0.0340 max mem: 9377 +Train: [91] [3000/6250] eta: 0:07:42 lr: 0.000002 grad: 0.2055 (0.2138) loss: 0.7125 (0.7098) time: 0.1294 data: 0.0404 max mem: 9377 +Train: [91] [3100/6250] eta: 0:07:27 lr: 0.000002 grad: 0.2061 (0.2136) loss: 0.7255 (0.7100) time: 0.1355 data: 0.0553 max mem: 9377 +Train: [91] [3200/6250] eta: 0:07:12 lr: 0.000002 grad: 0.2078 (0.2134) loss: 0.7049 (0.7101) time: 0.1441 data: 0.0648 max mem: 9377 +Train: [91] [3300/6250] eta: 0:06:58 lr: 0.000002 grad: 0.2085 (0.2133) loss: 0.7086 (0.7102) time: 0.1680 data: 0.0905 max mem: 9377 +Train: [91] [3400/6250] eta: 0:06:43 lr: 0.000002 grad: 0.2034 (0.2132) loss: 0.7241 (0.7104) time: 0.1370 data: 0.0502 max mem: 9377 +Train: [91] [3500/6250] eta: 0:06:29 lr: 0.000002 grad: 0.2102 (0.2130) loss: 0.7167 (0.7106) time: 0.1228 data: 0.0354 max mem: 9377 +Train: [91] [3600/6250] eta: 0:06:15 lr: 0.000002 grad: 0.2114 (0.2129) loss: 0.7174 (0.7108) time: 0.1288 data: 0.0446 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:00 lr: 0.000002 grad: 0.2127 (0.2128) loss: 0.7027 (0.7108) time: 0.1129 data: 0.0271 max mem: 9377 +Train: [91] [3800/6250] eta: 0:05:46 lr: 0.000002 grad: 0.2050 (0.2128) loss: 0.7186 (0.7110) time: 0.1500 data: 0.0667 max mem: 9377 +Train: [91] [3900/6250] eta: 0:05:32 lr: 0.000002 grad: 0.2071 (0.2128) loss: 0.7140 (0.7109) time: 0.1358 data: 0.0500 max mem: 9377 +Train: [91] [4000/6250] eta: 0:05:18 lr: 0.000002 grad: 0.2075 (0.2127) loss: 0.7013 (0.7108) time: 0.1353 data: 0.0548 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:04 lr: 0.000002 grad: 0.2095 (0.2126) loss: 0.7096 (0.7108) time: 0.1340 data: 0.0536 max mem: 9377 +Train: [91] [4200/6250] eta: 0:04:50 lr: 0.000002 grad: 0.2053 (0.2126) loss: 0.7206 (0.7107) time: 0.1441 data: 0.0627 max mem: 9377 +Train: [91] [4300/6250] eta: 0:04:36 lr: 0.000002 grad: 0.2051 (0.2125) loss: 0.7171 (0.7108) time: 0.1506 data: 0.0722 max mem: 9377 +Train: [91] [4400/6250] eta: 0:04:22 lr: 0.000002 grad: 0.2087 (0.2124) loss: 0.6988 (0.7108) time: 0.1472 data: 0.0658 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:08 lr: 0.000002 grad: 0.2066 (0.2124) loss: 0.7223 (0.7108) time: 0.1435 data: 0.0643 max mem: 9377 +Train: [91] [4600/6250] eta: 0:03:54 lr: 0.000002 grad: 0.2134 (0.2123) loss: 0.7000 (0.7107) time: 0.2102 data: 0.1269 max mem: 9377 +Train: [91] [4700/6250] eta: 0:03:39 lr: 0.000002 grad: 0.2078 (0.2122) loss: 0.7037 (0.7106) time: 0.1259 data: 0.0433 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:25 lr: 0.000002 grad: 0.2137 (0.2123) loss: 0.7007 (0.7105) time: 0.1477 data: 0.0696 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:11 lr: 0.000002 grad: 0.2116 (0.2123) loss: 0.7039 (0.7103) time: 0.1349 data: 0.0546 max mem: 9377 +Train: [91] [5000/6250] eta: 0:02:57 lr: 0.000002 grad: 0.2089 (0.2124) loss: 0.7061 (0.7102) time: 0.1634 data: 0.0794 max mem: 9377 +Train: [91] [5100/6250] eta: 0:02:43 lr: 0.000002 grad: 0.2103 (0.2124) loss: 0.6999 (0.7100) time: 0.1549 data: 0.0790 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:29 lr: 0.000002 grad: 0.2097 (0.2125) loss: 0.7081 (0.7097) time: 0.1292 data: 0.0400 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:15 lr: 0.000002 grad: 0.2165 (0.2125) loss: 0.7034 (0.7095) time: 0.1468 data: 0.0641 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:00 lr: 0.000002 grad: 0.2046 (0.2125) loss: 0.7135 (0.7094) time: 0.1413 data: 0.0612 max mem: 9377 +Train: [91] [5500/6250] eta: 0:01:46 lr: 0.000002 grad: 0.2090 (0.2125) loss: 0.7049 (0.7094) time: 0.1325 data: 0.0496 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:32 lr: 0.000002 grad: 0.2112 (0.2124) loss: 0.7044 (0.7093) time: 0.1454 data: 0.0647 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:18 lr: 0.000002 grad: 0.2091 (0.2124) loss: 0.7103 (0.7093) time: 0.1375 data: 0.0564 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:03 lr: 0.000002 grad: 0.2105 (0.2124) loss: 0.7064 (0.7092) time: 0.1378 data: 0.0551 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:49 lr: 0.000002 grad: 0.2066 (0.2123) loss: 0.7041 (0.7093) time: 0.1354 data: 0.0576 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:35 lr: 0.000002 grad: 0.2054 (0.2123) loss: 0.7170 (0.7092) time: 0.1259 data: 0.0412 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.2126 (0.2122) loss: 0.6965 (0.7092) time: 0.1500 data: 0.0748 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.2025 (0.2122) loss: 0.7205 (0.7092) time: 0.1522 data: 0.0681 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2099 (0.2122) loss: 0.7034 (0.7092) time: 0.1560 data: 0.0765 max mem: 9377 +Train: [91] Total time: 0:14:50 (0.1424 s / it) +Averaged stats: lr: 0.000002 grad: 0.2099 (0.2122) loss: 0.7034 (0.7092) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:06:04 loss: 0.8703 (0.8703) time: 5.8855 data: 5.8506 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8720 (0.8751) time: 0.1125 data: 0.0865 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:14 (0.2270 s / it) +Averaged stats (hcp-train-subset): loss: 0.8720 (0.8751) +Eval (hcp-val): [91] [ 0/62] eta: 0:03:56 loss: 0.8764 (0.8764) time: 3.8197 data: 3.7370 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8703 (0.8721) time: 0.1125 data: 0.0858 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:13 (0.2212 s / it) +Averaged stats (hcp-val): loss: 0.8703 (0.8721) +Eval (nsd-val): [91] [ 0/62] eta: 0:05:06 loss: 0.8422 (0.8422) time: 4.9387 data: 4.9072 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8486 (0.8472) time: 0.1172 data: 0.0907 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:13 (0.2179 s / it) +Averaged stats (nsd-val): loss: 0.8486 (0.8472) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 7:57:17 lr: 0.000002 grad: 0.2952 (0.2952) loss: 0.6910 (0.6910) time: 4.5821 data: 4.3261 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:20:09 lr: 0.000002 grad: 0.2205 (0.2401) loss: 0.7149 (0.7179) time: 0.1501 data: 0.0539 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:17:34 lr: 0.000002 grad: 0.2212 (0.2334) loss: 0.7110 (0.7161) time: 0.1660 data: 0.0706 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:16:16 lr: 0.000002 grad: 0.2172 (0.2280) loss: 0.7044 (0.7159) time: 0.1489 data: 0.0552 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:15:39 lr: 0.000002 grad: 0.2203 (0.2267) loss: 0.7074 (0.7126) time: 0.1485 data: 0.0576 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:14:58 lr: 0.000002 grad: 0.2142 (0.2250) loss: 0.6976 (0.7111) time: 0.1484 data: 0.0563 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:14:17 lr: 0.000002 grad: 0.2185 (0.2238) loss: 0.7096 (0.7100) time: 0.1356 data: 0.0466 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:13:53 lr: 0.000002 grad: 0.2119 (0.2223) loss: 0.7093 (0.7100) time: 0.1244 data: 0.0318 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:13:33 lr: 0.000002 grad: 0.2031 (0.2213) loss: 0.6990 (0.7096) time: 0.1626 data: 0.0764 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:13:24 lr: 0.000002 grad: 0.2094 (0.2204) loss: 0.7158 (0.7098) time: 0.1568 data: 0.0670 max mem: 9377 +Train: [92] [1000/6250] eta: 0:13:10 lr: 0.000002 grad: 0.2065 (0.2197) loss: 0.7157 (0.7096) time: 0.1559 data: 0.0667 max mem: 9377 +Train: [92] [1100/6250] eta: 0:12:58 lr: 0.000002 grad: 0.2118 (0.2190) loss: 0.7112 (0.7094) time: 0.1702 data: 0.0888 max mem: 9377 +Train: [92] [1200/6250] eta: 0:12:40 lr: 0.000002 grad: 0.2082 (0.2184) loss: 0.6923 (0.7091) time: 0.1345 data: 0.0451 max mem: 9377 +Train: [92] [1300/6250] eta: 0:12:20 lr: 0.000002 grad: 0.2036 (0.2177) loss: 0.7060 (0.7092) time: 0.1483 data: 0.0712 max mem: 9377 +Train: [92] [1400/6250] eta: 0:11:59 lr: 0.000002 grad: 0.2060 (0.2170) loss: 0.7127 (0.7095) time: 0.1494 data: 0.0576 max mem: 9377 +Train: [92] [1500/6250] eta: 0:11:42 lr: 0.000002 grad: 0.2115 (0.2167) loss: 0.7076 (0.7093) time: 0.1239 data: 0.0352 max mem: 9377 +Train: [92] [1600/6250] eta: 0:11:29 lr: 0.000002 grad: 0.2041 (0.2162) loss: 0.7178 (0.7092) time: 0.1739 data: 0.0950 max mem: 9377 +Train: [92] [1700/6250] eta: 0:11:19 lr: 0.000002 grad: 0.1978 (0.2157) loss: 0.7079 (0.7091) time: 0.1548 data: 0.0660 max mem: 9377 +Train: [92] [1800/6250] eta: 0:11:12 lr: 0.000002 grad: 0.2088 (0.2154) loss: 0.6981 (0.7091) time: 0.1949 data: 0.1110 max mem: 9377 +Train: [92] [1900/6250] eta: 0:11:03 lr: 0.000002 grad: 0.2065 (0.2149) loss: 0.7067 (0.7093) time: 0.2066 data: 0.1343 max mem: 9377 +Train: [92] [2000/6250] eta: 0:10:52 lr: 0.000002 grad: 0.2075 (0.2147) loss: 0.7160 (0.7094) time: 0.1613 data: 0.0768 max mem: 9377 +Train: [92] [2100/6250] eta: 0:10:38 lr: 0.000002 grad: 0.2116 (0.2145) loss: 0.7098 (0.7093) time: 0.1519 data: 0.0668 max mem: 9377 +Train: [92] [2200/6250] eta: 0:10:23 lr: 0.000002 grad: 0.2107 (0.2143) loss: 0.7143 (0.7092) time: 0.1507 data: 0.0623 max mem: 9377 +Train: [92] [2300/6250] eta: 0:10:07 lr: 0.000002 grad: 0.2112 (0.2142) loss: 0.6976 (0.7089) time: 0.1558 data: 0.0714 max mem: 9377 +Train: [92] [2400/6250] eta: 0:09:50 lr: 0.000002 grad: 0.2120 (0.2141) loss: 0.6972 (0.7087) time: 0.1334 data: 0.0474 max mem: 9377 +Train: [92] [2500/6250] eta: 0:09:32 lr: 0.000002 grad: 0.2090 (0.2142) loss: 0.7025 (0.7084) time: 0.1410 data: 0.0544 max mem: 9377 +Train: [92] [2600/6250] eta: 0:09:15 lr: 0.000002 grad: 0.2081 (0.2142) loss: 0.7020 (0.7081) time: 0.1432 data: 0.0541 max mem: 9377 +Train: [92] [2700/6250] eta: 0:08:58 lr: 0.000002 grad: 0.2077 (0.2142) loss: 0.7026 (0.7079) time: 0.1521 data: 0.0578 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:40 lr: 0.000002 grad: 0.2129 (0.2142) loss: 0.6953 (0.7075) time: 0.1405 data: 0.0524 max mem: 9377 +Train: [92] [2900/6250] eta: 0:08:21 lr: 0.000002 grad: 0.2049 (0.2141) loss: 0.7123 (0.7074) time: 0.1263 data: 0.0342 max mem: 9377 +Train: [92] [3000/6250] eta: 0:08:04 lr: 0.000002 grad: 0.2170 (0.2141) loss: 0.6935 (0.7072) time: 0.1425 data: 0.0592 max mem: 9377 +Train: [92] [3100/6250] eta: 0:07:48 lr: 0.000002 grad: 0.2057 (0.2140) loss: 0.7095 (0.7072) time: 0.1386 data: 0.0588 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:33 lr: 0.000002 grad: 0.2088 (0.2139) loss: 0.7119 (0.7070) time: 0.1461 data: 0.0632 max mem: 9377 +Train: [92] [3300/6250] eta: 0:07:16 lr: 0.000002 grad: 0.2124 (0.2138) loss: 0.7054 (0.7069) time: 0.1219 data: 0.0423 max mem: 9377 +Train: [92] [3400/6250] eta: 0:07:01 lr: 0.000002 grad: 0.2113 (0.2138) loss: 0.7049 (0.7069) time: 0.1385 data: 0.0540 max mem: 9377 +Train: [92] [3500/6250] eta: 0:06:45 lr: 0.000002 grad: 0.2111 (0.2137) loss: 0.7116 (0.7071) time: 0.1293 data: 0.0535 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:30 lr: 0.000002 grad: 0.2037 (0.2136) loss: 0.7063 (0.7072) time: 0.1385 data: 0.0575 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:15 lr: 0.000002 grad: 0.2093 (0.2135) loss: 0.7161 (0.7073) time: 0.1341 data: 0.0456 max mem: 9377 +Train: [92] [3800/6250] eta: 0:06:00 lr: 0.000002 grad: 0.2093 (0.2135) loss: 0.7101 (0.7072) time: 0.1322 data: 0.0520 max mem: 9377 +Train: [92] [3900/6250] eta: 0:05:44 lr: 0.000002 grad: 0.2120 (0.2134) loss: 0.7068 (0.7072) time: 0.1529 data: 0.0716 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:29 lr: 0.000002 grad: 0.2133 (0.2133) loss: 0.7054 (0.7073) time: 0.1414 data: 0.0584 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:14 lr: 0.000002 grad: 0.2066 (0.2133) loss: 0.7021 (0.7072) time: 0.1252 data: 0.0463 max mem: 9377 +Train: [92] [4200/6250] eta: 0:04:59 lr: 0.000002 grad: 0.2119 (0.2132) loss: 0.7125 (0.7073) time: 0.1291 data: 0.0430 max mem: 9377 +Train: [92] [4300/6250] eta: 0:04:44 lr: 0.000002 grad: 0.2120 (0.2132) loss: 0.7045 (0.7073) time: 0.1438 data: 0.0661 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:30 lr: 0.000002 grad: 0.2138 (0.2132) loss: 0.7021 (0.7072) time: 0.1281 data: 0.0465 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:15 lr: 0.000002 grad: 0.2154 (0.2132) loss: 0.7011 (0.7071) time: 0.1506 data: 0.0673 max mem: 9377 +Train: [92] [4600/6250] eta: 0:04:00 lr: 0.000002 grad: 0.2098 (0.2132) loss: 0.6867 (0.7071) time: 0.1460 data: 0.0679 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:45 lr: 0.000002 grad: 0.2124 (0.2132) loss: 0.6990 (0.7070) time: 0.1419 data: 0.0580 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:30 lr: 0.000002 grad: 0.2102 (0.2131) loss: 0.7073 (0.7070) time: 0.1303 data: 0.0485 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:15 lr: 0.000002 grad: 0.2116 (0.2130) loss: 0.7064 (0.7070) time: 0.1268 data: 0.0480 max mem: 9377 +Train: [92] [5000/6250] eta: 0:03:01 lr: 0.000002 grad: 0.2067 (0.2129) loss: 0.7023 (0.7071) time: 0.1289 data: 0.0473 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:46 lr: 0.000002 grad: 0.2075 (0.2129) loss: 0.7027 (0.7072) time: 0.1270 data: 0.0423 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:32 lr: 0.000002 grad: 0.2064 (0.2127) loss: 0.7120 (0.7073) time: 0.1440 data: 0.0612 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:17 lr: 0.000002 grad: 0.2087 (0.2126) loss: 0.7083 (0.7074) time: 0.1355 data: 0.0555 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:03 lr: 0.000002 grad: 0.2078 (0.2126) loss: 0.7167 (0.7074) time: 0.1219 data: 0.0373 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:48 lr: 0.000002 grad: 0.2055 (0.2125) loss: 0.7115 (0.7074) time: 0.1495 data: 0.0688 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:34 lr: 0.000002 grad: 0.2071 (0.2124) loss: 0.7018 (0.7074) time: 0.1200 data: 0.0351 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:19 lr: 0.000002 grad: 0.2055 (0.2123) loss: 0.7125 (0.7075) time: 0.1353 data: 0.0571 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:05 lr: 0.000002 grad: 0.2083 (0.2122) loss: 0.7075 (0.7075) time: 0.1614 data: 0.0798 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:50 lr: 0.000002 grad: 0.2041 (0.2121) loss: 0.7093 (0.7075) time: 0.1421 data: 0.0519 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:36 lr: 0.000002 grad: 0.2042 (0.2121) loss: 0.7027 (0.7075) time: 0.1452 data: 0.0559 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.2113 (0.2121) loss: 0.6952 (0.7075) time: 0.1516 data: 0.0708 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.2119 (0.2120) loss: 0.7030 (0.7074) time: 0.1420 data: 0.0538 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2066 (0.2120) loss: 0.7094 (0.7075) time: 0.1354 data: 0.0535 max mem: 9377 +Train: [92] Total time: 0:15:08 (0.1454 s / it) +Averaged stats: lr: 0.000002 grad: 0.2066 (0.2120) loss: 0.7094 (0.7075) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:04:54 loss: 0.8742 (0.8742) time: 4.7557 data: 4.6838 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8716 (0.8745) time: 0.1324 data: 0.1071 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-train-subset): loss: 0.8716 (0.8745) +Eval (hcp-val): [92] [ 0/62] eta: 0:03:53 loss: 0.8861 (0.8861) time: 3.7615 data: 3.6888 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8705 (0.8724) time: 0.1292 data: 0.1038 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (hcp-val): loss: 0.8705 (0.8724) +Eval (nsd-val): [92] [ 0/62] eta: 0:03:50 loss: 0.8364 (0.8364) time: 3.7217 data: 3.6554 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8479 (0.8472) time: 0.1376 data: 0.1111 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:13 (0.2158 s / it) +Averaged stats (nsd-val): loss: 0.8479 (0.8472) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 8:34:53 lr: 0.000002 grad: 0.2750 (0.2750) loss: 0.7435 (0.7435) time: 4.9430 data: 4.6401 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:20:55 lr: 0.000002 grad: 0.2138 (0.2305) loss: 0.7162 (0.7267) time: 0.1613 data: 0.0734 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:17:31 lr: 0.000002 grad: 0.2157 (0.2258) loss: 0.7263 (0.7180) time: 0.1336 data: 0.0376 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:16:09 lr: 0.000002 grad: 0.2177 (0.2232) loss: 0.6999 (0.7130) time: 0.1401 data: 0.0413 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:15:22 lr: 0.000002 grad: 0.2123 (0.2214) loss: 0.7111 (0.7106) time: 0.1222 data: 0.0319 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:14:39 lr: 0.000002 grad: 0.2072 (0.2191) loss: 0.7165 (0.7104) time: 0.1268 data: 0.0330 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:14:11 lr: 0.000002 grad: 0.2113 (0.2182) loss: 0.6981 (0.7093) time: 0.1370 data: 0.0449 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:13:44 lr: 0.000002 grad: 0.2130 (0.2172) loss: 0.6991 (0.7078) time: 0.1349 data: 0.0397 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:13:18 lr: 0.000002 grad: 0.2051 (0.2166) loss: 0.7041 (0.7069) time: 0.1432 data: 0.0543 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:12:59 lr: 0.000002 grad: 0.2060 (0.2156) loss: 0.7035 (0.7072) time: 0.1649 data: 0.0768 max mem: 9377 +Train: [93] [1000/6250] eta: 0:12:35 lr: 0.000002 grad: 0.2074 (0.2149) loss: 0.7000 (0.7074) time: 0.1399 data: 0.0566 max mem: 9377 +Train: [93] [1100/6250] eta: 0:12:15 lr: 0.000002 grad: 0.2083 (0.2145) loss: 0.7080 (0.7077) time: 0.1344 data: 0.0469 max mem: 9377 +Train: [93] [1200/6250] eta: 0:12:00 lr: 0.000002 grad: 0.2067 (0.2138) loss: 0.7066 (0.7081) time: 0.1523 data: 0.0696 max mem: 9377 +Train: [93] [1300/6250] eta: 0:11:41 lr: 0.000002 grad: 0.2058 (0.2133) loss: 0.7001 (0.7086) time: 0.1415 data: 0.0564 max mem: 9377 +Train: [93] [1400/6250] eta: 0:11:25 lr: 0.000002 grad: 0.2122 (0.2130) loss: 0.6938 (0.7090) time: 0.1289 data: 0.0445 max mem: 9377 +Train: [93] [1500/6250] eta: 0:11:08 lr: 0.000002 grad: 0.2111 (0.2128) loss: 0.6946 (0.7089) time: 0.1207 data: 0.0399 max mem: 9377 +Train: [93] [1600/6250] eta: 0:10:52 lr: 0.000002 grad: 0.2082 (0.2125) loss: 0.7165 (0.7091) time: 0.1339 data: 0.0515 max mem: 9377 +Train: [93] [1700/6250] eta: 0:10:37 lr: 0.000002 grad: 0.2099 (0.2125) loss: 0.7076 (0.7090) time: 0.1392 data: 0.0610 max mem: 9377 +Train: [93] [1800/6250] eta: 0:10:21 lr: 0.000002 grad: 0.2106 (0.2124) loss: 0.7091 (0.7090) time: 0.1337 data: 0.0467 max mem: 9377 +Train: [93] [1900/6250] eta: 0:10:14 lr: 0.000002 grad: 0.2114 (0.2123) loss: 0.7051 (0.7090) time: 0.1664 data: 0.0849 max mem: 9377 +Train: [93] [2000/6250] eta: 0:10:02 lr: 0.000002 grad: 0.2136 (0.2123) loss: 0.7152 (0.7090) time: 0.1396 data: 0.0645 max mem: 9377 +Train: [93] [2100/6250] eta: 0:09:49 lr: 0.000002 grad: 0.2088 (0.2123) loss: 0.7123 (0.7090) time: 0.1336 data: 0.0548 max mem: 9377 +Train: [93] [2200/6250] eta: 0:09:34 lr: 0.000002 grad: 0.2192 (0.2123) loss: 0.7076 (0.7091) time: 0.1339 data: 0.0586 max mem: 9377 +Train: [93] [2300/6250] eta: 0:09:21 lr: 0.000001 grad: 0.2105 (0.2123) loss: 0.7018 (0.7091) time: 0.1568 data: 0.0763 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:12 lr: 0.000001 grad: 0.2088 (0.2123) loss: 0.7105 (0.7093) time: 0.1702 data: 0.0830 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:01 lr: 0.000001 grad: 0.2078 (0.2123) loss: 0.7107 (0.7092) time: 0.1631 data: 0.0640 max mem: 9377 +Train: [93] [2600/6250] eta: 0:08:50 lr: 0.000001 grad: 0.2106 (0.2122) loss: 0.7111 (0.7092) time: 0.1653 data: 0.0631 max mem: 9377 +Train: [93] [2700/6250] eta: 0:08:38 lr: 0.000001 grad: 0.2059 (0.2121) loss: 0.7191 (0.7093) time: 0.1479 data: 0.0519 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:24 lr: 0.000001 grad: 0.2083 (0.2121) loss: 0.7101 (0.7094) time: 0.1342 data: 0.0397 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:09 lr: 0.000001 grad: 0.2040 (0.2119) loss: 0.7176 (0.7096) time: 0.1490 data: 0.0682 max mem: 9377 +Train: [93] [3000/6250] eta: 0:07:54 lr: 0.000001 grad: 0.2145 (0.2119) loss: 0.7045 (0.7096) time: 0.1601 data: 0.0741 max mem: 9377 +Train: [93] [3100/6250] eta: 0:07:39 lr: 0.000001 grad: 0.2025 (0.2119) loss: 0.7227 (0.7097) time: 0.1629 data: 0.0749 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:24 lr: 0.000001 grad: 0.2056 (0.2117) loss: 0.7091 (0.7098) time: 0.1413 data: 0.0648 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:11 lr: 0.000001 grad: 0.2091 (0.2117) loss: 0.7035 (0.7098) time: 0.1540 data: 0.0764 max mem: 9377 +Train: [93] [3400/6250] eta: 0:06:57 lr: 0.000001 grad: 0.2043 (0.2115) loss: 0.7121 (0.7099) time: 0.1517 data: 0.0699 max mem: 9377 +Train: [93] [3500/6250] eta: 0:06:43 lr: 0.000001 grad: 0.2060 (0.2114) loss: 0.7113 (0.7100) time: 0.1584 data: 0.0785 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:29 lr: 0.000001 grad: 0.2055 (0.2113) loss: 0.7076 (0.7100) time: 0.1812 data: 0.1062 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:14 lr: 0.000001 grad: 0.2066 (0.2113) loss: 0.7074 (0.7100) time: 0.1524 data: 0.0708 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:00 lr: 0.000001 grad: 0.2063 (0.2113) loss: 0.7056 (0.7099) time: 0.1441 data: 0.0633 max mem: 9377 +Train: [93] [3900/6250] eta: 0:05:46 lr: 0.000001 grad: 0.2047 (0.2112) loss: 0.7125 (0.7100) time: 0.1547 data: 0.0746 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:31 lr: 0.000001 grad: 0.2123 (0.2112) loss: 0.6945 (0.7099) time: 0.1342 data: 0.0493 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:16 lr: 0.000001 grad: 0.2031 (0.2112) loss: 0.7063 (0.7098) time: 0.1409 data: 0.0561 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:01 lr: 0.000001 grad: 0.2092 (0.2112) loss: 0.7057 (0.7097) time: 0.1270 data: 0.0434 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:46 lr: 0.000001 grad: 0.2109 (0.2112) loss: 0.7056 (0.7096) time: 0.1314 data: 0.0466 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:31 lr: 0.000001 grad: 0.2056 (0.2112) loss: 0.7004 (0.7095) time: 0.1388 data: 0.0557 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:16 lr: 0.000001 grad: 0.2035 (0.2111) loss: 0.7231 (0.7095) time: 0.1347 data: 0.0505 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:01 lr: 0.000001 grad: 0.2074 (0.2111) loss: 0.7111 (0.7095) time: 0.1516 data: 0.0683 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:46 lr: 0.000001 grad: 0.2076 (0.2110) loss: 0.7123 (0.7096) time: 0.1504 data: 0.0728 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:31 lr: 0.000001 grad: 0.2131 (0.2110) loss: 0.7094 (0.7096) time: 0.1217 data: 0.0355 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:16 lr: 0.000001 grad: 0.2072 (0.2110) loss: 0.7100 (0.7095) time: 0.1331 data: 0.0474 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:02 lr: 0.000001 grad: 0.2076 (0.2110) loss: 0.7093 (0.7095) time: 0.1712 data: 0.0848 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:47 lr: 0.000001 grad: 0.2091 (0.2110) loss: 0.7180 (0.7095) time: 0.1563 data: 0.0791 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:33 lr: 0.000001 grad: 0.2038 (0.2110) loss: 0.7159 (0.7095) time: 0.1526 data: 0.0707 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:18 lr: 0.000001 grad: 0.2035 (0.2109) loss: 0.7149 (0.7095) time: 0.1274 data: 0.0399 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:03 lr: 0.000001 grad: 0.2087 (0.2109) loss: 0.7124 (0.7095) time: 0.1392 data: 0.0581 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:48 lr: 0.000001 grad: 0.2046 (0.2109) loss: 0.6993 (0.7095) time: 0.1425 data: 0.0603 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:34 lr: 0.000001 grad: 0.2116 (0.2109) loss: 0.7079 (0.7095) time: 0.1433 data: 0.0631 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:19 lr: 0.000001 grad: 0.2016 (0.2109) loss: 0.7142 (0.7094) time: 0.1500 data: 0.0650 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:05 lr: 0.000001 grad: 0.2118 (0.2109) loss: 0.7079 (0.7093) time: 0.1348 data: 0.0493 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:50 lr: 0.000001 grad: 0.2096 (0.2109) loss: 0.7031 (0.7092) time: 0.1306 data: 0.0486 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:36 lr: 0.000001 grad: 0.2054 (0.2109) loss: 0.7054 (0.7093) time: 0.1336 data: 0.0524 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:21 lr: 0.000001 grad: 0.2081 (0.2108) loss: 0.7074 (0.7093) time: 0.1260 data: 0.0391 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2066 (0.2108) loss: 0.7139 (0.7093) time: 0.1440 data: 0.0608 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2104 (0.2108) loss: 0.7057 (0.7093) time: 0.1251 data: 0.0452 max mem: 9377 +Train: [93] Total time: 0:15:07 (0.1453 s / it) +Averaged stats: lr: 0.000001 grad: 0.2104 (0.2108) loss: 0.7057 (0.7093) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:03:46 loss: 0.8696 (0.8696) time: 3.6579 data: 3.5788 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8737 (0.8745) time: 0.1434 data: 0.1181 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (hcp-train-subset): loss: 0.8737 (0.8745) +Eval (hcp-val): [93] [ 0/62] eta: 0:05:10 loss: 0.8763 (0.8763) time: 5.0070 data: 4.9759 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8727 (0.8721) time: 0.1127 data: 0.0851 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (hcp-val): loss: 0.8727 (0.8721) +Eval (nsd-val): [93] [ 0/62] eta: 0:05:14 loss: 0.8315 (0.8315) time: 5.0775 data: 5.0457 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8468 (0.8467) time: 0.1234 data: 0.0982 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:13 (0.2134 s / it) +Averaged stats (nsd-val): loss: 0.8468 (0.8467) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 10:25:25 lr: 0.000001 grad: 0.2082 (0.2082) loss: 0.7966 (0.7966) time: 6.0041 data: 5.8808 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:20:44 lr: 0.000001 grad: 0.2073 (0.2182) loss: 0.7227 (0.7517) time: 0.1440 data: 0.0195 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:18:11 lr: 0.000001 grad: 0.2293 (0.2201) loss: 0.7103 (0.7340) time: 0.1503 data: 0.0459 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:17:09 lr: 0.000001 grad: 0.2097 (0.2182) loss: 0.7159 (0.7272) time: 0.1508 data: 0.0515 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:16:35 lr: 0.000001 grad: 0.2076 (0.2172) loss: 0.7259 (0.7252) time: 0.1592 data: 0.0523 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:15:56 lr: 0.000001 grad: 0.2164 (0.2156) loss: 0.7034 (0.7249) time: 0.1462 data: 0.0606 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:15:17 lr: 0.000001 grad: 0.2074 (0.2152) loss: 0.7174 (0.7229) time: 0.1400 data: 0.0484 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:14:48 lr: 0.000001 grad: 0.2152 (0.2146) loss: 0.7043 (0.7217) time: 0.1400 data: 0.0465 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:14:29 lr: 0.000001 grad: 0.2077 (0.2143) loss: 0.7227 (0.7209) time: 0.1645 data: 0.0821 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:14:19 lr: 0.000001 grad: 0.2081 (0.2140) loss: 0.7179 (0.7200) time: 0.1826 data: 0.0954 max mem: 9377 +Train: [94] [1000/6250] eta: 0:14:08 lr: 0.000001 grad: 0.2153 (0.2140) loss: 0.7057 (0.7187) time: 0.1621 data: 0.0820 max mem: 9377 +Train: [94] [1100/6250] eta: 0:13:52 lr: 0.000001 grad: 0.2076 (0.2138) loss: 0.7049 (0.7174) time: 0.1741 data: 0.0908 max mem: 9377 +Train: [94] [1200/6250] eta: 0:13:40 lr: 0.000001 grad: 0.2095 (0.2137) loss: 0.7172 (0.7166) time: 0.1993 data: 0.1188 max mem: 9377 +Train: [94] [1300/6250] eta: 0:13:18 lr: 0.000001 grad: 0.2067 (0.2133) loss: 0.6987 (0.7158) time: 0.1484 data: 0.0601 max mem: 9377 +Train: [94] [1400/6250] eta: 0:12:53 lr: 0.000001 grad: 0.2105 (0.2131) loss: 0.7003 (0.7150) time: 0.1342 data: 0.0457 max mem: 9377 +Train: [94] [1500/6250] eta: 0:12:35 lr: 0.000001 grad: 0.2130 (0.2130) loss: 0.7072 (0.7142) time: 0.1912 data: 0.1034 max mem: 9377 +Train: [94] [1600/6250] eta: 0:12:10 lr: 0.000001 grad: 0.2095 (0.2130) loss: 0.7096 (0.7135) time: 0.1340 data: 0.0506 max mem: 9377 +Train: [94] [1700/6250] eta: 0:11:52 lr: 0.000001 grad: 0.2040 (0.2129) loss: 0.7138 (0.7130) time: 0.1480 data: 0.0674 max mem: 9377 +Train: [94] [1800/6250] eta: 0:11:34 lr: 0.000001 grad: 0.2016 (0.2126) loss: 0.7199 (0.7129) time: 0.1444 data: 0.0660 max mem: 9377 +Train: [94] [1900/6250] eta: 0:11:17 lr: 0.000001 grad: 0.2069 (0.2124) loss: 0.6974 (0.7126) time: 0.1902 data: 0.1172 max mem: 9377 +Train: [94] [2000/6250] eta: 0:11:01 lr: 0.000001 grad: 0.2062 (0.2122) loss: 0.6980 (0.7125) time: 0.1441 data: 0.0593 max mem: 9377 +Train: [94] [2100/6250] eta: 0:10:49 lr: 0.000001 grad: 0.2072 (0.2120) loss: 0.7171 (0.7125) time: 0.1459 data: 0.0578 max mem: 9377 +Train: [94] [2200/6250] eta: 0:10:33 lr: 0.000001 grad: 0.2032 (0.2118) loss: 0.7131 (0.7123) time: 0.1792 data: 0.0972 max mem: 9377 +Train: [94] [2300/6250] eta: 0:10:15 lr: 0.000001 grad: 0.2059 (0.2117) loss: 0.7017 (0.7122) time: 0.1451 data: 0.0607 max mem: 9377 +Train: [94] [2400/6250] eta: 0:09:56 lr: 0.000001 grad: 0.2117 (0.2116) loss: 0.7074 (0.7119) time: 0.1213 data: 0.0380 max mem: 9377 +Train: [94] [2500/6250] eta: 0:09:39 lr: 0.000001 grad: 0.2046 (0.2114) loss: 0.7077 (0.7118) time: 0.1393 data: 0.0515 max mem: 9377 +Train: [94] [2600/6250] eta: 0:09:23 lr: 0.000001 grad: 0.2054 (0.2114) loss: 0.6981 (0.7116) time: 0.1547 data: 0.0661 max mem: 9377 +Train: [94] [2700/6250] eta: 0:09:05 lr: 0.000001 grad: 0.2140 (0.2113) loss: 0.6980 (0.7112) time: 0.1290 data: 0.0413 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:47 lr: 0.000001 grad: 0.2048 (0.2114) loss: 0.7069 (0.7107) time: 0.1362 data: 0.0506 max mem: 9377 +Train: [94] [2900/6250] eta: 0:08:30 lr: 0.000001 grad: 0.1985 (0.2113) loss: 0.7077 (0.7105) time: 0.1283 data: 0.0434 max mem: 9377 +Train: [94] [3000/6250] eta: 0:08:12 lr: 0.000001 grad: 0.2127 (0.2112) loss: 0.7105 (0.7103) time: 0.1234 data: 0.0331 max mem: 9377 +Train: [94] [3100/6250] eta: 0:07:55 lr: 0.000001 grad: 0.2094 (0.2113) loss: 0.7059 (0.7101) time: 0.1331 data: 0.0394 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:39 lr: 0.000001 grad: 0.2085 (0.2112) loss: 0.7095 (0.7100) time: 0.1344 data: 0.0438 max mem: 9377 +Train: [94] [3300/6250] eta: 0:07:22 lr: 0.000001 grad: 0.2068 (0.2112) loss: 0.6972 (0.7098) time: 0.1343 data: 0.0469 max mem: 9377 +Train: [94] [3400/6250] eta: 0:07:06 lr: 0.000001 grad: 0.2074 (0.2113) loss: 0.7039 (0.7095) time: 0.1256 data: 0.0368 max mem: 9377 +Train: [94] [3500/6250] eta: 0:06:50 lr: 0.000001 grad: 0.2098 (0.2113) loss: 0.7062 (0.7093) time: 0.1514 data: 0.0698 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:33 lr: 0.000001 grad: 0.2105 (0.2114) loss: 0.7113 (0.7091) time: 0.1414 data: 0.0628 max mem: 9377 +Train: [94] [3700/6250] eta: 0:06:18 lr: 0.000001 grad: 0.2152 (0.2115) loss: 0.6920 (0.7091) time: 0.1359 data: 0.0479 max mem: 9377 +Train: [94] [3800/6250] eta: 0:06:02 lr: 0.000001 grad: 0.2122 (0.2115) loss: 0.7031 (0.7089) time: 0.1494 data: 0.0712 max mem: 9377 +Train: [94] [3900/6250] eta: 0:05:47 lr: 0.000001 grad: 0.2193 (0.2116) loss: 0.6924 (0.7088) time: 0.1441 data: 0.0642 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:32 lr: 0.000001 grad: 0.2159 (0.2116) loss: 0.6997 (0.7087) time: 0.1395 data: 0.0569 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:17 lr: 0.000001 grad: 0.2144 (0.2117) loss: 0.7011 (0.7086) time: 0.1660 data: 0.0846 max mem: 9377 +Train: [94] [4200/6250] eta: 0:05:02 lr: 0.000001 grad: 0.2141 (0.2117) loss: 0.7031 (0.7086) time: 0.1418 data: 0.0626 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:46 lr: 0.000001 grad: 0.2155 (0.2118) loss: 0.7078 (0.7085) time: 0.1486 data: 0.0636 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:31 lr: 0.000001 grad: 0.2137 (0.2118) loss: 0.7137 (0.7085) time: 0.1470 data: 0.0662 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:16 lr: 0.000001 grad: 0.2104 (0.2118) loss: 0.7111 (0.7086) time: 0.1388 data: 0.0544 max mem: 9377 +Train: [94] [4600/6250] eta: 0:04:01 lr: 0.000001 grad: 0.2061 (0.2118) loss: 0.7028 (0.7085) time: 0.1263 data: 0.0437 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:46 lr: 0.000001 grad: 0.2219 (0.2119) loss: 0.6857 (0.7084) time: 0.1439 data: 0.0612 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:32 lr: 0.000001 grad: 0.2069 (0.2119) loss: 0.7019 (0.7083) time: 0.1465 data: 0.0559 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:17 lr: 0.000001 grad: 0.2136 (0.2119) loss: 0.6956 (0.7082) time: 0.1510 data: 0.0678 max mem: 9377 +Train: [94] [5000/6250] eta: 0:03:02 lr: 0.000001 grad: 0.2161 (0.2120) loss: 0.6979 (0.7081) time: 0.1329 data: 0.0478 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:47 lr: 0.000001 grad: 0.2067 (0.2120) loss: 0.7049 (0.7080) time: 0.1290 data: 0.0430 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:32 lr: 0.000001 grad: 0.2087 (0.2120) loss: 0.7047 (0.7080) time: 0.1417 data: 0.0602 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:18 lr: 0.000001 grad: 0.2065 (0.2120) loss: 0.7227 (0.7079) time: 0.1443 data: 0.0676 max mem: 9377 +Train: [94] [5400/6250] eta: 0:02:03 lr: 0.000001 grad: 0.2143 (0.2120) loss: 0.6992 (0.7079) time: 0.1468 data: 0.0623 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:49 lr: 0.000001 grad: 0.2077 (0.2120) loss: 0.7073 (0.7078) time: 0.1376 data: 0.0544 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:34 lr: 0.000001 grad: 0.2133 (0.2120) loss: 0.6943 (0.7079) time: 0.1239 data: 0.0410 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:19 lr: 0.000001 grad: 0.2052 (0.2119) loss: 0.7172 (0.7079) time: 0.1500 data: 0.0662 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:05 lr: 0.000001 grad: 0.2155 (0.2118) loss: 0.7104 (0.7080) time: 0.1351 data: 0.0468 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:50 lr: 0.000001 grad: 0.2080 (0.2118) loss: 0.7081 (0.7080) time: 0.1434 data: 0.0566 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:36 lr: 0.000001 grad: 0.2083 (0.2118) loss: 0.7139 (0.7080) time: 0.1228 data: 0.0361 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:21 lr: 0.000001 grad: 0.2102 (0.2118) loss: 0.7137 (0.7080) time: 0.1370 data: 0.0518 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2133 (0.2118) loss: 0.7013 (0.7079) time: 0.1345 data: 0.0446 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2068 (0.2118) loss: 0.7077 (0.7079) time: 0.1241 data: 0.0392 max mem: 9377 +Train: [94] Total time: 0:15:09 (0.1455 s / it) +Averaged stats: lr: 0.000001 grad: 0.2068 (0.2118) loss: 0.7077 (0.7079) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:05:17 loss: 0.8715 (0.8715) time: 5.1138 data: 5.0845 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8741 (0.8746) time: 0.1236 data: 0.0980 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:14 (0.2283 s / it) +Averaged stats (hcp-train-subset): loss: 0.8741 (0.8746) +Making plots (hcp-train-subset): example=16 +Eval (hcp-val): [94] [ 0/62] eta: 0:05:28 loss: 0.8774 (0.8774) time: 5.3032 data: 5.2718 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8724 (0.8726) time: 0.1195 data: 0.0926 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:13 (0.2245 s / it) +Averaged stats (hcp-val): loss: 0.8724 (0.8726) +Making plots (hcp-val): example=51 +Eval (nsd-val): [94] [ 0/62] eta: 0:05:04 loss: 0.8335 (0.8335) time: 4.9158 data: 4.8848 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8450 (0.8467) time: 0.1232 data: 0.0982 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:13 (0.2157 s / it) +Averaged stats (nsd-val): loss: 0.8450 (0.8467) +Making plots (nsd-val): example=41 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 9:40:13 lr: 0.000001 grad: 0.2582 (0.2582) loss: 0.7522 (0.7522) time: 5.5701 data: 5.4334 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:19:58 lr: 0.000001 grad: 0.2316 (0.2351) loss: 0.7247 (0.7192) time: 0.1494 data: 0.0480 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:17:08 lr: 0.000001 grad: 0.2144 (0.2309) loss: 0.6877 (0.7092) time: 0.1449 data: 0.0499 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:15:50 lr: 0.000001 grad: 0.2165 (0.2279) loss: 0.7169 (0.7069) time: 0.1553 data: 0.0700 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:15:08 lr: 0.000001 grad: 0.2157 (0.2255) loss: 0.7173 (0.7070) time: 0.1406 data: 0.0461 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:14:29 lr: 0.000001 grad: 0.2064 (0.2217) loss: 0.7272 (0.7094) time: 0.1333 data: 0.0432 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:14:01 lr: 0.000001 grad: 0.2081 (0.2193) loss: 0.7091 (0.7107) time: 0.1261 data: 0.0317 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:13:31 lr: 0.000001 grad: 0.2045 (0.2184) loss: 0.7158 (0.7108) time: 0.1304 data: 0.0391 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:13:09 lr: 0.000001 grad: 0.2057 (0.2173) loss: 0.7162 (0.7109) time: 0.1348 data: 0.0436 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:12:50 lr: 0.000001 grad: 0.2078 (0.2168) loss: 0.7233 (0.7107) time: 0.1299 data: 0.0399 max mem: 9377 +Train: [95] [1000/6250] eta: 0:12:29 lr: 0.000001 grad: 0.2074 (0.2161) loss: 0.7113 (0.7108) time: 0.1338 data: 0.0467 max mem: 9377 +Train: [95] [1100/6250] eta: 0:12:11 lr: 0.000001 grad: 0.2155 (0.2159) loss: 0.7024 (0.7103) time: 0.1248 data: 0.0373 max mem: 9377 +Train: [95] [1200/6250] eta: 0:11:57 lr: 0.000001 grad: 0.2115 (0.2157) loss: 0.7075 (0.7098) time: 0.1369 data: 0.0553 max mem: 9377 +Train: [95] [1300/6250] eta: 0:11:37 lr: 0.000001 grad: 0.2094 (0.2154) loss: 0.7137 (0.7097) time: 0.1332 data: 0.0428 max mem: 9377 +Train: [95] [1400/6250] eta: 0:11:21 lr: 0.000001 grad: 0.2048 (0.2153) loss: 0.7130 (0.7098) time: 0.1265 data: 0.0380 max mem: 9377 +Train: [95] [1500/6250] eta: 0:11:06 lr: 0.000001 grad: 0.2079 (0.2150) loss: 0.7010 (0.7097) time: 0.1365 data: 0.0461 max mem: 9377 +Train: [95] [1600/6250] eta: 0:10:49 lr: 0.000001 grad: 0.2019 (0.2146) loss: 0.7052 (0.7096) time: 0.1375 data: 0.0547 max mem: 9377 +Train: [95] [1700/6250] eta: 0:10:34 lr: 0.000001 grad: 0.2058 (0.2143) loss: 0.7133 (0.7095) time: 0.1280 data: 0.0371 max mem: 9377 +Train: [95] [1800/6250] eta: 0:10:22 lr: 0.000001 grad: 0.2127 (0.2142) loss: 0.7005 (0.7092) time: 0.1718 data: 0.0984 max mem: 9377 +Train: [95] [1900/6250] eta: 0:10:07 lr: 0.000001 grad: 0.2145 (0.2142) loss: 0.6977 (0.7088) time: 0.1378 data: 0.0571 max mem: 9377 +Train: [95] [2000/6250] eta: 0:09:54 lr: 0.000001 grad: 0.2129 (0.2142) loss: 0.7000 (0.7085) time: 0.1448 data: 0.0639 max mem: 9377 +Train: [95] [2100/6250] eta: 0:09:41 lr: 0.000001 grad: 0.2089 (0.2141) loss: 0.7091 (0.7085) time: 0.1505 data: 0.0642 max mem: 9377 +Train: [95] [2200/6250] eta: 0:09:30 lr: 0.000001 grad: 0.2167 (0.2141) loss: 0.6955 (0.7082) time: 0.1282 data: 0.0433 max mem: 9377 +Train: [95] [2300/6250] eta: 0:09:18 lr: 0.000001 grad: 0.2049 (0.2139) loss: 0.7165 (0.7082) time: 0.1520 data: 0.0676 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:02 lr: 0.000001 grad: 0.2127 (0.2138) loss: 0.7094 (0.7082) time: 0.1450 data: 0.0662 max mem: 9377 +Train: [95] [2500/6250] eta: 0:08:47 lr: 0.000001 grad: 0.2114 (0.2138) loss: 0.6994 (0.7082) time: 0.1369 data: 0.0595 max mem: 9377 +Train: [95] [2600/6250] eta: 0:08:32 lr: 0.000001 grad: 0.2129 (0.2137) loss: 0.6959 (0.7081) time: 0.1505 data: 0.0672 max mem: 9377 +Train: [95] [2700/6250] eta: 0:08:19 lr: 0.000001 grad: 0.2071 (0.2136) loss: 0.7046 (0.7081) time: 0.1409 data: 0.0543 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:04 lr: 0.000001 grad: 0.2181 (0.2137) loss: 0.7028 (0.7079) time: 0.1400 data: 0.0460 max mem: 9377 +Train: [95] [2900/6250] eta: 0:07:49 lr: 0.000001 grad: 0.2104 (0.2136) loss: 0.7110 (0.7078) time: 0.1175 data: 0.0296 max mem: 9377 +Train: [95] [3000/6250] eta: 0:07:34 lr: 0.000001 grad: 0.2106 (0.2137) loss: 0.7081 (0.7077) time: 0.1178 data: 0.0313 max mem: 9377 +Train: [95] [3100/6250] eta: 0:07:19 lr: 0.000001 grad: 0.2088 (0.2136) loss: 0.6908 (0.7076) time: 0.1300 data: 0.0408 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:04 lr: 0.000001 grad: 0.2106 (0.2135) loss: 0.7049 (0.7075) time: 0.1419 data: 0.0615 max mem: 9377 +Train: [95] [3300/6250] eta: 0:06:50 lr: 0.000001 grad: 0.2121 (0.2134) loss: 0.7047 (0.7076) time: 0.1664 data: 0.0901 max mem: 9377 +Train: [95] [3400/6250] eta: 0:06:37 lr: 0.000001 grad: 0.2092 (0.2135) loss: 0.7040 (0.7076) time: 0.1491 data: 0.0700 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:24 lr: 0.000001 grad: 0.2145 (0.2135) loss: 0.7044 (0.7076) time: 0.1489 data: 0.0680 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:10 lr: 0.000001 grad: 0.2075 (0.2134) loss: 0.7049 (0.7077) time: 0.1234 data: 0.0427 max mem: 9377 +Train: [95] [3700/6250] eta: 0:05:56 lr: 0.000001 grad: 0.2118 (0.2133) loss: 0.7069 (0.7078) time: 0.1205 data: 0.0385 max mem: 9377 +Train: [95] [3800/6250] eta: 0:05:42 lr: 0.000001 grad: 0.2052 (0.2132) loss: 0.7153 (0.7080) time: 0.1482 data: 0.0654 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:27 lr: 0.000001 grad: 0.2110 (0.2132) loss: 0.7050 (0.7081) time: 0.1298 data: 0.0347 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:13 lr: 0.000001 grad: 0.2106 (0.2132) loss: 0.7076 (0.7081) time: 0.1600 data: 0.0778 max mem: 9377 +Train: [95] [4100/6250] eta: 0:04:59 lr: 0.000001 grad: 0.2064 (0.2131) loss: 0.7100 (0.7081) time: 0.1313 data: 0.0482 max mem: 9377 +Train: [95] [4200/6250] eta: 0:04:45 lr: 0.000001 grad: 0.2033 (0.2129) loss: 0.7044 (0.7081) time: 0.1338 data: 0.0498 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:31 lr: 0.000001 grad: 0.2149 (0.2129) loss: 0.6984 (0.7082) time: 0.1329 data: 0.0439 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:17 lr: 0.000001 grad: 0.2097 (0.2128) loss: 0.7057 (0.7082) time: 0.1363 data: 0.0466 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:03 lr: 0.000001 grad: 0.2093 (0.2128) loss: 0.7066 (0.7081) time: 0.1043 data: 0.0002 max mem: 9377 +Train: [95] [4600/6250] eta: 0:03:49 lr: 0.000001 grad: 0.2174 (0.2127) loss: 0.6971 (0.7081) time: 0.1472 data: 0.0660 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:35 lr: 0.000001 grad: 0.2129 (0.2127) loss: 0.7097 (0.7082) time: 0.1480 data: 0.0628 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:21 lr: 0.000001 grad: 0.2111 (0.2126) loss: 0.7049 (0.7082) time: 0.1252 data: 0.0378 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:07 lr: 0.000001 grad: 0.2078 (0.2126) loss: 0.7010 (0.7082) time: 0.1203 data: 0.0322 max mem: 9377 +Train: [95] [5000/6250] eta: 0:02:53 lr: 0.000001 grad: 0.2126 (0.2127) loss: 0.7037 (0.7082) time: 0.1331 data: 0.0495 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:39 lr: 0.000001 grad: 0.2088 (0.2126) loss: 0.7069 (0.7082) time: 0.1221 data: 0.0426 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:25 lr: 0.000001 grad: 0.2097 (0.2126) loss: 0.7089 (0.7083) time: 0.1448 data: 0.0699 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:11 lr: 0.000001 grad: 0.2039 (0.2125) loss: 0.7271 (0.7085) time: 0.1282 data: 0.0491 max mem: 9377 +Train: [95] [5400/6250] eta: 0:01:57 lr: 0.000001 grad: 0.2147 (0.2125) loss: 0.7033 (0.7085) time: 0.1423 data: 0.0649 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:43 lr: 0.000001 grad: 0.2048 (0.2125) loss: 0.7114 (0.7085) time: 0.1451 data: 0.0650 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:29 lr: 0.000001 grad: 0.2096 (0.2125) loss: 0.7110 (0.7087) time: 0.1154 data: 0.0335 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:16 lr: 0.000001 grad: 0.2077 (0.2124) loss: 0.7112 (0.7087) time: 0.1546 data: 0.0719 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:02 lr: 0.000001 grad: 0.2156 (0.2124) loss: 0.7034 (0.7087) time: 0.1262 data: 0.0418 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:48 lr: 0.000001 grad: 0.2104 (0.2123) loss: 0.7160 (0.7088) time: 0.1344 data: 0.0524 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:34 lr: 0.000001 grad: 0.2029 (0.2122) loss: 0.7136 (0.7089) time: 0.1434 data: 0.0669 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:20 lr: 0.000001 grad: 0.2031 (0.2121) loss: 0.7174 (0.7091) time: 0.1360 data: 0.0499 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:06 lr: 0.000001 grad: 0.2120 (0.2121) loss: 0.7075 (0.7090) time: 0.1412 data: 0.0566 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2123 (0.2121) loss: 0.6965 (0.7090) time: 0.1348 data: 0.0513 max mem: 9377 +Train: [95] Total time: 0:14:31 (0.1394 s / it) +Averaged stats: lr: 0.000001 grad: 0.2123 (0.2121) loss: 0.6965 (0.7090) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:04:14 loss: 0.8727 (0.8727) time: 4.1116 data: 4.0529 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8732 (0.8754) time: 0.1480 data: 0.1224 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-train-subset): loss: 0.8732 (0.8754) +Eval (hcp-val): [95] [ 0/62] eta: 0:05:49 loss: 0.8817 (0.8817) time: 5.6395 data: 5.6066 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8716 (0.8722) time: 0.1499 data: 0.1242 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:15 (0.2498 s / it) +Averaged stats (hcp-val): loss: 0.8716 (0.8722) +Eval (nsd-val): [95] [ 0/62] eta: 0:07:00 loss: 0.8371 (0.8371) time: 6.7899 data: 6.7592 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8458 (0.8468) time: 0.1303 data: 0.1052 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:15 (0.2540 s / it) +Averaged stats (nsd-val): loss: 0.8458 (0.8468) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 9:34:15 lr: 0.000001 grad: 0.2850 (0.2850) loss: 0.6856 (0.6856) time: 5.5128 data: 5.2158 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:22:04 lr: 0.000001 grad: 0.2187 (0.2265) loss: 0.7214 (0.7363) time: 0.1671 data: 0.0664 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:18:50 lr: 0.000001 grad: 0.2172 (0.2270) loss: 0.7260 (0.7275) time: 0.1580 data: 0.0616 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:17:40 lr: 0.000001 grad: 0.2002 (0.2219) loss: 0.7393 (0.7267) time: 0.1441 data: 0.0453 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:16:49 lr: 0.000001 grad: 0.2047 (0.2193) loss: 0.7390 (0.7245) time: 0.1695 data: 0.0830 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:16:07 lr: 0.000001 grad: 0.2096 (0.2176) loss: 0.7083 (0.7220) time: 0.1516 data: 0.0540 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:15:23 lr: 0.000001 grad: 0.2115 (0.2167) loss: 0.7203 (0.7209) time: 0.1358 data: 0.0439 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:14:56 lr: 0.000001 grad: 0.2089 (0.2158) loss: 0.7129 (0.7203) time: 0.1719 data: 0.0877 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:14:37 lr: 0.000001 grad: 0.2063 (0.2151) loss: 0.7258 (0.7201) time: 0.1431 data: 0.0506 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:14:24 lr: 0.000001 grad: 0.2159 (0.2145) loss: 0.7121 (0.7192) time: 0.1849 data: 0.1054 max mem: 9377 +Train: [96] [1000/6250] eta: 0:14:04 lr: 0.000001 grad: 0.2075 (0.2143) loss: 0.7116 (0.7184) time: 0.1286 data: 0.0390 max mem: 9377 +Train: [96] [1100/6250] eta: 0:13:54 lr: 0.000000 grad: 0.2106 (0.2139) loss: 0.7043 (0.7181) time: 0.1490 data: 0.0646 max mem: 9377 +Train: [96] [1200/6250] eta: 0:13:36 lr: 0.000000 grad: 0.2052 (0.2135) loss: 0.7124 (0.7180) time: 0.1639 data: 0.0797 max mem: 9377 +Train: [96] [1300/6250] eta: 0:13:20 lr: 0.000000 grad: 0.2051 (0.2132) loss: 0.7166 (0.7178) time: 0.1713 data: 0.0906 max mem: 9377 +Train: [96] [1400/6250] eta: 0:13:02 lr: 0.000000 grad: 0.2060 (0.2129) loss: 0.7254 (0.7178) time: 0.1405 data: 0.0472 max mem: 9377 +Train: [96] [1500/6250] eta: 0:12:39 lr: 0.000000 grad: 0.2107 (0.2128) loss: 0.7150 (0.7175) time: 0.1294 data: 0.0438 max mem: 9377 +Train: [96] [1600/6250] eta: 0:12:17 lr: 0.000000 grad: 0.2061 (0.2126) loss: 0.7143 (0.7173) time: 0.1211 data: 0.0313 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:56 lr: 0.000000 grad: 0.2118 (0.2125) loss: 0.7011 (0.7171) time: 0.1520 data: 0.0713 max mem: 9377 +Train: [96] [1800/6250] eta: 0:11:36 lr: 0.000000 grad: 0.2100 (0.2123) loss: 0.7106 (0.7169) time: 0.1425 data: 0.0595 max mem: 9377 +Train: [96] [1900/6250] eta: 0:11:22 lr: 0.000000 grad: 0.2123 (0.2121) loss: 0.7110 (0.7168) time: 0.1402 data: 0.0596 max mem: 9377 +Train: [96] [2000/6250] eta: 0:11:06 lr: 0.000000 grad: 0.2107 (0.2121) loss: 0.7173 (0.7165) time: 0.1514 data: 0.0666 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:49 lr: 0.000000 grad: 0.2016 (0.2118) loss: 0.7223 (0.7166) time: 0.1585 data: 0.0769 max mem: 9377 +Train: [96] [2200/6250] eta: 0:10:33 lr: 0.000000 grad: 0.2022 (0.2116) loss: 0.7124 (0.7165) time: 0.1569 data: 0.0750 max mem: 9377 +Train: [96] [2300/6250] eta: 0:10:17 lr: 0.000000 grad: 0.2077 (0.2114) loss: 0.7038 (0.7165) time: 0.1501 data: 0.0646 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:59 lr: 0.000000 grad: 0.2076 (0.2114) loss: 0.7165 (0.7162) time: 0.1344 data: 0.0446 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:42 lr: 0.000000 grad: 0.2040 (0.2113) loss: 0.7181 (0.7160) time: 0.1635 data: 0.0820 max mem: 9377 +Train: [96] [2600/6250] eta: 0:09:27 lr: 0.000000 grad: 0.2038 (0.2113) loss: 0.7066 (0.7157) time: 0.1364 data: 0.0585 max mem: 9377 +Train: [96] [2700/6250] eta: 0:09:10 lr: 0.000000 grad: 0.2076 (0.2112) loss: 0.7197 (0.7157) time: 0.1298 data: 0.0510 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:52 lr: 0.000000 grad: 0.2081 (0.2111) loss: 0.7103 (0.7157) time: 0.1226 data: 0.0474 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:35 lr: 0.000000 grad: 0.2047 (0.2110) loss: 0.7183 (0.7157) time: 0.1310 data: 0.0455 max mem: 9377 +Train: [96] [3000/6250] eta: 0:08:17 lr: 0.000000 grad: 0.2095 (0.2110) loss: 0.7126 (0.7157) time: 0.1348 data: 0.0483 max mem: 9377 +Train: [96] [3100/6250] eta: 0:08:00 lr: 0.000000 grad: 0.2093 (0.2109) loss: 0.7129 (0.7157) time: 0.1423 data: 0.0541 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:43 lr: 0.000000 grad: 0.2097 (0.2109) loss: 0.7154 (0.7157) time: 0.1241 data: 0.0370 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:25 lr: 0.000000 grad: 0.2129 (0.2110) loss: 0.7226 (0.7155) time: 0.1194 data: 0.0318 max mem: 9377 +Train: [96] [3400/6250] eta: 0:07:08 lr: 0.000000 grad: 0.2076 (0.2109) loss: 0.7233 (0.7155) time: 0.1292 data: 0.0435 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:52 lr: 0.000000 grad: 0.2086 (0.2109) loss: 0.7131 (0.7156) time: 0.1402 data: 0.0530 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:35 lr: 0.000000 grad: 0.2030 (0.2108) loss: 0.7230 (0.7156) time: 0.1256 data: 0.0403 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:19 lr: 0.000000 grad: 0.2065 (0.2108) loss: 0.7105 (0.7157) time: 0.1499 data: 0.0685 max mem: 9377 +Train: [96] [3800/6250] eta: 0:06:04 lr: 0.000000 grad: 0.2024 (0.2108) loss: 0.7155 (0.7156) time: 0.1361 data: 0.0594 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:49 lr: 0.000000 grad: 0.2074 (0.2107) loss: 0.7122 (0.7156) time: 0.1637 data: 0.0830 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:34 lr: 0.000000 grad: 0.2114 (0.2107) loss: 0.7186 (0.7155) time: 0.1195 data: 0.0362 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:19 lr: 0.000000 grad: 0.2097 (0.2107) loss: 0.7155 (0.7155) time: 0.1572 data: 0.0759 max mem: 9377 +Train: [96] [4200/6250] eta: 0:05:04 lr: 0.000000 grad: 0.2071 (0.2106) loss: 0.7150 (0.7155) time: 0.1186 data: 0.0331 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:49 lr: 0.000000 grad: 0.2009 (0.2105) loss: 0.7243 (0.7155) time: 0.1447 data: 0.0714 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:33 lr: 0.000000 grad: 0.1974 (0.2104) loss: 0.7251 (0.7155) time: 0.1316 data: 0.0495 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:18 lr: 0.000000 grad: 0.2038 (0.2104) loss: 0.7105 (0.7155) time: 0.1273 data: 0.0458 max mem: 9377 +Train: [96] [4600/6250] eta: 0:04:03 lr: 0.000000 grad: 0.2047 (0.2103) loss: 0.7295 (0.7156) time: 0.1493 data: 0.0687 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:48 lr: 0.000000 grad: 0.2042 (0.2102) loss: 0.7168 (0.7157) time: 0.1604 data: 0.0817 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:33 lr: 0.000000 grad: 0.1970 (0.2101) loss: 0.7282 (0.7158) time: 0.1340 data: 0.0481 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:18 lr: 0.000000 grad: 0.2050 (0.2101) loss: 0.7184 (0.7160) time: 0.1390 data: 0.0575 max mem: 9377 +Train: [96] [5000/6250] eta: 0:03:03 lr: 0.000000 grad: 0.2043 (0.2100) loss: 0.7156 (0.7160) time: 0.1399 data: 0.0576 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:48 lr: 0.000000 grad: 0.2036 (0.2099) loss: 0.7222 (0.7160) time: 0.1422 data: 0.0585 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:34 lr: 0.000000 grad: 0.2090 (0.2099) loss: 0.7180 (0.7161) time: 0.1574 data: 0.0728 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:19 lr: 0.000000 grad: 0.2080 (0.2098) loss: 0.7186 (0.7162) time: 0.1259 data: 0.0415 max mem: 9377 +Train: [96] [5400/6250] eta: 0:02:04 lr: 0.000000 grad: 0.2108 (0.2098) loss: 0.7083 (0.7162) time: 0.1527 data: 0.0716 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:49 lr: 0.000000 grad: 0.2106 (0.2098) loss: 0.6988 (0.7161) time: 0.1370 data: 0.0570 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:35 lr: 0.000000 grad: 0.2025 (0.2098) loss: 0.7115 (0.7161) time: 0.1416 data: 0.0593 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:20 lr: 0.000000 grad: 0.2097 (0.2098) loss: 0.7099 (0.7160) time: 0.1424 data: 0.0629 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:05 lr: 0.000000 grad: 0.2078 (0.2098) loss: 0.7111 (0.7159) time: 0.1222 data: 0.0416 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:50 lr: 0.000000 grad: 0.2069 (0.2098) loss: 0.7244 (0.7159) time: 0.1495 data: 0.0711 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.2051 (0.2098) loss: 0.7113 (0.7157) time: 0.1348 data: 0.0593 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2135 (0.2098) loss: 0.7008 (0.7156) time: 0.1306 data: 0.0513 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2103 (0.2098) loss: 0.7232 (0.7157) time: 0.1441 data: 0.0626 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2113 (0.2098) loss: 0.7090 (0.7156) time: 0.1348 data: 0.0515 max mem: 9377 +Train: [96] Total time: 0:15:11 (0.1459 s / it) +Averaged stats: lr: 0.000000 grad: 0.2113 (0.2098) loss: 0.7090 (0.7156) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:03:57 loss: 0.8661 (0.8661) time: 3.8370 data: 3.7813 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8711 (0.8753) time: 0.1052 data: 0.0782 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:14 (0.2281 s / it) +Averaged stats (hcp-train-subset): loss: 0.8711 (0.8753) +Eval (hcp-val): [96] [ 0/62] eta: 0:05:05 loss: 0.8750 (0.8750) time: 4.9308 data: 4.8934 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8720 (0.8723) time: 0.1165 data: 0.0910 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (hcp-val): loss: 0.8720 (0.8723) +Eval (nsd-val): [96] [ 0/62] eta: 0:03:55 loss: 0.8328 (0.8328) time: 3.7905 data: 3.7174 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8457 (0.8468) time: 0.1370 data: 0.1120 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:13 (0.2185 s / it) +Averaged stats (nsd-val): loss: 0.8457 (0.8468) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 10:21:57 lr: 0.000000 grad: 0.2815 (0.2815) loss: 0.6743 (0.6743) time: 5.9709 data: 5.8712 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:20:11 lr: 0.000000 grad: 0.2312 (0.2404) loss: 0.7086 (0.7038) time: 0.1459 data: 0.0517 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:17:00 lr: 0.000000 grad: 0.2198 (0.2359) loss: 0.7083 (0.6998) time: 0.1245 data: 0.0287 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:15:55 lr: 0.000000 grad: 0.2182 (0.2309) loss: 0.7012 (0.7023) time: 0.1399 data: 0.0466 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:14:57 lr: 0.000000 grad: 0.2111 (0.2283) loss: 0.7136 (0.7019) time: 0.1251 data: 0.0270 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:14:25 lr: 0.000000 grad: 0.2118 (0.2260) loss: 0.7173 (0.7028) time: 0.1419 data: 0.0543 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:14:06 lr: 0.000000 grad: 0.2146 (0.2243) loss: 0.7135 (0.7037) time: 0.1767 data: 0.0733 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:13:42 lr: 0.000000 grad: 0.2159 (0.2230) loss: 0.6867 (0.7037) time: 0.1365 data: 0.0429 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:13:20 lr: 0.000000 grad: 0.2150 (0.2224) loss: 0.7087 (0.7036) time: 0.1185 data: 0.0262 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:13:00 lr: 0.000000 grad: 0.2103 (0.2217) loss: 0.6992 (0.7037) time: 0.1471 data: 0.0631 max mem: 9377 +Train: [97] [1000/6250] eta: 0:12:42 lr: 0.000000 grad: 0.2125 (0.2210) loss: 0.7045 (0.7040) time: 0.1488 data: 0.0617 max mem: 9377 +Train: [97] [1100/6250] eta: 0:12:21 lr: 0.000000 grad: 0.2207 (0.2206) loss: 0.7070 (0.7042) time: 0.1467 data: 0.0606 max mem: 9377 +Train: [97] [1200/6250] eta: 0:12:00 lr: 0.000000 grad: 0.2123 (0.2201) loss: 0.7127 (0.7043) time: 0.1247 data: 0.0376 max mem: 9377 +Train: [97] [1300/6250] eta: 0:11:42 lr: 0.000000 grad: 0.2093 (0.2196) loss: 0.7142 (0.7044) time: 0.1169 data: 0.0291 max mem: 9377 +Train: [97] [1400/6250] eta: 0:11:26 lr: 0.000000 grad: 0.2146 (0.2193) loss: 0.7069 (0.7046) time: 0.1296 data: 0.0526 max mem: 9377 +Train: [97] [1500/6250] eta: 0:11:09 lr: 0.000000 grad: 0.2117 (0.2189) loss: 0.7024 (0.7049) time: 0.1460 data: 0.0629 max mem: 9377 +Train: [97] [1600/6250] eta: 0:10:52 lr: 0.000000 grad: 0.2121 (0.2187) loss: 0.7141 (0.7050) time: 0.1225 data: 0.0322 max mem: 9377 +Train: [97] [1700/6250] eta: 0:10:36 lr: 0.000000 grad: 0.2091 (0.2182) loss: 0.7020 (0.7052) time: 0.1333 data: 0.0450 max mem: 9377 +Train: [97] [1800/6250] eta: 0:10:21 lr: 0.000000 grad: 0.2184 (0.2181) loss: 0.7035 (0.7052) time: 0.1372 data: 0.0502 max mem: 9377 +Train: [97] [1900/6250] eta: 0:10:10 lr: 0.000000 grad: 0.2189 (0.2181) loss: 0.6980 (0.7049) time: 0.1692 data: 0.0883 max mem: 9377 +Train: [97] [2000/6250] eta: 0:10:00 lr: 0.000000 grad: 0.2076 (0.2177) loss: 0.7160 (0.7051) time: 0.1812 data: 0.0975 max mem: 9377 +Train: [97] [2100/6250] eta: 0:09:49 lr: 0.000000 grad: 0.2095 (0.2176) loss: 0.7103 (0.7050) time: 0.1689 data: 0.0892 max mem: 9377 +Train: [97] [2200/6250] eta: 0:09:36 lr: 0.000000 grad: 0.2115 (0.2175) loss: 0.6908 (0.7048) time: 0.1427 data: 0.0697 max mem: 9377 +Train: [97] [2300/6250] eta: 0:09:21 lr: 0.000000 grad: 0.2204 (0.2175) loss: 0.7139 (0.7048) time: 0.1534 data: 0.0718 max mem: 9377 +Train: [97] [2400/6250] eta: 0:09:07 lr: 0.000000 grad: 0.2130 (0.2174) loss: 0.7068 (0.7049) time: 0.1315 data: 0.0404 max mem: 9377 +Train: [97] [2500/6250] eta: 0:08:51 lr: 0.000000 grad: 0.2138 (0.2173) loss: 0.7138 (0.7050) time: 0.1310 data: 0.0466 max mem: 9377 +Train: [97] [2600/6250] eta: 0:08:36 lr: 0.000000 grad: 0.2105 (0.2171) loss: 0.7157 (0.7052) time: 0.1169 data: 0.0375 max mem: 9377 +Train: [97] [2700/6250] eta: 0:08:22 lr: 0.000000 grad: 0.2072 (0.2169) loss: 0.7112 (0.7053) time: 0.1424 data: 0.0596 max mem: 9377 +Train: [97] [2800/6250] eta: 0:08:07 lr: 0.000000 grad: 0.2102 (0.2167) loss: 0.7094 (0.7054) time: 0.1514 data: 0.0617 max mem: 9377 +Train: [97] [2900/6250] eta: 0:07:53 lr: 0.000000 grad: 0.2135 (0.2166) loss: 0.7021 (0.7055) time: 0.1436 data: 0.0575 max mem: 9377 +Train: [97] [3000/6250] eta: 0:07:40 lr: 0.000000 grad: 0.2092 (0.2165) loss: 0.7043 (0.7056) time: 0.1709 data: 0.0784 max mem: 9377 +Train: [97] [3100/6250] eta: 0:07:28 lr: 0.000000 grad: 0.2143 (0.2164) loss: 0.7040 (0.7056) time: 0.1887 data: 0.1047 max mem: 9377 +Train: [97] [3200/6250] eta: 0:07:13 lr: 0.000000 grad: 0.2109 (0.2162) loss: 0.7161 (0.7057) time: 0.1142 data: 0.0270 max mem: 9377 +Train: [97] [3300/6250] eta: 0:06:59 lr: 0.000000 grad: 0.2071 (0.2161) loss: 0.7009 (0.7057) time: 0.1161 data: 0.0269 max mem: 9377 +Train: [97] [3400/6250] eta: 0:06:45 lr: 0.000000 grad: 0.2074 (0.2160) loss: 0.7084 (0.7056) time: 0.1380 data: 0.0531 max mem: 9377 +Train: [97] [3500/6250] eta: 0:06:31 lr: 0.000000 grad: 0.2105 (0.2159) loss: 0.7114 (0.7056) time: 0.1350 data: 0.0274 max mem: 9377 +Train: [97] [3600/6250] eta: 0:06:16 lr: 0.000000 grad: 0.2064 (0.2157) loss: 0.7162 (0.7057) time: 0.1382 data: 0.0492 max mem: 9377 +Train: [97] [3700/6250] eta: 0:06:01 lr: 0.000000 grad: 0.2124 (0.2157) loss: 0.6998 (0.7057) time: 0.1301 data: 0.0445 max mem: 9377 +Train: [97] [3800/6250] eta: 0:05:48 lr: 0.000000 grad: 0.2076 (0.2156) loss: 0.7172 (0.7058) time: 0.1737 data: 0.0856 max mem: 9377 +Train: [97] [3900/6250] eta: 0:05:33 lr: 0.000000 grad: 0.2092 (0.2155) loss: 0.7141 (0.7058) time: 0.1340 data: 0.0405 max mem: 9377 +Train: [97] [4000/6250] eta: 0:05:19 lr: 0.000000 grad: 0.2097 (0.2155) loss: 0.7065 (0.7058) time: 0.1408 data: 0.0595 max mem: 9377 +Train: [97] [4100/6250] eta: 0:05:04 lr: 0.000000 grad: 0.2104 (0.2154) loss: 0.7051 (0.7058) time: 0.1231 data: 0.0347 max mem: 9377 +Train: [97] [4200/6250] eta: 0:04:50 lr: 0.000000 grad: 0.2057 (0.2153) loss: 0.7075 (0.7057) time: 0.1601 data: 0.0641 max mem: 9377 +Train: [97] [4300/6250] eta: 0:04:36 lr: 0.000000 grad: 0.2143 (0.2153) loss: 0.7048 (0.7057) time: 0.1507 data: 0.0673 max mem: 9377 +Train: [97] [4400/6250] eta: 0:04:22 lr: 0.000000 grad: 0.2158 (0.2152) loss: 0.7008 (0.7056) time: 0.1524 data: 0.0667 max mem: 9377 +Train: [97] [4500/6250] eta: 0:04:09 lr: 0.000000 grad: 0.2064 (0.2151) loss: 0.7062 (0.7056) time: 0.1528 data: 0.0436 max mem: 9377 +Train: [97] [4600/6250] eta: 0:03:56 lr: 0.000000 grad: 0.1989 (0.2151) loss: 0.7131 (0.7056) time: 0.1428 data: 0.0569 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:42 lr: 0.000000 grad: 0.2140 (0.2150) loss: 0.6931 (0.7055) time: 0.1421 data: 0.0582 max mem: 9377 +Train: [97] [4800/6250] eta: 0:03:27 lr: 0.000000 grad: 0.2081 (0.2150) loss: 0.7166 (0.7056) time: 0.1290 data: 0.0409 max mem: 9377 +Train: [97] [4900/6250] eta: 0:03:13 lr: 0.000000 grad: 0.2092 (0.2149) loss: 0.6996 (0.7056) time: 0.1362 data: 0.0524 max mem: 9377 +Train: [97] [5000/6250] eta: 0:02:58 lr: 0.000000 grad: 0.2102 (0.2148) loss: 0.7050 (0.7056) time: 0.1373 data: 0.0535 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:44 lr: 0.000000 grad: 0.2124 (0.2148) loss: 0.7241 (0.7058) time: 0.1398 data: 0.0513 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:29 lr: 0.000000 grad: 0.2074 (0.2147) loss: 0.7138 (0.7059) time: 0.1387 data: 0.0548 max mem: 9377 +Train: [97] [5300/6250] eta: 0:02:15 lr: 0.000000 grad: 0.2139 (0.2146) loss: 0.7122 (0.7058) time: 0.1224 data: 0.0360 max mem: 9377 +Train: [97] [5400/6250] eta: 0:02:00 lr: 0.000000 grad: 0.2028 (0.2146) loss: 0.7014 (0.7058) time: 0.1258 data: 0.0436 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:46 lr: 0.000000 grad: 0.2099 (0.2146) loss: 0.7161 (0.7059) time: 0.1329 data: 0.0483 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:32 lr: 0.000000 grad: 0.2072 (0.2145) loss: 0.7125 (0.7059) time: 0.1465 data: 0.0649 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:17 lr: 0.000000 grad: 0.2083 (0.2144) loss: 0.7033 (0.7060) time: 0.1324 data: 0.0493 max mem: 9377 +Train: [97] [5800/6250] eta: 0:01:03 lr: 0.000000 grad: 0.2097 (0.2144) loss: 0.7019 (0.7060) time: 0.1466 data: 0.0662 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:49 lr: 0.000000 grad: 0.2094 (0.2143) loss: 0.7127 (0.7060) time: 0.1435 data: 0.0548 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.2062 (0.2142) loss: 0.6993 (0.7060) time: 0.1361 data: 0.0444 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2115 (0.2141) loss: 0.6988 (0.7060) time: 0.1397 data: 0.0589 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2058 (0.2140) loss: 0.7093 (0.7061) time: 0.1428 data: 0.0591 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2125 (0.2140) loss: 0.7049 (0.7061) time: 0.1171 data: 0.0263 max mem: 9377 +Train: [97] Total time: 0:14:48 (0.1421 s / it) +Averaged stats: lr: 0.000000 grad: 0.2125 (0.2140) loss: 0.7049 (0.7061) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:04:43 loss: 0.8630 (0.8630) time: 4.5799 data: 4.5437 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8735 (0.8749) time: 0.1443 data: 0.1188 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:14 (0.2258 s / it) +Averaged stats (hcp-train-subset): loss: 0.8735 (0.8749) +Eval (hcp-val): [97] [ 0/62] eta: 0:04:06 loss: 0.8692 (0.8692) time: 3.9820 data: 3.8887 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8721 (0.8721) time: 0.1452 data: 0.1199 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (hcp-val): loss: 0.8721 (0.8721) +Eval (nsd-val): [97] [ 0/62] eta: 0:05:23 loss: 0.8285 (0.8285) time: 5.2194 data: 5.1889 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8459 (0.8469) time: 0.1359 data: 0.1089 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:13 (0.2206 s / it) +Averaged stats (nsd-val): loss: 0.8459 (0.8469) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 12:36:12 lr: 0.000000 grad: 0.3113 (0.3113) loss: 0.6336 (0.6336) time: 7.2595 data: 7.1299 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:20:43 lr: 0.000000 grad: 0.2075 (0.2230) loss: 0.7098 (0.7273) time: 0.1561 data: 0.0555 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:17:18 lr: 0.000000 grad: 0.2355 (0.2207) loss: 0.6970 (0.7213) time: 0.1339 data: 0.0391 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:16:21 lr: 0.000000 grad: 0.2102 (0.2191) loss: 0.6860 (0.7167) time: 0.1542 data: 0.0599 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:15:26 lr: 0.000000 grad: 0.2158 (0.2184) loss: 0.6866 (0.7133) time: 0.1453 data: 0.0505 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:14:45 lr: 0.000000 grad: 0.2127 (0.2176) loss: 0.6981 (0.7117) time: 0.1268 data: 0.0351 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:14:13 lr: 0.000000 grad: 0.2104 (0.2166) loss: 0.7054 (0.7110) time: 0.1288 data: 0.0372 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:13:46 lr: 0.000000 grad: 0.2039 (0.2157) loss: 0.7039 (0.7106) time: 0.1301 data: 0.0355 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:13:20 lr: 0.000000 grad: 0.2005 (0.2150) loss: 0.7070 (0.7100) time: 0.1317 data: 0.0390 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:12:59 lr: 0.000000 grad: 0.2134 (0.2147) loss: 0.7069 (0.7092) time: 0.1412 data: 0.0472 max mem: 9377 +Train: [98] [1000/6250] eta: 0:12:41 lr: 0.000000 grad: 0.2096 (0.2145) loss: 0.7081 (0.7083) time: 0.1342 data: 0.0475 max mem: 9377 +Train: [98] [1100/6250] eta: 0:12:21 lr: 0.000000 grad: 0.2132 (0.2143) loss: 0.6953 (0.7076) time: 0.1230 data: 0.0300 max mem: 9377 +Train: [98] [1200/6250] eta: 0:12:03 lr: 0.000000 grad: 0.2095 (0.2141) loss: 0.7005 (0.7072) time: 0.1437 data: 0.0605 max mem: 9377 +Train: [98] [1300/6250] eta: 0:11:47 lr: 0.000000 grad: 0.2134 (0.2142) loss: 0.7086 (0.7066) time: 0.1365 data: 0.0517 max mem: 9377 +Train: [98] [1400/6250] eta: 0:11:32 lr: 0.000000 grad: 0.2134 (0.2141) loss: 0.6948 (0.7063) time: 0.1260 data: 0.0439 max mem: 9377 +Train: [98] [1500/6250] eta: 0:11:17 lr: 0.000000 grad: 0.2092 (0.2139) loss: 0.6984 (0.7061) time: 0.1327 data: 0.0435 max mem: 9377 +Train: [98] [1600/6250] eta: 0:11:02 lr: 0.000000 grad: 0.2104 (0.2140) loss: 0.6951 (0.7057) time: 0.1519 data: 0.0716 max mem: 9377 +Train: [98] [1700/6250] eta: 0:10:46 lr: 0.000000 grad: 0.2146 (0.2138) loss: 0.6934 (0.7055) time: 0.1167 data: 0.0356 max mem: 9377 +Train: [98] [1800/6250] eta: 0:10:31 lr: 0.000000 grad: 0.2142 (0.2137) loss: 0.7155 (0.7057) time: 0.1335 data: 0.0507 max mem: 9377 +Train: [98] [1900/6250] eta: 0:10:20 lr: 0.000000 grad: 0.2111 (0.2137) loss: 0.7072 (0.7053) time: 0.1595 data: 0.0756 max mem: 9377 +Train: [98] [2000/6250] eta: 0:10:09 lr: 0.000000 grad: 0.2080 (0.2136) loss: 0.7021 (0.7053) time: 0.1886 data: 0.1124 max mem: 9377 +Train: [98] [2100/6250] eta: 0:09:58 lr: 0.000000 grad: 0.2094 (0.2136) loss: 0.6993 (0.7052) time: 0.1549 data: 0.0713 max mem: 9377 +Train: [98] [2200/6250] eta: 0:09:44 lr: 0.000000 grad: 0.2109 (0.2136) loss: 0.6921 (0.7051) time: 0.1424 data: 0.0582 max mem: 9377 +Train: [98] [2300/6250] eta: 0:09:30 lr: 0.000000 grad: 0.2041 (0.2135) loss: 0.7075 (0.7052) time: 0.1186 data: 0.0289 max mem: 9377 +Train: [98] [2400/6250] eta: 0:09:16 lr: 0.000000 grad: 0.2131 (0.2135) loss: 0.6970 (0.7050) time: 0.1581 data: 0.0730 max mem: 9377 +Train: [98] [2500/6250] eta: 0:09:01 lr: 0.000000 grad: 0.2091 (0.2135) loss: 0.6971 (0.7048) time: 0.1394 data: 0.0648 max mem: 9377 +Train: [98] [2600/6250] eta: 0:08:45 lr: 0.000000 grad: 0.2054 (0.2135) loss: 0.7108 (0.7049) time: 0.1467 data: 0.0607 max mem: 9377 +Train: [98] [2700/6250] eta: 0:08:30 lr: 0.000000 grad: 0.2150 (0.2135) loss: 0.6935 (0.7047) time: 0.1390 data: 0.0600 max mem: 9377 +Train: [98] [2800/6250] eta: 0:08:14 lr: 0.000000 grad: 0.2114 (0.2134) loss: 0.6933 (0.7046) time: 0.1437 data: 0.0647 max mem: 9377 +Train: [98] [2900/6250] eta: 0:07:59 lr: 0.000000 grad: 0.2076 (0.2133) loss: 0.7109 (0.7047) time: 0.1483 data: 0.0627 max mem: 9377 +Train: [98] [3000/6250] eta: 0:07:43 lr: 0.000000 grad: 0.2114 (0.2133) loss: 0.7056 (0.7046) time: 0.1255 data: 0.0421 max mem: 9377 +Train: [98] [3100/6250] eta: 0:07:28 lr: 0.000000 grad: 0.2104 (0.2133) loss: 0.7022 (0.7046) time: 0.1492 data: 0.0668 max mem: 9377 +Train: [98] [3200/6250] eta: 0:07:14 lr: 0.000000 grad: 0.2134 (0.2133) loss: 0.7123 (0.7046) time: 0.1390 data: 0.0530 max mem: 9377 +Train: [98] [3300/6250] eta: 0:07:01 lr: 0.000000 grad: 0.2014 (0.2131) loss: 0.7226 (0.7048) time: 0.1549 data: 0.0737 max mem: 9377 +Train: [98] [3400/6250] eta: 0:06:47 lr: 0.000000 grad: 0.2027 (0.2130) loss: 0.7151 (0.7049) time: 0.1274 data: 0.0493 max mem: 9377 +Train: [98] [3500/6250] eta: 0:06:32 lr: 0.000000 grad: 0.1988 (0.2128) loss: 0.7206 (0.7052) time: 0.1322 data: 0.0486 max mem: 9377 +Train: [98] [3600/6250] eta: 0:06:18 lr: 0.000000 grad: 0.2083 (0.2128) loss: 0.7057 (0.7054) time: 0.1427 data: 0.0658 max mem: 9377 +Train: [98] [3700/6250] eta: 0:06:04 lr: 0.000000 grad: 0.2046 (0.2128) loss: 0.7093 (0.7055) time: 0.1428 data: 0.0494 max mem: 9377 +Train: [98] [3800/6250] eta: 0:05:51 lr: 0.000000 grad: 0.2082 (0.2126) loss: 0.7016 (0.7057) time: 0.1796 data: 0.0990 max mem: 9377 +Train: [98] [3900/6250] eta: 0:05:37 lr: 0.000000 grad: 0.2031 (0.2125) loss: 0.7141 (0.7058) time: 0.1510 data: 0.0572 max mem: 9377 +Train: [98] [4000/6250] eta: 0:05:22 lr: 0.000000 grad: 0.2085 (0.2124) loss: 0.7007 (0.7059) time: 0.1314 data: 0.0416 max mem: 9377 +Train: [98] [4100/6250] eta: 0:05:08 lr: 0.000000 grad: 0.2018 (0.2124) loss: 0.7091 (0.7059) time: 0.1490 data: 0.0715 max mem: 9377 +Train: [98] [4200/6250] eta: 0:04:53 lr: 0.000000 grad: 0.2123 (0.2123) loss: 0.6969 (0.7059) time: 0.1371 data: 0.0551 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:40 lr: 0.000000 grad: 0.2089 (0.2123) loss: 0.7082 (0.7058) time: 0.1511 data: 0.0668 max mem: 9377 +Train: [98] [4400/6250] eta: 0:04:26 lr: 0.000000 grad: 0.2075 (0.2123) loss: 0.7091 (0.7059) time: 0.1819 data: 0.1065 max mem: 9377 +Train: [98] [4500/6250] eta: 0:04:13 lr: 0.000000 grad: 0.2054 (0.2123) loss: 0.7008 (0.7059) time: 0.1149 data: 0.0258 max mem: 9377 +Train: [98] [4600/6250] eta: 0:03:59 lr: 0.000000 grad: 0.2084 (0.2123) loss: 0.7056 (0.7059) time: 0.1576 data: 0.0764 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:45 lr: 0.000000 grad: 0.2001 (0.2122) loss: 0.7018 (0.7060) time: 0.1209 data: 0.0431 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:30 lr: 0.000000 grad: 0.2022 (0.2121) loss: 0.7118 (0.7060) time: 0.1286 data: 0.0434 max mem: 9377 +Train: [98] [4900/6250] eta: 0:03:15 lr: 0.000000 grad: 0.2078 (0.2121) loss: 0.7015 (0.7060) time: 0.1136 data: 0.0275 max mem: 9377 +Train: [98] [5000/6250] eta: 0:03:01 lr: 0.000000 grad: 0.1986 (0.2120) loss: 0.7146 (0.7060) time: 0.1346 data: 0.0502 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:46 lr: 0.000000 grad: 0.2062 (0.2120) loss: 0.6961 (0.7060) time: 0.1506 data: 0.0639 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:31 lr: 0.000000 grad: 0.2136 (0.2120) loss: 0.6917 (0.7060) time: 0.1460 data: 0.0595 max mem: 9377 +Train: [98] [5300/6250] eta: 0:02:17 lr: 0.000000 grad: 0.2097 (0.2120) loss: 0.6961 (0.7058) time: 0.1488 data: 0.0649 max mem: 9377 +Train: [98] [5400/6250] eta: 0:02:02 lr: 0.000000 grad: 0.2089 (0.2120) loss: 0.7018 (0.7058) time: 0.1222 data: 0.0431 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:48 lr: 0.000000 grad: 0.2082 (0.2120) loss: 0.6931 (0.7058) time: 0.1307 data: 0.0434 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:33 lr: 0.000000 grad: 0.2114 (0.2120) loss: 0.6966 (0.7057) time: 0.1304 data: 0.0381 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:19 lr: 0.000000 grad: 0.2133 (0.2120) loss: 0.7102 (0.7058) time: 0.1305 data: 0.0503 max mem: 9377 +Train: [98] [5800/6250] eta: 0:01:04 lr: 0.000000 grad: 0.2104 (0.2119) loss: 0.7051 (0.7058) time: 0.1319 data: 0.0479 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:50 lr: 0.000000 grad: 0.2103 (0.2119) loss: 0.7046 (0.7058) time: 0.1728 data: 0.0940 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.2090 (0.2119) loss: 0.7142 (0.7058) time: 0.1360 data: 0.0546 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2114 (0.2119) loss: 0.7092 (0.7059) time: 0.1250 data: 0.0427 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2087 (0.2118) loss: 0.6986 (0.7059) time: 0.1264 data: 0.0389 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2033 (0.2118) loss: 0.7113 (0.7059) time: 0.1401 data: 0.0555 max mem: 9377 +Train: [98] Total time: 0:15:03 (0.1445 s / it) +Averaged stats: lr: 0.000000 grad: 0.2033 (0.2118) loss: 0.7113 (0.7059) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:04:57 loss: 0.8716 (0.8716) time: 4.8018 data: 4.7213 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8711 (0.8750) time: 0.1317 data: 0.1044 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (hcp-train-subset): loss: 0.8711 (0.8750) +Eval (hcp-val): [98] [ 0/62] eta: 0:04:20 loss: 0.8726 (0.8726) time: 4.1937 data: 4.1352 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8708 (0.8721) time: 0.1170 data: 0.0904 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-val): loss: 0.8708 (0.8721) +Eval (nsd-val): [98] [ 0/62] eta: 0:03:46 loss: 0.8360 (0.8360) time: 3.6532 data: 3.5850 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8452 (0.8467) time: 0.1207 data: 0.0953 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:12 (0.2090 s / it) +Averaged stats (nsd-val): loss: 0.8452 (0.8467) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 9:41:07 lr: 0.000000 grad: 0.2820 (0.2820) loss: 0.6145 (0.6145) time: 5.5788 data: 5.4367 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:19:52 lr: 0.000000 grad: 0.2116 (0.2164) loss: 0.7251 (0.7438) time: 0.1549 data: 0.0553 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:17:46 lr: 0.000000 grad: 0.2187 (0.2189) loss: 0.6955 (0.7256) time: 0.1526 data: 0.0462 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:16:50 lr: 0.000000 grad: 0.2109 (0.2190) loss: 0.7099 (0.7204) time: 0.1418 data: 0.0313 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:16:14 lr: 0.000000 grad: 0.2202 (0.2185) loss: 0.6946 (0.7167) time: 0.1716 data: 0.0766 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:15:35 lr: 0.000000 grad: 0.2177 (0.2183) loss: 0.7116 (0.7142) time: 0.1525 data: 0.0609 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:14:57 lr: 0.000000 grad: 0.2105 (0.2177) loss: 0.7043 (0.7130) time: 0.1528 data: 0.0556 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:14:27 lr: 0.000000 grad: 0.2086 (0.2167) loss: 0.7075 (0.7126) time: 0.1404 data: 0.0501 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:14:09 lr: 0.000000 grad: 0.2082 (0.2160) loss: 0.7116 (0.7131) time: 0.1714 data: 0.0863 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:13:46 lr: 0.000000 grad: 0.2081 (0.2155) loss: 0.7152 (0.7134) time: 0.1448 data: 0.0548 max mem: 9377 +Train: [99] [1000/6250] eta: 0:13:30 lr: 0.000000 grad: 0.2118 (0.2153) loss: 0.7126 (0.7132) time: 0.1568 data: 0.0625 max mem: 9377 +Train: [99] [1100/6250] eta: 0:13:10 lr: 0.000000 grad: 0.2093 (0.2146) loss: 0.7046 (0.7130) time: 0.1595 data: 0.0742 max mem: 9377 +Train: [99] [1200/6250] eta: 0:12:53 lr: 0.000000 grad: 0.2064 (0.2144) loss: 0.7114 (0.7123) time: 0.1836 data: 0.0999 max mem: 9377 +Train: [99] [1300/6250] eta: 0:12:34 lr: 0.000000 grad: 0.2102 (0.2142) loss: 0.6994 (0.7118) time: 0.1585 data: 0.0778 max mem: 9377 +Train: [99] [1400/6250] eta: 0:12:15 lr: 0.000000 grad: 0.2066 (0.2139) loss: 0.7153 (0.7112) time: 0.1434 data: 0.0573 max mem: 9377 +Train: [99] [1500/6250] eta: 0:11:57 lr: 0.000000 grad: 0.2029 (0.2136) loss: 0.7077 (0.7111) time: 0.1369 data: 0.0497 max mem: 9377 +Train: [99] [1600/6250] eta: 0:11:39 lr: 0.000000 grad: 0.2050 (0.2134) loss: 0.7076 (0.7107) time: 0.1370 data: 0.0530 max mem: 9377 +Train: [99] [1700/6250] eta: 0:11:21 lr: 0.000000 grad: 0.2030 (0.2132) loss: 0.7083 (0.7105) time: 0.1243 data: 0.0440 max mem: 9377 +Train: [99] [1800/6250] eta: 0:11:04 lr: 0.000000 grad: 0.2064 (0.2129) loss: 0.7015 (0.7106) time: 0.1141 data: 0.0337 max mem: 9377 +Train: [99] [1900/6250] eta: 0:10:50 lr: 0.000000 grad: 0.2034 (0.2126) loss: 0.7179 (0.7109) time: 0.1525 data: 0.0746 max mem: 9377 +Train: [99] [2000/6250] eta: 0:10:38 lr: 0.000000 grad: 0.2078 (0.2123) loss: 0.7122 (0.7109) time: 0.1277 data: 0.0412 max mem: 9377 +Train: [99] [2100/6250] eta: 0:10:25 lr: 0.000000 grad: 0.2041 (0.2121) loss: 0.7178 (0.7113) time: 0.1380 data: 0.0563 max mem: 9377 +Train: [99] [2200/6250] eta: 0:10:11 lr: 0.000000 grad: 0.2114 (0.2119) loss: 0.7102 (0.7114) time: 0.1407 data: 0.0519 max mem: 9377 +Train: [99] [2300/6250] eta: 0:09:57 lr: 0.000000 grad: 0.2057 (0.2117) loss: 0.7127 (0.7114) time: 0.1572 data: 0.0744 max mem: 9377 +Train: [99] [2400/6250] eta: 0:09:42 lr: 0.000000 grad: 0.2090 (0.2115) loss: 0.7173 (0.7114) time: 0.1455 data: 0.0653 max mem: 9377 +Train: [99] [2500/6250] eta: 0:09:26 lr: 0.000000 grad: 0.2024 (0.2114) loss: 0.7109 (0.7113) time: 0.1543 data: 0.0724 max mem: 9377 +Train: [99] [2600/6250] eta: 0:09:11 lr: 0.000000 grad: 0.2036 (0.2112) loss: 0.7111 (0.7113) time: 0.1431 data: 0.0465 max mem: 9377 +Train: [99] [2700/6250] eta: 0:08:54 lr: 0.000000 grad: 0.2026 (0.2110) loss: 0.7021 (0.7113) time: 0.1477 data: 0.0582 max mem: 9377 +Train: [99] [2800/6250] eta: 0:08:40 lr: 0.000000 grad: 0.2036 (0.2109) loss: 0.7205 (0.7112) time: 0.1922 data: 0.1070 max mem: 9377 +Train: [99] [2900/6250] eta: 0:08:22 lr: 0.000000 grad: 0.2062 (0.2108) loss: 0.7203 (0.7113) time: 0.1235 data: 0.0289 max mem: 9377 +Train: [99] [3000/6250] eta: 0:08:07 lr: 0.000000 grad: 0.2115 (0.2109) loss: 0.7089 (0.7115) time: 0.2136 data: 0.1272 max mem: 9377 +Train: [99] [3100/6250] eta: 0:07:51 lr: 0.000000 grad: 0.1984 (0.2107) loss: 0.7157 (0.7117) time: 0.1432 data: 0.0465 max mem: 9377 +Train: [99] [3200/6250] eta: 0:07:37 lr: 0.000000 grad: 0.2062 (0.2106) loss: 0.7177 (0.7117) time: 0.1665 data: 0.0803 max mem: 9377 +Train: [99] [3300/6250] eta: 0:07:21 lr: 0.000000 grad: 0.2084 (0.2105) loss: 0.7003 (0.7118) time: 0.1452 data: 0.0339 max mem: 9377 +Train: [99] [3400/6250] eta: 0:07:06 lr: 0.000000 grad: 0.2141 (0.2105) loss: 0.6973 (0.7117) time: 0.1244 data: 0.0451 max mem: 9377 +Train: [99] [3500/6250] eta: 0:06:50 lr: 0.000000 grad: 0.2064 (0.2105) loss: 0.7134 (0.7116) time: 0.1503 data: 0.0630 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:36 lr: 0.000000 grad: 0.2061 (0.2104) loss: 0.6951 (0.7114) time: 0.1579 data: 0.0789 max mem: 9377 +Train: [99] [3700/6250] eta: 0:06:22 lr: 0.000000 grad: 0.2056 (0.2103) loss: 0.7004 (0.7113) time: 0.1466 data: 0.0649 max mem: 9377 +Train: [99] [3800/6250] eta: 0:06:06 lr: 0.000000 grad: 0.2098 (0.2103) loss: 0.6985 (0.7112) time: 0.1297 data: 0.0411 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:51 lr: 0.000000 grad: 0.2123 (0.2103) loss: 0.7058 (0.7111) time: 0.1274 data: 0.0434 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:36 lr: 0.000000 grad: 0.2087 (0.2103) loss: 0.7042 (0.7110) time: 0.1420 data: 0.0580 max mem: 9377 +Train: [99] [4100/6250] eta: 0:05:20 lr: 0.000000 grad: 0.2120 (0.2103) loss: 0.7044 (0.7108) time: 0.1197 data: 0.0306 max mem: 9377 +Train: [99] [4200/6250] eta: 0:05:05 lr: 0.000000 grad: 0.2095 (0.2103) loss: 0.7089 (0.7106) time: 0.1459 data: 0.0629 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:49 lr: 0.000000 grad: 0.2043 (0.2103) loss: 0.7057 (0.7105) time: 0.1461 data: 0.0657 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:34 lr: 0.000000 grad: 0.2163 (0.2104) loss: 0.7056 (0.7103) time: 0.1439 data: 0.0602 max mem: 9377 +Train: [99] [4500/6250] eta: 0:04:18 lr: 0.000000 grad: 0.2063 (0.2104) loss: 0.6899 (0.7101) time: 0.1205 data: 0.0319 max mem: 9377 +Train: [99] [4600/6250] eta: 0:04:03 lr: 0.000000 grad: 0.2118 (0.2104) loss: 0.7018 (0.7099) time: 0.1324 data: 0.0509 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:48 lr: 0.000000 grad: 0.2108 (0.2105) loss: 0.7041 (0.7097) time: 0.1243 data: 0.0361 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:33 lr: 0.000000 grad: 0.2064 (0.2105) loss: 0.7013 (0.7096) time: 0.1057 data: 0.0161 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:18 lr: 0.000000 grad: 0.2112 (0.2106) loss: 0.6992 (0.7094) time: 0.1691 data: 0.0920 max mem: 9377 +Train: [99] [5000/6250] eta: 0:03:03 lr: 0.000000 grad: 0.2037 (0.2106) loss: 0.7122 (0.7094) time: 0.1206 data: 0.0363 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:48 lr: 0.000000 grad: 0.2099 (0.2106) loss: 0.7057 (0.7094) time: 0.1400 data: 0.0583 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:33 lr: 0.000000 grad: 0.2137 (0.2106) loss: 0.7071 (0.7093) time: 0.1419 data: 0.0633 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:18 lr: 0.000000 grad: 0.2081 (0.2106) loss: 0.7055 (0.7091) time: 0.1242 data: 0.0458 max mem: 9377 +Train: [99] [5400/6250] eta: 0:02:04 lr: 0.000000 grad: 0.2102 (0.2107) loss: 0.7084 (0.7092) time: 0.1360 data: 0.0498 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:49 lr: 0.000000 grad: 0.2052 (0.2107) loss: 0.7148 (0.7091) time: 0.1303 data: 0.0394 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:34 lr: 0.000000 grad: 0.2117 (0.2107) loss: 0.7116 (0.7092) time: 0.1296 data: 0.0443 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:20 lr: 0.000000 grad: 0.2099 (0.2107) loss: 0.7163 (0.7092) time: 0.1231 data: 0.0429 max mem: 9377 +Train: [99] [5800/6250] eta: 0:01:05 lr: 0.000000 grad: 0.2091 (0.2107) loss: 0.7014 (0.7092) time: 0.1062 data: 0.0218 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:50 lr: 0.000000 grad: 0.2127 (0.2106) loss: 0.6999 (0.7092) time: 0.1314 data: 0.0503 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.2115 (0.2106) loss: 0.7089 (0.7093) time: 0.1370 data: 0.0548 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2099 (0.2106) loss: 0.7259 (0.7094) time: 0.1335 data: 0.0533 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2075 (0.2105) loss: 0.7040 (0.7094) time: 0.1428 data: 0.0608 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2077 (0.2105) loss: 0.7049 (0.7095) time: 0.1288 data: 0.0395 max mem: 9377 +Train: [99] Total time: 0:15:13 (0.1461 s / it) +Averaged stats: lr: 0.000000 grad: 0.2077 (0.2105) loss: 0.7049 (0.7095) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:04:34 loss: 0.8699 (0.8699) time: 4.4275 data: 4.3641 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8723 (0.8747) time: 0.1420 data: 0.1170 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (hcp-train-subset): loss: 0.8723 (0.8747) +Making plots (hcp-train-subset): example=4 +Eval (hcp-val): [99] [ 0/62] eta: 0:05:16 loss: 0.8790 (0.8790) time: 5.1025 data: 5.0066 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8706 (0.8730) time: 0.1112 data: 0.0860 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:14 (0.2276 s / it) +Averaged stats (hcp-val): loss: 0.8706 (0.8730) +Making plots (hcp-val): example=41 +Eval (nsd-val): [99] [ 0/62] eta: 0:06:12 loss: 0.8331 (0.8331) time: 6.0145 data: 5.9834 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8457 (0.8472) time: 0.1152 data: 0.0886 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (nsd-val): loss: 0.8457 (0.8472) +Making plots (nsd-val): example=43 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-00099.pth +done! training time: 1 day, 2:32:15 diff --git a/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b7b2daf7e39a9f7fd4c973886636baee8c8b24b0 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..96d7a06cbff442d59907d0533ecf4af34037ac6c --- /dev/null +++ b/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.046415888336127774,train,0.8366141732283464,0.017304224075997805,0.8374166745166707,0.017270180496607626,0.8371057381214022,0.01731051358221176 +flat_mae,patch,logistic,aabc_age,,0.046415888336127774,test,0.38461538461538464,0.057949427043539926,0.37859649122807015,0.05797698426232789,0.3740842490842491,0.057542111571559786 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,train,0.8523622047244095,0.015160432783316071,0.8522637481254558,0.015262048372313798,0.8529298661843653,0.015172538605698267 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,test,0.5192307692307693,0.0623871762729618,0.5219915848527349,0.06017802432524102,0.5157967032967032,0.062236807096345596 +flat_mae,patch,logistic,aabc_age,2,9.999999999999999e-05,train,0.4881889763779528,0.020160661965932112,0.4694745631175553,0.020762451132304603,0.48674022658705884,0.020040536296096464 +flat_mae,patch,logistic,aabc_age,2,9.999999999999999e-05,test,0.46153846153846156,0.06388899165300599,0.426585173193946,0.06119357747452577,0.4535256410256411,0.06262066287810407 +flat_mae,patch,logistic,aabc_age,3,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,3,21.54434690031882,test,0.46153846153846156,0.06524192575131152,0.45466570466570466,0.06482470264517816,0.45970695970695974,0.06498110562727226 +flat_mae,patch,logistic,aabc_age,4,0.3593813663804626,train,0.984251968503937,0.005526555083017095,0.9846078279860113,0.005399985646330674,0.9844236980450298,0.005440807435428798 +flat_mae,patch,logistic,aabc_age,4,0.3593813663804626,test,0.5192307692307693,0.06443055065718564,0.5116483516483517,0.0650331388161421,0.5201465201465201,0.06462726599464377 +flat_mae,patch,logistic,aabc_age,5,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,5,2.782559402207126,test,0.4807692307692308,0.0617952694422784,0.47609978588239454,0.06279175548211148,0.48031135531135527,0.06206483155694821 +flat_mae,patch,logistic,aabc_age,6,0.3593813663804626,train,0.984251968503937,0.005289350664728219,0.9845581997411132,0.005198571345366485,0.9849088204776089,0.005092115159400898 +flat_mae,patch,logistic,aabc_age,6,0.3593813663804626,test,0.5192307692307693,0.06371162843338048,0.5122053872053872,0.06416665991261807,0.5249542124542124,0.0642360784865812 +flat_mae,patch,logistic,aabc_age,7,0.000774263682681127,train,0.5610236220472441,0.02042458671130176,0.5546119670779828,0.02109033117091042,0.5624866941956965,0.020413143068824684 +flat_mae,patch,logistic,aabc_age,7,0.000774263682681127,test,0.4807692307692308,0.0615353845384577,0.4376142142062609,0.06121649682855208,0.4741300366300366,0.06066880513261205 +flat_mae,patch,logistic,aabc_age,8,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,8,21.54434690031882,test,0.46153846153846156,0.06576022205508227,0.46042572463768117,0.06487791290788249,0.46863553113553114,0.06635234674296442 +flat_mae,patch,logistic,aabc_age,9,0.005994842503189409,train,0.6614173228346457,0.019985298629850965,0.6588975092055919,0.0204787729543169,0.6637481803413574,0.019887646047387998 +flat_mae,patch,logistic,aabc_age,9,0.005994842503189409,test,0.5384615384615384,0.06190242130412678,0.5288539553752536,0.0679387437598958,0.538003663003663,0.06219816353545317 +flat_mae,patch,logistic,aabc_age,10,0.3593813663804626,train,0.9862204724409449,0.005153421496006914,0.986480137602754,0.005044066121638992,0.9864398270772878,0.005083290707373919 +flat_mae,patch,logistic,aabc_age,10,0.3593813663804626,test,0.5,0.07216594125294201,0.5118248992386923,0.07041445830854252,0.5027472527472527,0.0723557845570276 +flat_mae,patch,logistic,aabc_age,11,0.046415888336127774,train,0.8346456692913385,0.016644918281764242,0.8343608133266487,0.016803556748954913,0.8354550297378732,0.016667297805469383 +flat_mae,patch,logistic,aabc_age,11,0.046415888336127774,test,0.5576923076923077,0.06693654815786321,0.5756060606060607,0.06473787371741364,0.5606684981684982,0.06697772160222111 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,train,0.6751968503937008,0.02010432945581935,0.671980968951026,0.02045283363922507,0.6779110702373852,0.019933471815921006 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,test,0.46153846153846156,0.06553687024070302,0.45795074812824,0.06634004442730149,0.4597069597069597,0.0654509001386346 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,train,0.8464566929133859,0.016422120185045026,0.8458267804361161,0.016663516074116976,0.8465139511957482,0.016489442129094176 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,test,0.38461538461538464,0.06320731129531956,0.38568376068376065,0.06044551634336766,0.3882783882783883,0.06412114375397251 +flat_mae,patch,logistic,aabc_age,14,0.3593813663804626,train,0.9822834645669292,0.005997802468176894,0.9824005858708567,0.005943817699825465,0.9820900277911502,0.0060531211192861294 +flat_mae,patch,logistic,aabc_age,14,0.3593813663804626,test,0.5,0.06341728060465807,0.5011733094491715,0.06418396716573467,0.5057234432234432,0.06352568745341663 +flat_mae,patch,logistic,aabc_age,15,0.3593813663804626,train,0.984251968503937,0.0055891980174501655,0.984601940608802,0.00544737054833931,0.9841561434936295,0.005622061272098373 +flat_mae,patch,logistic,aabc_age,15,0.3593813663804626,test,0.5769230769230769,0.06423075771533752,0.5815322580645161,0.06388026233559048,0.5771520146520146,0.06442034689101897 +flat_mae,patch,logistic,aabc_age,16,0.000774263682681127,train,0.5551181102362205,0.02136047341562562,0.549334969982032,0.021723616908749223,0.5557356167851644,0.02123994782391769 +flat_mae,patch,logistic,aabc_age,16,0.000774263682681127,test,0.4807692307692308,0.0633639809579562,0.47022546419098143,0.06343769505593227,0.47870879120879123,0.06334870908274497 +flat_mae,patch,logistic,aabc_age,17,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,17,2.782559402207126,test,0.4230769230769231,0.06329348377801144,0.4166269841269841,0.06275823129090746,0.4237637362637363,0.06342433434426233 +flat_mae,patch,logistic,aabc_age,18,0.3593813663804626,train,0.9822834645669292,0.005918154446202299,0.9823429874669921,0.005889874389855083,0.9824075690127716,0.005875951780187102 +flat_mae,patch,logistic,aabc_age,18,0.3593813663804626,test,0.40384615384615385,0.06913209848847436,0.4096962629796213,0.06846346696398492,0.40796703296703296,0.06957443001465052 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,train,0.8326771653543307,0.016970758203841932,0.832627450502353,0.01709774244295392,0.833874036467973,0.016850955002157306 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,test,0.4423076923076923,0.06807665145534687,0.4446703296703297,0.06880843796884462,0.44505494505494503,0.06839798620861753 +flat_mae,patch,logistic,aabc_age,20,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,20,2.782559402207126,test,0.5,0.06821011525049057,0.4909688013136289,0.06848508840388282,0.49793956043956045,0.06819440187499051 +flat_mae,patch,logistic,aabc_age,21,0.3593813663804626,train,0.9862204724409449,0.005027636129857651,0.9863201337071931,0.004992184818086114,0.9856871500933084,0.005222947866718278 +flat_mae,patch,logistic,aabc_age,21,0.3593813663804626,test,0.5192307692307693,0.05233137190258917,0.48503637566137564,0.04975114814772035,0.5258699633699634,0.053755919961926545 +flat_mae,patch,logistic,aabc_age,22,0.3593813663804626,train,0.9862204724409449,0.005628908008593731,0.9864319197338205,0.005533432613905819,0.9864398270772878,0.0055499154431457275 +flat_mae,patch,logistic,aabc_age,22,0.3593813663804626,test,0.4423076923076923,0.06771820964230939,0.45055858120374254,0.06617821920266428,0.4466575091575092,0.06814361301590238 +flat_mae,patch,logistic,aabc_age,23,0.046415888336127774,train,0.844488188976378,0.01598435669644914,0.8447804141166625,0.016050928210342395,0.8454180803584272,0.01593752219359285 +flat_mae,patch,logistic,aabc_age,23,0.046415888336127774,test,0.4230769230769231,0.05982469459044682,0.41280786099865047,0.05780069976389986,0.41941391941391937,0.05946350142948324 +flat_mae,patch,logistic,aabc_age,24,0.046415888336127774,train,0.8523622047244095,0.015164611779762157,0.853063538114029,0.015114696670533573,0.853297394076208,0.01507044461471897 +flat_mae,patch,logistic,aabc_age,24,0.046415888336127774,test,0.36538461538461536,0.060143750382389456,0.35896993505689156,0.05873125603552922,0.36744505494505497,0.060525387632811414 +flat_mae,patch,logistic,aabc_age,25,0.046415888336127774,train,0.84251968503937,0.015523026442311447,0.842369971808399,0.015616946248626171,0.8432343701152115,0.015436272940504822 +flat_mae,patch,logistic,aabc_age,25,0.046415888336127774,test,0.3076923076923077,0.06092573566459571,0.32169055082848186,0.058994593669884424,0.3067765567765568,0.060883066869048846 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,train,0.5728346456692913,0.022061568743532965,0.5661268359175173,0.02240147970384463,0.5732780611021713,0.021969989529761317 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,test,0.40384615384615385,0.06300440249480833,0.39202551834130783,0.06404430005433341,0.4001831501831502,0.06261948844984895 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,train,0.8543307086614174,0.015088127975438565,0.8543510571706774,0.01521560509407616,0.8552635364382448,0.01505773716552186 +flat_mae,patch,logistic,aabc_age,27,0.046415888336127774,test,0.38461538461538464,0.06725628806307757,0.38421481899742765,0.06760645536317131,0.38278388278388276,0.06702383451020436 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,train,0.6594488188976378,0.020846307613989645,0.6564926844695819,0.021270798784434195,0.6606118464332773,0.020708451110238335 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,test,0.5192307692307693,0.06609267073027351,0.5105042016806722,0.0700139892398969,0.5247252747252747,0.06648703176882743 +flat_mae,patch,logistic,aabc_age,29,0.000774263682681127,train,0.5433070866141733,0.0209330468012876,0.5333680345685327,0.021495813505817234,0.5442091940950039,0.02082994310841855 +flat_mae,patch,logistic,aabc_age,29,0.000774263682681127,test,0.46153846153846156,0.06464829747846347,0.45146520146520147,0.06577861543241968,0.459478021978022,0.06465919222965302 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,train,0.8385826771653543,0.016592299572657513,0.8387770345596433,0.016711163108045823,0.8396372478130532,0.01655672913119235 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,test,0.5769230769230769,0.069163332540063,0.5751719576719576,0.06955112487931323,0.5828754578754578,0.06896529519250544 +flat_mae,patch,logistic,aabc_age,31,0.005994842503189409,train,0.6515748031496063,0.021155094825504196,0.6482721884400435,0.02159592489564974,0.6534176018289607,0.02107357917020877 +flat_mae,patch,logistic,aabc_age,31,0.005994842503189409,test,0.5576923076923077,0.06704819206489909,0.5408740176232436,0.07130820324176729,0.5558608058608059,0.06677979720913636 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,train,0.9822834645669292,0.005918058849558781,0.982441234915656,0.005866505931011071,0.9824575556829929,0.005868523624928659 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,test,0.4423076923076923,0.06118968470904961,0.4282176157176157,0.06127410700816197,0.44299450549450553,0.061556466042595244 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,train,0.8523622047244095,0.015436896420444835,0.8526464040514825,0.015503765630147612,0.8530298395248077,0.015386746705399844 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,test,0.5576923076923077,0.0652738428178778,0.5582919254658385,0.06614300974257094,0.5590659340659341,0.06556453681880652 +flat_mae,patch,logistic,aabc_age,34,0.046415888336127774,train,0.8523622047244095,0.016020514218143933,0.8525601553415326,0.01605002066379656,0.8531974207357654,0.01597909810083145 +flat_mae,patch,logistic,aabc_age,34,0.046415888336127774,test,0.4423076923076923,0.06530171214805516,0.4492713737875028,0.06484840239157982,0.4432234432234432,0.06558736264401135 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,train,0.6732283464566929,0.020152280025564857,0.6700672417604137,0.020667987492229593,0.6754098187725479,0.020115328320113115 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,test,0.4230769230769231,0.06423058498361414,0.4195623985522118,0.06469782093245457,0.4223901098901099,0.06419945702259432 +flat_mae,patch,logistic,aabc_age,36,0.3593813663804626,train,0.9803149606299213,0.0061001253202748115,0.9806555716522977,0.0059971673986943036,0.9810441436240505,0.005888779349119141 +flat_mae,patch,logistic,aabc_age,36,0.3593813663804626,test,0.4807692307692308,0.06299219215259419,0.4533880237300705,0.0646290857410857,0.4860347985347986,0.06399464873685771 +flat_mae,patch,logistic,aabc_age,37,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,37,166.81005372000556,test,0.38461538461538464,0.06413009152537719,0.38902116402116405,0.06428738273190888,0.3841575091575091,0.06433070209897496 +flat_mae,patch,logistic,aabc_age,38,0.046415888336127774,train,0.8385826771653543,0.01647649977847511,0.8377610383862729,0.016661077285063258,0.8388345841588526,0.016516882295181088 +flat_mae,patch,logistic,aabc_age,38,0.046415888336127774,test,0.5769230769230769,0.06492617564606488,0.5681697612732095,0.0663409114958182,0.575091575091575,0.06485475122448725 +flat_mae,patch,logistic,aabc_age,39,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,39,2.782559402207126,test,0.4807692307692308,0.06719661163618755,0.4769137866963954,0.06720643835893196,0.4775641025641026,0.06707026983024775 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,train,0.84251968503937,0.01566507431482874,0.8435775778757,0.015617190481106267,0.8437018713474969,0.015598986669402659 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,test,0.5384615384615384,0.06683995722363087,0.5308862433862434,0.06828493723383323,0.5396062271062272,0.06709156417340942 +flat_mae,patch,logistic,aabc_age,41,0.3593813663804626,train,0.9862204724409449,0.005136096500161814,0.9863440390498199,0.005103478978239385,0.986707381628688,0.004985251452857188 +flat_mae,patch,logistic,aabc_age,41,0.3593813663804626,test,0.5,0.06252781629523685,0.5004417434140073,0.060324350117111,0.5022893772893773,0.06288170989868064 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,train,0.5511811023622047,0.020533885683935285,0.5423376508885052,0.02128698100624967,0.5517209799209419,0.020568341902777322 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,test,0.4230769230769231,0.04863485491317966,0.38369963369963367,0.05961748395225562,0.4251373626373626,0.04911938812421867 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,train,0.84251968503937,0.016228804908193888,0.8427866300898963,0.016307922810576123,0.8432343701152114,0.01618598864758266 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,test,0.5769230769230769,0.06724338686386251,0.5769047619047619,0.06881765818112363,0.5771520146520146,0.06738253467740424 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,train,0.8346456692913385,0.016055720042162307,0.8342399098639675,0.016161231241252728,0.8353374351971369,0.015980090269044893 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,test,0.5,0.07110550332815524,0.5025000000000001,0.07129035176151845,0.4983974358974359,0.07138900894804541 +flat_mae,patch,logistic,aabc_age,45,0.046415888336127774,train,0.8346456692913385,0.015948952527930285,0.8345031401663541,0.01608157409722121,0.8360401255108949,0.015917524101886488 +flat_mae,patch,logistic,aabc_age,45,0.046415888336127774,test,0.4807692307692308,0.06487214325435997,0.48148693510387663,0.06353068449861626,0.4862637362637363,0.06574287996009155 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,train,0.5393700787401575,0.020262969468364294,0.5311671495256034,0.02082010109510627,0.5402769093709303,0.02022785668812309 +flat_mae,patch,logistic,aabc_age,46,0.000774263682681127,test,0.5769230769230769,0.06438516680216036,0.5743686061381075,0.07027834743891563,0.5753205128205128,0.0645452214856435 +flat_mae,patch,logistic,aabc_age,47,0.000774263682681127,train,0.5728346456692913,0.019582211351785648,0.567200617381456,0.01997371747270983,0.5735456156535715,0.019565219470533926 +flat_mae,patch,logistic,aabc_age,47,0.000774263682681127,test,0.4230769230769231,0.06323548316697633,0.4089279895731508,0.06411366667151348,0.41941391941391937,0.06257990943290731 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,train,0.8366141732283464,0.01642375862827067,0.8365362923424959,0.01655581959045081,0.8377886999917528,0.01635772447292449 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,test,0.40384615384615385,0.06832149678598182,0.4122980481676134,0.06558963688299861,0.40613553113553114,0.0686484628371634 +flat_mae,patch,logistic,aabc_age,49,0.005994842503189409,train,0.6496062992125984,0.019538563701330332,0.6453295970679597,0.020132377964804066,0.6516190406778816,0.019537979257332902 +flat_mae,patch,logistic,aabc_age,49,0.005994842503189409,test,0.40384615384615385,0.0649302197128014,0.4029971988795518,0.06477845406577831,0.40041208791208793,0.0647196560149341 +flat_mae,patch,logistic,aabc_age,50,0.3593813663804626,train,0.9881889763779528,0.005031383619805011,0.9883273189342456,0.004956999800576488,0.988238388228367,0.005018578953962119 +flat_mae,patch,logistic,aabc_age,50,0.3593813663804626,test,0.5384615384615384,0.06745526679030689,0.5433214882943144,0.06734358620754474,0.5384615384615385,0.06762030133758463 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,train,0.8287401574803149,0.016786053483269472,0.8293113738509714,0.016753576480814814,0.8298741438733843,0.016685835255586135 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,test,0.5769230769230769,0.06488036327319337,0.5727609427609428,0.06440225643183178,0.5737179487179487,0.06493301202675035 +flat_mae,patch,logistic,aabc_age,52,0.000774263682681127,train,0.5728346456692913,0.019701928188173578,0.5614659978824065,0.020835827380585623,0.5736808313946018,0.019631767584404446 +flat_mae,patch,logistic,aabc_age,52,0.000774263682681127,test,0.4230769230769231,0.06208901559847898,0.4263083735909823,0.06099594284855781,0.4269688644688645,0.06277916775184823 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,train,0.468503937007874,0.020784903139644335,0.4530944018923541,0.021079195068412368,0.46744920778919385,0.020673724135065075 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,test,0.5192307692307693,0.058336150960409956,0.4778935185185185,0.056919293573939544,0.5112179487179487,0.057354562492450176 +flat_mae,patch,logistic,aabc_age,54,0.046415888336127774,train,0.8484251968503937,0.015622559205479515,0.8486552321005761,0.01563466009548066,0.8496502851038285,0.01558910593982665 +flat_mae,patch,logistic,aabc_age,54,0.046415888336127774,test,0.40384615384615385,0.060034248115101455,0.37649262413327744,0.05826356801998579,0.41048534798534797,0.06119411094178349 +flat_mae,patch,logistic,aabc_age,55,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,55,166.81005372000556,test,0.4807692307692308,0.058148249628954804,0.4733514492753623,0.054474379757819845,0.48901098901098905,0.05936499621774483 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,train,0.5570866141732284,0.021456591417805166,0.5516428130371738,0.0221731970124133,0.5582868549202227,0.02149860627951304 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,test,0.40384615384615385,0.06304242724654639,0.39622153209109734,0.06076532629668424,0.4015567765567766,0.0628410538051134 +flat_mae,patch,logistic,aabc_age,57,0.046415888336127774,train,0.84251968503937,0.01585001427273507,0.8429679148216352,0.015845536674912588,0.8429491943635175,0.01579359627148672 +flat_mae,patch,logistic,aabc_age,57,0.046415888336127774,test,0.5192307692307693,0.0574054921810133,0.49670506912442397,0.05769100681961992,0.5155677655677655,0.05699896632817456 +flat_mae,patch,logistic,aabc_age,58,0.3593813663804626,train,0.9881889763779528,0.0045682373228176755,0.9884091937456655,0.0044769662048898306,0.9882883748985881,0.004523856120692117 +flat_mae,patch,logistic,aabc_age,58,0.3593813663804626,test,0.4230769230769231,0.05967917182984243,0.40654511453950437,0.06115877536204072,0.4207875457875458,0.05927754848986136 +flat_mae,patch,logistic,aabc_age,59,0.000774263682681127,train,0.5531496062992126,0.02068855495274167,0.548695508777664,0.021214498182337373,0.5545221514071068,0.020672050320110234 +flat_mae,patch,logistic,aabc_age,59,0.000774263682681127,test,0.5192307692307693,0.06252805287588688,0.49991466120498373,0.06623523954461033,0.5157967032967034,0.06215046603781298 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,train,0.6437007874015748,0.020110982295830755,0.6373354488322319,0.020686630752902453,0.6452707335597798,0.019998915197279275 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,test,0.4423076923076923,0.05898123009024635,0.4291287565481114,0.06176947870265391,0.44459706959706957,0.059268388111311805 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,train,0.9881889763779528,0.005015833013285185,0.9882702763688528,0.004987194708346153,0.988238388228367,0.004995454120842215 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,test,0.3269230769230769,0.05999924802290117,0.32605377714073364,0.059493436151958734,0.3276098901098901,0.06022930518905783 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,train,0.84251968503937,0.01590315869005001,0.8427382825934824,0.015979609557455223,0.8437694792180118,0.01587821722224166 +flat_mae,patch,logistic,aabc_age,62,0.046415888336127774,test,0.46153846153846156,0.06850689940635314,0.46724310311266837,0.06788405332706389,0.4626831501831502,0.06884036815356925 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,train,0.6811023622047244,0.019722460101388586,0.6784399453588775,0.020234469117763814,0.6833067536906227,0.019652142929701525 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,test,0.4230769230769231,0.057567698762660324,0.41338566827697265,0.05654634466025146,0.42078754578754574,0.05736800421015458 +flat_mae,patch,logistic,aabc_age,64,0.3593813663804626,train,0.9822834645669292,0.006042064300641128,0.9825556325174406,0.0059483277986337795,0.9826751235641717,0.005920658717795833 +flat_mae,patch,logistic,aabc_age,64,0.3593813663804626,test,0.40384615384615385,0.06583110000456206,0.4105528127267257,0.0649950965697301,0.40476190476190477,0.0660530661722229 +flat_mae,patch,logistic,aabc_age,65,0.005994842503189409,train,0.65748031496063,0.020643393974815044,0.6545504262215979,0.02089566619682225,0.6596159489363987,0.02059833384736603 +flat_mae,patch,logistic,aabc_age,65,0.005994842503189409,test,0.5769230769230769,0.06426212960815372,0.5621632996632997,0.06409258973084567,0.5707417582417582,0.06371763986513436 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6791338582677166,0.019349365966251357,0.6762686227659211,0.019775659507591196,0.6815081925395434,0.019316414111550124 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.4807692307692308,0.06016132158941171,0.4596607566662013,0.06068762171743191,0.4757326007326007,0.05987106851884831 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,train,0.84251968503937,0.016581048349315227,0.8431730211813073,0.016610184471139867,0.8425816664716748,0.016681710516475722 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,test,0.5192307692307693,0.06417316624783438,0.5067438055165966,0.06537415870734964,0.5144230769230769,0.06381588776883253 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.8503937007874016,0.01580411813548293,0.85022044131321,0.015899428555084014,0.850428614719528,0.015760100115970378 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.46153846153846156,0.06188234446280311,0.4398809523809524,0.06553892205323958,0.4578754578754579,0.06168321469782148 +flat_mae,patch,logistic,aabc_age,69,0.3593813663804626,train,0.9901574803149606,0.0041434758762160795,0.9902062227487987,0.004133871688238401,0.9903045039308463,0.004102602413405874 +flat_mae,patch,logistic,aabc_age,69,0.3593813663804626,test,0.40384615384615385,0.06426305038644224,0.3963817563922753,0.06504523412636465,0.40773809523809523,0.06493784273981329 +flat_mae,patch,logistic,aabc_age,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,70,2.782559402207126,test,0.38461538461538464,0.062168065300403766,0.3762747668997669,0.06254008406954978,0.38690476190476186,0.06265227732451385 +flat_mae,patch,logistic,aabc_age,71,0.000774263682681127,train,0.5551181102362205,0.02044959506620795,0.5499779922717755,0.020836274907345898,0.5553004810228064,0.020438680146624068 +flat_mae,patch,logistic,aabc_age,71,0.000774263682681127,test,0.5192307692307693,0.06314414804202098,0.5127731463938361,0.06407723343517392,0.5187728937728937,0.06344692866309007 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,train,0.8326771653543307,0.016223470787781788,0.8329414961230179,0.01629707972194343,0.8331537249539211,0.016275344660622817 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,test,0.5769230769230769,0.06315197223866517,0.5670588235294118,0.0659766853724605,0.5737179487179487,0.06326882135573896 +flat_mae,patch,logistic,aabc_age,73,0.046415888336127774,train,0.844488188976378,0.015980312050793845,0.8450532450167012,0.01593449503047919,0.845500432498576,0.015872013515514965 +flat_mae,patch,logistic,aabc_age,73,0.046415888336127774,test,0.5,0.06387463451880757,0.5131649831649832,0.060681143120914265,0.4981684981684981,0.06381269474662798 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6633858267716536,0.020166957663771773,0.6610446188469901,0.020419262022998368,0.6647940645084572,0.02008329406786545 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.46153846153846156,0.06643343200207232,0.4487179487179487,0.06994467821680307,0.459478021978022,0.06653138472875025 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,train,0.6712598425196851,0.020076231123149386,0.6686696567798635,0.020533151747504353,0.6738288255026477,0.020053442925762586 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,test,0.5192307692307693,0.07084296952670001,0.5222355488922206,0.07127761237252711,0.5192307692307693,0.07084768210974748 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,train,0.8543307086614174,0.015593200979780704,0.8543200760840167,0.0156666603636682,0.8551635630978022,0.015575475529999617 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,test,0.5,0.06617507485862315,0.4999823165340407,0.06630484056353396,0.5041208791208791,0.06666621260109828 +flat_mae,patch,logistic,aabc_age,77,0.3593813663804626,train,0.984251968503937,0.005426875104693928,0.9842061301638507,0.00545015171760585,0.9842061301638507,0.005442270672159946 +flat_mae,patch,logistic,aabc_age,77,0.3593813663804626,test,0.5192307692307693,0.060222209333778344,0.5068734015345269,0.06189476526915229,0.5231227106227107,0.06061078712711297 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.8366141732283464,0.016117756343987197,0.8369691236207231,0.016134155216635103,0.837353564229395,0.016059157618446042 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5384615384615384,0.06728517205668887,0.5408772262220538,0.0679401843207676,0.5412087912087912,0.06763574196912679 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,train,0.6535433070866141,0.02003785554971665,0.6503583480571433,0.0205657637424062,0.6560188266342405,0.01995186624942705 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,test,0.5,0.06592128172764379,0.49559419075548106,0.06713670130154296,0.4967948717948718,0.06592070603861995 +flat_mae,patch,logistic,aabc_age,80,0.3593813663804626,train,0.9803149606299213,0.006362497069762622,0.9805187052833295,0.006291234886607907,0.9803914399805136,0.006353343997347285 +flat_mae,patch,logistic,aabc_age,80,0.3593813663804626,test,0.5384615384615384,0.05982340876852798,0.5142857142857142,0.05976425329194862,0.5485347985347985,0.060826067221274344 +flat_mae,patch,logistic,aabc_age,81,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,81,21.54434690031882,test,0.4807692307692308,0.06616140386166806,0.49083639998182726,0.0655279966055778,0.48489010989010983,0.06648539090209249 +flat_mae,patch,logistic,aabc_age,82,0.3593813663804626,train,0.9881889763779528,0.004984761949752797,0.9881514076651944,0.0049891112081997075,0.988020820347188,0.005057113610509552 +flat_mae,patch,logistic,aabc_age,82,0.3593813663804626,test,0.5576923076923077,0.06565176194249356,0.5509476031215161,0.06716327472566319,0.5604395604395604,0.06582553511038909 +flat_mae,patch,logistic,aabc_age,83,0.3593813663804626,train,0.9862204724409449,0.004917094300054926,0.9862445801892442,0.00490188961419264,0.9864398270772878,0.004846208046352392 +flat_mae,patch,logistic,aabc_age,83,0.3593813663804626,test,0.40384615384615385,0.062201369398346744,0.39993961352657004,0.0607564887485186,0.40453296703296704,0.062312736109117806 +flat_mae,patch,logistic,aabc_age,84,0.3593813663804626,train,0.9862204724409449,0.005196886184558207,0.9864395045862365,0.005088050594614636,0.9864398270772878,0.005099977340462223 +flat_mae,patch,logistic,aabc_age,84,0.3593813663804626,test,0.38461538461538464,0.06326046233370854,0.37483766233766236,0.06495607335268341,0.3869047619047619,0.06373309536986203 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,train,0.4704724409448819,0.020838330522669944,0.43745539649888754,0.021093376759289125,0.4667897913074499,0.02063862728475097 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,test,0.46153846153846156,0.053040943769660925,0.3851662404092072,0.04907110049187005,0.4519230769230769,0.0511340841744937 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,train,0.9901574803149606,0.004232926155194254,0.9902265318889731,0.004198225305084891,0.9905220718120253,0.0040883434528134815 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,test,0.4423076923076923,0.061233571640546894,0.42765567765567764,0.06052194979561253,0.43727106227106227,0.06066059562879579 +flat_mae,patch,logistic,aabc_age,87,0.000774263682681127,train,0.5531496062992126,0.020486880129109816,0.5423112069186254,0.021584580744139086,0.5534695544017998,0.02042158499736588 +flat_mae,patch,logistic,aabc_age,87,0.000774263682681127,test,0.5192307692307693,0.06085460166970694,0.5059791758161323,0.06262279436498262,0.5247252747252747,0.061538786983484466 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,train,0.49606299212598426,0.019055880686894986,0.4659381949782827,0.01960224329282935,0.49406968693240555,0.018882776380105135 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,test,0.4230769230769231,0.06529472327511593,0.409168956043956,0.06741595981671455,0.4194139194139195,0.0648659643165565 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,train,0.6456692913385826,0.020597108303879483,0.6411799893613015,0.02112791802960769,0.6469517001701225,0.02054778477592729 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,test,0.36538461538461536,0.06330889567974905,0.36523809523809525,0.06335446073116911,0.36469780219780223,0.06316007137707644 +flat_mae,patch,logistic,aabc_age,90,0.3593813663804626,train,0.9763779527559056,0.0067190907712548304,0.9767636745357544,0.006590533987604508,0.9767943176783553,0.0066146771550144885 +flat_mae,patch,logistic,aabc_age,90,0.3593813663804626,test,0.5576923076923077,0.06483902461038442,0.5511001642036124,0.06610021730548786,0.5558608058608059,0.06470996539416102 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,train,0.6515748031496063,0.02078358762136675,0.6482322954913878,0.021185465070569478,0.6531500472775605,0.020702216188663792 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,test,0.5,0.0690114474682049,0.48875562218890556,0.07052548400517213,0.502518315018315,0.06933149024853995 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,train,0.8366141732283464,0.017213568691991053,0.8363365216422284,0.017284848392465666,0.8367008605858581,0.017227081518123688 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,test,0.4423076923076923,0.06068426387175513,0.429047619047619,0.05864433060922454,0.440018315018315,0.06037021971931907 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,train,0.8503937007874016,0.015749477787917337,0.8508925145060335,0.01580819985686153,0.8510137104925497,0.015784680987908653 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,test,0.4230769230769231,0.055069670773060325,0.4018481518481518,0.05030816480280495,0.42651098901098905,0.056241979872844475 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,train,0.8464566929133859,0.01551094365855111,0.847043854490663,0.015546579860273689,0.8468314924173698,0.015425055337285597 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,test,0.4423076923076923,0.06667827684898806,0.43925925925925924,0.0660750310674081,0.4461996336996337,0.06714379839131301 +flat_mae,patch,logistic,aabc_age,95,0.005994842503189409,train,0.6614173228346457,0.02029038677304085,0.659764679474932,0.02067499530347954,0.6633630312492208,0.020267012650694926 +flat_mae,patch,logistic,aabc_age,95,0.005994842503189409,test,0.4807692307692308,0.05936926864272119,0.4450167887667888,0.05625942854371244,0.4741300366300366,0.058357027881618034 +flat_mae,patch,logistic,aabc_age,96,0.3593813663804626,train,0.9862204724409449,0.005267385498584652,0.9864242341793276,0.005190113121863687,0.9862222591961088,0.0053097431931405405 +flat_mae,patch,logistic,aabc_age,96,0.3593813663804626,test,0.5,0.0666339799059497,0.5007411067193676,0.06746337764280509,0.49702380952380953,0.06696772910592041 +flat_mae,patch,logistic,aabc_age,97,0.000774263682681127,train,0.5649606299212598,0.021254226438153015,0.5606024827719674,0.021795878497464567,0.5655310861947606,0.021299319819808742 +flat_mae,patch,logistic,aabc_age,97,0.000774263682681127,test,0.4423076923076923,0.06360951279756194,0.43199905463806143,0.06659253905666594,0.44024725274725274,0.06332580940705823 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,train,0.6614173228346457,0.0205487142906635,0.6595842398225854,0.020924703036748118,0.6634806257899573,0.02059742805569028 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,test,0.4230769230769231,0.06037274503611101,0.3986236802413273,0.06471730328954058,0.42376373626373626,0.06077272210040215 +flat_mae,patch,logistic,aabc_age,99,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,99,21.54434690031882,test,0.4230769230769231,0.0604779288803511,0.40507700695106996,0.0595019975378678,0.42673992673992667,0.0612278220368281 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,train,0.8543307086614174,0.015237253520493446,0.8548547905001331,0.015295170074683243,0.8550459685570657,0.015208881882936889 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,test,0.4230769230769231,0.062172824120926,0.4162878787878789,0.06087541953541348,0.42994505494505497,0.06295408741684336 diff --git a/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9c2a69d3999013f3050fc2f89ace6722e9d42053 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:25:57 time: 6.8296 data: 5.6508 max mem: 3205 +extract (train) [ 20/228] eta: 0:02:04 time: 0.2894 data: 0.0840 max mem: 3393 +extract (train) [ 40/228] eta: 0:01:19 time: 0.2309 data: 0.0582 max mem: 3393 +extract (train) [ 60/228] eta: 0:01:01 time: 0.2477 data: 0.0690 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:48 time: 0.2062 data: 0.0548 max mem: 3393 +extract (train) [100/228] eta: 0:00:38 time: 0.2223 data: 0.0717 max mem: 3393 +extract (train) [120/228] eta: 0:00:31 time: 0.2430 data: 0.0775 max mem: 3393 +extract (train) [140/228] eta: 0:00:24 time: 0.2038 data: 0.0610 max mem: 3393 +extract (train) [160/228] eta: 0:00:18 time: 0.2514 data: 0.0840 max mem: 3393 +extract (train) [180/228] eta: 0:00:13 time: 0.2168 data: 0.0721 max mem: 3393 +extract (train) [200/228] eta: 0:00:07 time: 0.2066 data: 0.0699 max mem: 3393 +extract (train) [220/228] eta: 0:00:02 time: 0.1907 data: 0.0627 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1901 data: 0.0632 max mem: 3393 +extract (train) Total time: 0:00:58 (0.2581 s / it) +extract (validation) [ 0/27] eta: 0:02:08 time: 4.7776 data: 4.6140 max mem: 3393 +extract (validation) [20/27] eta: 0:00:02 time: 0.1772 data: 0.0503 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1666 data: 0.0494 max mem: 3393 +extract (validation) Total time: 0:00:09 (0.3605 s / it) +extract (test) [ 0/26] eta: 0:02:03 time: 4.7632 data: 4.5802 max mem: 3393 +extract (test) [20/26] eta: 0:00:02 time: 0.1666 data: 0.0418 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1589 data: 0.0380 max mem: 3393 +extract (test) Total time: 0:00:09 (0.3558 s / it) +feature extraction time: 0:01:17 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.046416 | train | 0.83661 | 0.017304 | 0.83742 | 0.01727 | 0.83711 | 0.017311 | +| flat_mae | patch | logistic | aabc_age | | 0.046416 | test | 0.38462 | 0.057949 | 0.3786 | 0.057977 | 0.37408 | 0.057542 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0623871762729618, "f1": 0.5219915848527349, "f1_std": 0.06017802432524102, "bacc": 0.5157967032967032, "bacc_std": 0.062236807096345596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06388899165300599, "f1": 0.426585173193946, "f1_std": 0.06119357747452577, "bacc": 0.4535256410256411, "bacc_std": 0.06262066287810407} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 21.54434690031882, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06524192575131152, "f1": 0.45466570466570466, "f1_std": 0.06482470264517816, "bacc": 0.45970695970695974, "bacc_std": 0.06498110562727226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06443055065718564, "f1": 0.5116483516483517, "f1_std": 0.0650331388161421, "bacc": 0.5201465201465201, "bacc_std": 0.06462726599464377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0617952694422784, "f1": 0.47609978588239454, "f1_std": 0.06279175548211148, "bacc": 0.48031135531135527, "bacc_std": 0.06206483155694821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06371162843338048, "f1": 0.5122053872053872, "f1_std": 0.06416665991261807, "bacc": 0.5249542124542124, "bacc_std": 0.0642360784865812} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0615353845384577, "f1": 0.4376142142062609, "f1_std": 0.06121649682855208, "bacc": 0.4741300366300366, "bacc_std": 0.06066880513261205} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 21.54434690031882, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06576022205508227, "f1": 0.46042572463768117, "f1_std": 0.06487791290788249, "bacc": 0.46863553113553114, "bacc_std": 0.06635234674296442} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06190242130412678, "f1": 0.5288539553752536, "f1_std": 0.0679387437598958, "bacc": 0.538003663003663, "bacc_std": 0.06219816353545317} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.07216594125294201, "f1": 0.5118248992386923, "f1_std": 0.07041445830854252, "bacc": 0.5027472527472527, "bacc_std": 0.0723557845570276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06693654815786321, "f1": 0.5756060606060607, "f1_std": 0.06473787371741364, "bacc": 0.5606684981684982, "bacc_std": 0.06697772160222111} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06553687024070302, "f1": 0.45795074812824, "f1_std": 0.06634004442730149, "bacc": 0.4597069597069597, "bacc_std": 0.0654509001386346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06320731129531956, "f1": 0.38568376068376065, "f1_std": 0.06044551634336766, "bacc": 0.3882783882783883, "bacc_std": 0.06412114375397251} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06341728060465807, "f1": 0.5011733094491715, "f1_std": 0.06418396716573467, "bacc": 0.5057234432234432, "bacc_std": 0.06352568745341663} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06423075771533752, "f1": 0.5815322580645161, "f1_std": 0.06388026233559048, "bacc": 0.5771520146520146, "bacc_std": 0.06442034689101897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0633639809579562, "f1": 0.47022546419098143, "f1_std": 0.06343769505593227, "bacc": 0.47870879120879123, "bacc_std": 0.06334870908274497} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06329348377801144, "f1": 0.4166269841269841, "f1_std": 0.06275823129090746, "bacc": 0.4237637362637363, "bacc_std": 0.06342433434426233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06913209848847436, "f1": 0.4096962629796213, "f1_std": 0.06846346696398492, "bacc": 0.40796703296703296, "bacc_std": 0.06957443001465052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06807665145534687, "f1": 0.4446703296703297, "f1_std": 0.06880843796884462, "bacc": 0.44505494505494503, "bacc_std": 0.06839798620861753} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 2.782559402207126, "split": "test", "acc": 0.5, "acc_std": 0.06821011525049057, "f1": 0.4909688013136289, "f1_std": 0.06848508840388282, "bacc": 0.49793956043956045, "bacc_std": 0.06819440187499051} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.05233137190258917, "f1": 0.48503637566137564, "f1_std": 0.04975114814772035, "bacc": 0.5258699633699634, "bacc_std": 0.053755919961926545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06771820964230939, "f1": 0.45055858120374254, "f1_std": 0.06617821920266428, "bacc": 0.4466575091575092, "bacc_std": 0.06814361301590238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05982469459044682, "f1": 0.41280786099865047, "f1_std": 0.05780069976389986, "bacc": 0.41941391941391937, "bacc_std": 0.05946350142948324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.060143750382389456, "f1": 0.35896993505689156, "f1_std": 0.05873125603552922, "bacc": 0.36744505494505497, "bacc_std": 0.060525387632811414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.06092573566459571, "f1": 0.32169055082848186, "f1_std": 0.058994593669884424, "bacc": 0.3067765567765568, "bacc_std": 0.060883066869048846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06300440249480833, "f1": 0.39202551834130783, "f1_std": 0.06404430005433341, "bacc": 0.4001831501831502, "bacc_std": 0.06261948844984895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06725628806307757, "f1": 0.38421481899742765, "f1_std": 0.06760645536317131, "bacc": 0.38278388278388276, "bacc_std": 0.06702383451020436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06609267073027351, "f1": 0.5105042016806722, "f1_std": 0.0700139892398969, "bacc": 0.5247252747252747, "bacc_std": 0.06648703176882743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06464829747846347, "f1": 0.45146520146520147, "f1_std": 0.06577861543241968, "bacc": 0.459478021978022, "bacc_std": 0.06465919222965302} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.069163332540063, "f1": 0.5751719576719576, "f1_std": 0.06955112487931323, "bacc": 0.5828754578754578, "bacc_std": 0.06896529519250544} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06704819206489909, "f1": 0.5408740176232436, "f1_std": 0.07130820324176729, "bacc": 0.5558608058608059, "bacc_std": 0.06677979720913636} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06118968470904961, "f1": 0.4282176157176157, "f1_std": 0.06127410700816197, "bacc": 0.44299450549450553, "bacc_std": 0.061556466042595244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.0652738428178778, "f1": 0.5582919254658385, "f1_std": 0.06614300974257094, "bacc": 0.5590659340659341, "bacc_std": 0.06556453681880652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06530171214805516, "f1": 0.4492713737875028, "f1_std": 0.06484840239157982, "bacc": 0.4432234432234432, "bacc_std": 0.06558736264401135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06423058498361414, "f1": 0.4195623985522118, "f1_std": 0.06469782093245457, "bacc": 0.4223901098901099, "bacc_std": 0.06419945702259432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06299219215259419, "f1": 0.4533880237300705, "f1_std": 0.0646290857410857, "bacc": 0.4860347985347986, "bacc_std": 0.06399464873685771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 166.81005372000556, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06413009152537719, "f1": 0.38902116402116405, "f1_std": 0.06428738273190888, "bacc": 0.3841575091575091, "bacc_std": 0.06433070209897496} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06492617564606488, "f1": 0.5681697612732095, "f1_std": 0.0663409114958182, "bacc": 0.575091575091575, "bacc_std": 0.06485475122448725} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06719661163618755, "f1": 0.4769137866963954, "f1_std": 0.06720643835893196, "bacc": 0.4775641025641026, "bacc_std": 0.06707026983024775} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06683995722363087, "f1": 0.5308862433862434, "f1_std": 0.06828493723383323, "bacc": 0.5396062271062272, "bacc_std": 0.06709156417340942} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06252781629523685, "f1": 0.5004417434140073, "f1_std": 0.060324350117111, "bacc": 0.5022893772893773, "bacc_std": 0.06288170989868064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.04863485491317966, "f1": 0.38369963369963367, "f1_std": 0.05961748395225562, "bacc": 0.4251373626373626, "bacc_std": 0.04911938812421867} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06724338686386251, "f1": 0.5769047619047619, "f1_std": 0.06881765818112363, "bacc": 0.5771520146520146, "bacc_std": 0.06738253467740424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.07110550332815524, "f1": 0.5025000000000001, "f1_std": 0.07129035176151845, "bacc": 0.4983974358974359, "bacc_std": 0.07138900894804541} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06487214325435997, "f1": 0.48148693510387663, "f1_std": 0.06353068449861626, "bacc": 0.4862637362637363, "bacc_std": 0.06574287996009155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06438516680216036, "f1": 0.5743686061381075, "f1_std": 0.07027834743891563, "bacc": 0.5753205128205128, "bacc_std": 0.0645452214856435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06323548316697633, "f1": 0.4089279895731508, "f1_std": 0.06411366667151348, "bacc": 0.41941391941391937, "bacc_std": 0.06257990943290731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06832149678598182, "f1": 0.4122980481676134, "f1_std": 0.06558963688299861, "bacc": 0.40613553113553114, "bacc_std": 0.0686484628371634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.0649302197128014, "f1": 0.4029971988795518, "f1_std": 0.06477845406577831, "bacc": 0.40041208791208793, "bacc_std": 0.0647196560149341} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06745526679030689, "f1": 0.5433214882943144, "f1_std": 0.06734358620754474, "bacc": 0.5384615384615385, "bacc_std": 0.06762030133758463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06488036327319337, "f1": 0.5727609427609428, "f1_std": 0.06440225643183178, "bacc": 0.5737179487179487, "bacc_std": 0.06493301202675035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06208901559847898, "f1": 0.4263083735909823, "f1_std": 0.06099594284855781, "bacc": 0.4269688644688645, "bacc_std": 0.06277916775184823} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.058336150960409956, "f1": 0.4778935185185185, "f1_std": 0.056919293573939544, "bacc": 0.5112179487179487, "bacc_std": 0.057354562492450176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.060034248115101455, "f1": 0.37649262413327744, "f1_std": 0.05826356801998579, "bacc": 0.41048534798534797, "bacc_std": 0.06119411094178349} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.058148249628954804, "f1": 0.4733514492753623, "f1_std": 0.054474379757819845, "bacc": 0.48901098901098905, "bacc_std": 0.05936499621774483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06304242724654639, "f1": 0.39622153209109734, "f1_std": 0.06076532629668424, "bacc": 0.4015567765567766, "bacc_std": 0.0628410538051134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0574054921810133, "f1": 0.49670506912442397, "f1_std": 0.05769100681961992, "bacc": 0.5155677655677655, "bacc_std": 0.05699896632817456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05967917182984243, "f1": 0.40654511453950437, "f1_std": 0.06115877536204072, "bacc": 0.4207875457875458, "bacc_std": 0.05927754848986136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06252805287588688, "f1": 0.49991466120498373, "f1_std": 0.06623523954461033, "bacc": 0.5157967032967034, "bacc_std": 0.06215046603781298} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.05898123009024635, "f1": 0.4291287565481114, "f1_std": 0.06176947870265391, "bacc": 0.44459706959706957, "bacc_std": 0.059268388111311805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.05999924802290117, "f1": 0.32605377714073364, "f1_std": 0.059493436151958734, "bacc": 0.3276098901098901, "bacc_std": 0.06022930518905783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06850689940635314, "f1": 0.46724310311266837, "f1_std": 0.06788405332706389, "bacc": 0.4626831501831502, "bacc_std": 0.06884036815356925} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.057567698762660324, "f1": 0.41338566827697265, "f1_std": 0.05654634466025146, "bacc": 0.42078754578754574, "bacc_std": 0.05736800421015458} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06583110000456206, "f1": 0.4105528127267257, "f1_std": 0.0649950965697301, "bacc": 0.40476190476190477, "bacc_std": 0.0660530661722229} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06426212960815372, "f1": 0.5621632996632997, "f1_std": 0.06409258973084567, "bacc": 0.5707417582417582, "bacc_std": 0.06371763986513436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06016132158941171, "f1": 0.4596607566662013, "f1_std": 0.06068762171743191, "bacc": 0.4757326007326007, "bacc_std": 0.05987106851884831} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06417316624783438, "f1": 0.5067438055165966, "f1_std": 0.06537415870734964, "bacc": 0.5144230769230769, "bacc_std": 0.06381588776883253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06188234446280311, "f1": 0.4398809523809524, "f1_std": 0.06553892205323958, "bacc": 0.4578754578754579, "bacc_std": 0.06168321469782148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06426305038644224, "f1": 0.3963817563922753, "f1_std": 0.06504523412636465, "bacc": 0.40773809523809523, "bacc_std": 0.06493784273981329} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.062168065300403766, "f1": 0.3762747668997669, "f1_std": 0.06254008406954978, "bacc": 0.38690476190476186, "bacc_std": 0.06265227732451385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06314414804202098, "f1": 0.5127731463938361, "f1_std": 0.06407723343517392, "bacc": 0.5187728937728937, "bacc_std": 0.06344692866309007} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06315197223866517, "f1": 0.5670588235294118, "f1_std": 0.0659766853724605, "bacc": 0.5737179487179487, "bacc_std": 0.06326882135573896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06387463451880757, "f1": 0.5131649831649832, "f1_std": 0.060681143120914265, "bacc": 0.4981684981684981, "bacc_std": 0.06381269474662798} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06643343200207232, "f1": 0.4487179487179487, "f1_std": 0.06994467821680307, "bacc": 0.459478021978022, "bacc_std": 0.06653138472875025} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.07084296952670001, "f1": 0.5222355488922206, "f1_std": 0.07127761237252711, "bacc": 0.5192307692307693, "bacc_std": 0.07084768210974748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06617507485862315, "f1": 0.4999823165340407, "f1_std": 0.06630484056353396, "bacc": 0.5041208791208791, "bacc_std": 0.06666621260109828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.060222209333778344, "f1": 0.5068734015345269, "f1_std": 0.06189476526915229, "bacc": 0.5231227106227107, "bacc_std": 0.06061078712711297} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06728517205668887, "f1": 0.5408772262220538, "f1_std": 0.0679401843207676, "bacc": 0.5412087912087912, "bacc_std": 0.06763574196912679} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06592128172764379, "f1": 0.49559419075548106, "f1_std": 0.06713670130154296, "bacc": 0.4967948717948718, "bacc_std": 0.06592070603861995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05982340876852798, "f1": 0.5142857142857142, "f1_std": 0.05976425329194862, "bacc": 0.5485347985347985, "bacc_std": 0.060826067221274344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06616140386166806, "f1": 0.49083639998182726, "f1_std": 0.0655279966055778, "bacc": 0.48489010989010983, "bacc_std": 0.06648539090209249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06565176194249356, "f1": 0.5509476031215161, "f1_std": 0.06716327472566319, "bacc": 0.5604395604395604, "bacc_std": 0.06582553511038909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.062201369398346744, "f1": 0.39993961352657004, "f1_std": 0.0607564887485186, "bacc": 0.40453296703296704, "bacc_std": 0.062312736109117806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06326046233370854, "f1": 0.37483766233766236, "f1_std": 0.06495607335268341, "bacc": 0.3869047619047619, "bacc_std": 0.06373309536986203} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.053040943769660925, "f1": 0.3851662404092072, "f1_std": 0.04907110049187005, "bacc": 0.4519230769230769, "bacc_std": 0.0511340841744937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.061233571640546894, "f1": 0.42765567765567764, "f1_std": 0.06052194979561253, "bacc": 0.43727106227106227, "bacc_std": 0.06066059562879579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06085460166970694, "f1": 0.5059791758161323, "f1_std": 0.06262279436498262, "bacc": 0.5247252747252747, "bacc_std": 0.061538786983484466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06529472327511593, "f1": 0.409168956043956, "f1_std": 0.06741595981671455, "bacc": 0.4194139194139195, "bacc_std": 0.0648659643165565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06330889567974905, "f1": 0.36523809523809525, "f1_std": 0.06335446073116911, "bacc": 0.36469780219780223, "bacc_std": 0.06316007137707644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06483902461038442, "f1": 0.5511001642036124, "f1_std": 0.06610021730548786, "bacc": 0.5558608058608059, "bacc_std": 0.06470996539416102} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.0690114474682049, "f1": 0.48875562218890556, "f1_std": 0.07052548400517213, "bacc": 0.502518315018315, "bacc_std": 0.06933149024853995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06068426387175513, "f1": 0.429047619047619, "f1_std": 0.05864433060922454, "bacc": 0.440018315018315, "bacc_std": 0.06037021971931907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.055069670773060325, "f1": 0.4018481518481518, "f1_std": 0.05030816480280495, "bacc": 0.42651098901098905, "bacc_std": 0.056241979872844475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06667827684898806, "f1": 0.43925925925925924, "f1_std": 0.0660750310674081, "bacc": 0.4461996336996337, "bacc_std": 0.06714379839131301} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05936926864272119, "f1": 0.4450167887667888, "f1_std": 0.05625942854371244, "bacc": 0.4741300366300366, "bacc_std": 0.058357027881618034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.0666339799059497, "f1": 0.5007411067193676, "f1_std": 0.06746337764280509, "bacc": 0.49702380952380953, "bacc_std": 0.06696772910592041} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06360951279756194, "f1": 0.43199905463806143, "f1_std": 0.06659253905666594, "bacc": 0.44024725274725274, "bacc_std": 0.06332580940705823} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06037274503611101, "f1": 0.3986236802413273, "f1_std": 0.06471730328954058, "bacc": 0.42376373626373626, "bacc_std": 0.06077272210040215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 21.54434690031882, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0604779288803511, "f1": 0.40507700695106996, "f1_std": 0.0595019975378678, "bacc": 0.42673992673992667, "bacc_std": 0.0612278220368281} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.062172824120926, "f1": 0.4162878787878789, "f1_std": 0.06087541953541348, "bacc": 0.42994505494505497, "bacc_std": 0.06295408741684336} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 4.4389 | 23.694 | 0.81199 | 0.16843 | 0.80964 | 0.17237 | 0.81261 | 0.16822 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 4.4389 | 23.694 | 0.47212 | 0.061569 | 0.46373 | 0.062725 | 0.47234 | 0.061544 | + + +done! total time: 0:05:47 diff --git a/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8cfe7259b61a4dfe0279636d92c21c6796770d91 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..5b2cf009fd42d91df1f29e49d9d9fc3cce6e1821 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,,2.782559402207126,test,0.8909090909090909,0.04386321337525678,0.8891129032258065,0.04392565910285936,0.9015151515151516,0.040231321487743096 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,train,0.9829867674858223,0.006411646870366585,0.9825679104559584,0.0065669777000106466,0.9828614554940063,0.006536396026268371 +flat_mae,patch,logistic,aabc_sex,1,0.3593813663804626,test,0.8181818181818182,0.05265588898336156,0.8166666666666667,0.05265479976916016,0.8254076086956521,0.05117159420062498 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,train,0.9319470699432892,0.01086034226940898,0.929871851524525,0.011259943188207838,0.9277968287464463,0.011680840048076627 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,test,0.8363636363636363,0.051022263816481866,0.8328267477203647,0.052100542340955486,0.8349184782608696,0.052040162627840746 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,train,0.9319470699432892,0.011067920394260862,0.9301434985474073,0.011376229768835774,0.9296213253612357,0.011579021635585057 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,test,0.7818181818181819,0.0554105458829032,0.7727272727272727,0.05863437044538351,0.7697010869565217,0.05848903869876677 +flat_mae,patch,logistic,aabc_sex,4,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,4,166.81005372000556,test,0.8181818181818182,0.050275682964238864,0.8166666666666667,0.05041723963269043,0.8254076086956521,0.049211660530014 +flat_mae,patch,logistic,aabc_sex,5,0.3593813663804626,train,0.9848771266540642,0.005187503776616098,0.9845140515222482,0.005302540181286461,0.9851036079603739,0.005121918594376673 +flat_mae,patch,logistic,aabc_sex,5,0.3593813663804626,test,0.8,0.05273855678629085,0.790003471017008,0.056230055783621925,0.7853260869565217,0.05558520219851598 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,train,0.9224952741020794,0.012217292881259117,0.9201850291269996,0.012659539177079727,0.918410563029397,0.013071585646073917 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,test,0.9090909090909091,0.03693972754466904,0.9045470322804582,0.03992807000558217,0.8974184782608696,0.04171778202044614 +flat_mae,patch,logistic,aabc_sex,7,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,7,2.782559402207126,test,0.8545454545454545,0.04707285017014047,0.8521505376344086,0.04774533501656769,0.8566576086956521,0.047426358897281806 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,train,0.9338374291115312,0.010641879984276134,0.9320413294426397,0.01093859371464472,0.9312553122893402,0.011089091819029057 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,test,0.8,0.05253974174669985,0.795677136102668,0.053761764767330654,0.7975543478260869,0.05411603893049472 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,train,0.9281663516068053,0.011227603005859503,0.9261694188164776,0.011568458626444254,0.9251370204285002,0.011798028807798027 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,test,0.8727272727272727,0.046299049487771055,0.8683760683760684,0.048387140550290475,0.8661684782608696,0.04900389469866049 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,train,0.9281663516068053,0.011193308769022714,0.9260738452486026,0.011581707043272778,0.9245288548902371,0.011932826505040623 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,test,0.8545454545454545,0.048698770497147895,0.8505434782608696,0.050083757649930345,0.8505434782608696,0.050190351568757355 +flat_mae,patch,logistic,aabc_sex,11,0.005994842503189409,train,0.8790170132325141,0.01374664196089188,0.8753277360435999,0.014250556812847759,0.8735308772238342,0.014485665767677215 +flat_mae,patch,logistic,aabc_sex,11,0.005994842503189409,test,0.9090909090909091,0.03667531878885939,0.9027925061859314,0.04190960075868798,0.8913043478260869,0.04385092463885362 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,train,0.9867674858223062,0.005020354081444497,0.9864417081324122,0.005141133475496999,0.9867375948884786,0.005112195158987262 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,test,0.8,0.05178686134787916,0.7861435136090491,0.057965069555226255,0.7792119565217391,0.05650578427288931 +flat_mae,patch,logistic,aabc_sex,13,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,13,166.81005372000556,test,0.8909090909090909,0.041913551330374846,0.8879076086956521,0.04308790390649044,0.8879076086956521,0.04317491706667154 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,train,0.9829867674858223,0.005482542184301136,0.9825679104559584,0.005615199175599613,0.9828614554940063,0.00560513262947946 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,test,0.8909090909090909,0.03883382695154418,0.8879076086956521,0.03999668307543158,0.8879076086956521,0.04032041382000847 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,train,0.9357277882797732,0.010606162295353015,0.9339410589410589,0.010951724833190065,0.9328892992174448,0.011283099850146543 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,test,0.7818181818181819,0.05706815991455355,0.7758152173913043,0.058976953550242794,0.7758152173913043,0.059001860339285686 +flat_mae,patch,logistic,aabc_sex,16,0.3593813663804626,train,0.9829867674858223,0.005437009488705511,0.9825885657235016,0.0055536891560321495,0.9834696210322693,0.005368527684275893 +flat_mae,patch,logistic,aabc_sex,16,0.3593813663804626,test,0.8363636363636363,0.051427027656220974,0.8307692307692308,0.05383410687411033,0.8288043478260869,0.054278706998561854 +flat_mae,patch,logistic,aabc_sex,17,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,17,2.782559402207126,test,0.8,0.051782839622203425,0.790003471017008,0.0555870379546963,0.7853260869565217,0.05496704207452725 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,train,0.9262759924385633,0.011199720654216351,0.9243690085598548,0.01149185670707677,0.9241111990386588,0.01162629757108021 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,test,0.8181818181818182,0.05072647767379556,0.8176392572944298,0.05062105628133805,0.8315217391304348,0.047879830440684626 +flat_mae,patch,logistic,aabc_sex,19,0.3593813663804626,train,0.9810964083175804,0.005691982892131781,0.9806193030276386,0.005839464865998627,0.9806193030276386,0.005975351154701763 +flat_mae,patch,logistic,aabc_sex,19,0.3593813663804626,test,0.7818181818181819,0.054926378976207925,0.7727272727272727,0.05772363026484643,0.7697010869565217,0.0571900716828565 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9281663516068053,0.011587251621567082,0.9260738452486026,0.011993725891308314,0.9245288548902371,0.012375563916449365 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8363636363636363,0.04610408633580652,0.8250265111346766,0.05230800624333397,0.8165760869565217,0.051851235104635476 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,train,0.9281663516068053,0.011182620940990545,0.9259758432758874,0.011577964166298232,0.923920689351974,0.01188135540486852 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,test,0.7818181818181819,0.052523229020851894,0.7727272727272727,0.05581268638224436,0.7697010869565217,0.055373226476392326 +flat_mae,patch,logistic,aabc_sex,22,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,22,21.54434690031882,test,0.8363636363636363,0.0519799579939985,0.8328267477203647,0.053059633258707335,0.8349184782608696,0.05295779550098547 +flat_mae,patch,logistic,aabc_sex,23,0.046415888336127774,train,0.9206049149338374,0.011682282011412184,0.9182921447484554,0.012081929223847643,0.9167765761012925,0.012419142391638986 +flat_mae,patch,logistic,aabc_sex,23,0.046415888336127774,test,0.8727272727272727,0.046282938683959336,0.8699763593380614,0.04729635012452601,0.8722826086956521,0.04700199204630469 +flat_mae,patch,logistic,aabc_sex,24,0.3593813663804626,train,0.9773156899810964,0.006661942191693025,0.9767431636331663,0.006832896592937214,0.9767431636331663,0.00691825329498112 +flat_mae,patch,logistic,aabc_sex,24,0.3593813663804626,test,0.8727272727272727,0.04799629462282125,0.8699763593380614,0.04910219932790519,0.8722826086956521,0.04885495493974446 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,train,0.9829867674858223,0.0054636534054791465,0.9825885657235016,0.005579542161040209,0.9834696210322693,0.005346358469112914 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,test,0.8181818181818182,0.05094568814429603,0.8151881720430108,0.05180721624321444,0.8192934782608696,0.05138047076561111 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,train,0.9357277882797732,0.01051201793541408,0.9339410589410589,0.010807699747920707,0.9328892992174448,0.010931716798019658 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,test,0.7818181818181819,0.05115633468606205,0.7727272727272727,0.05421765843013767,0.7697010869565217,0.053793701941407264 +flat_mae,patch,logistic,aabc_sex,27,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,27,2.782559402207126,test,0.8727272727272727,0.04253984422993856,0.8699763593380614,0.04326416544506314,0.8722826086956521,0.042897966358643964 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,train,0.8846880907372401,0.013423413699526822,0.8812508969938286,0.013929901414925486,0.8796491690846742,0.014290777024908783 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,test,0.8,0.05296143614941217,0.790003471017008,0.057136675986417966,0.7853260869565217,0.05670747727149538 +flat_mae,patch,logistic,aabc_sex,29,0.3593813663804626,train,0.9848771266540642,0.005168698287105385,0.9845140515222482,0.005284565854105462,0.9851036079603739,0.005127239953298392 +flat_mae,patch,logistic,aabc_sex,29,0.3593813663804626,test,0.8363636363636363,0.04905835957946512,0.8307692307692308,0.051178039635536604,0.8288043478260869,0.05126013007047176 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,train,0.9224952741020794,0.011869800493876193,0.9201850291269996,0.012283679653695662,0.918410563029397,0.01258180560817222 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,test,0.7636363636363637,0.059432144793123046,0.7555555555555555,0.061952149867716945,0.7540760869565217,0.061502168699851326 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,train,0.9262759924385633,0.010994583740998246,0.9241777748376498,0.011362782928780985,0.9228948679621325,0.011686003205109577 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,test,0.7818181818181819,0.05530347231039635,0.7782258064516129,0.055718803885056456,0.7819293478260869,0.055179640142934576 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,train,0.9243856332703214,0.011819434381201154,0.9223816650526748,0.012158151141020557,0.9218690465722911,0.012359061931176817 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,test,0.8727272727272727,0.04434598754758142,0.8683760683760684,0.046439782848170005,0.8661684782608696,0.04699345640291025 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,train,0.9206049149338374,0.01109500350179081,0.9185007483053085,0.011391645868646123,0.9179929071778188,0.011494820734193207 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,test,0.8545454545454545,0.045888820007196636,0.8505434782608696,0.047194520021074556,0.8505434782608696,0.04709612353182535 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,train,0.9792060491493384,0.006539549937667335,0.9787439225298349,0.006664929886575387,0.9802016471760602,0.006298504167404621 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,test,0.8363636363636363,0.049449344646318734,0.8307692307692308,0.05149415963006614,0.8288043478260869,0.0517093403774025 +flat_mae,patch,logistic,aabc_sex,35,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,35,2.782559402207126,test,0.8727272727272727,0.043602577829624116,0.8699763593380614,0.044378928880580606,0.8722826086956521,0.04402821761910765 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,train,0.9792060491493384,0.005942495509706932,0.9787193581065019,0.0060715560203381275,0.9795934816377971,0.005928774749430285 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,test,0.8363636363636363,0.04887976139856006,0.8328267477203647,0.04978049940641453,0.8349184782608696,0.04963764381180495 +flat_mae,patch,logistic,aabc_sex,37,0.3593813663804626,train,0.9829867674858223,0.005662676827736662,0.9825466942830434,0.005807475308733624,0.9822532899557432,0.005859545146694079 +flat_mae,patch,logistic,aabc_sex,37,0.3593813663804626,test,0.8181818181818182,0.052573267253919684,0.8151881720430108,0.053486615006010346,0.8192934782608696,0.053490187548087584 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,train,0.9319470699432892,0.01107592458261744,0.9302294908994988,0.011365434637962543,0.9302294908994988,0.011496074121349422 +flat_mae,patch,logistic,aabc_sex,38,0.046415888336127774,test,0.8727272727272727,0.04203854980159861,0.8639095086603039,0.04749952680671482,0.8539402173913043,0.04816852800661158 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,train,0.9262759924385633,0.011081137103900445,0.9240784423403167,0.01147087894886883,0.9222867024238695,0.011791069819642274 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,test,0.8545454545454545,0.04533743459004501,0.8505434782608696,0.046700870489602,0.8505434782608696,0.046837935783746884 +flat_mae,patch,logistic,aabc_sex,40,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,40,2.782559402207126,test,0.8181818181818182,0.049738489677229825,0.8131793478260869,0.05141701602857101,0.8131793478260869,0.05157697266027768 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,train,0.8752362948960303,0.014227651867596825,0.8712572642260834,0.014776466713096511,0.8690465722910987,0.015020895367525319 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,test,0.9090909090909091,0.038480287966483194,0.905982905982906,0.040362495966507414,0.9035326086956521,0.04107032746648314 +flat_mae,patch,logistic,aabc_sex,42,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,42,166.81005372000556,test,0.7818181818181819,0.055751941744341615,0.7782258064516129,0.05660795085808481,0.7819293478260869,0.05630541567402165 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,train,0.9224952741020794,0.011731038978893372,0.920289455598555,0.012104568368655253,0.9190187285676603,0.012398425713796818 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,test,0.8545454545454545,0.0439057743900889,0.8521505376344086,0.04445632866046411,0.8566576086956521,0.04384902873841091 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,train,0.9357277882797732,0.010737023747121861,0.9337678597731625,0.011125548686214466,0.9316729681409186,0.011524648482303512 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,test,0.8,0.05146166349737786,0.790003471017008,0.0554262734827424,0.7853260869565217,0.05497276355871275 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,train,0.9187145557655955,0.012329651719900681,0.916611983796763,0.012652940681382932,0.9163589202497142,0.012767122775365512 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,test,0.9272727272727272,0.034506216950927315,0.9266666666666667,0.03439735271827876,0.9375,0.029653780192203154 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.9224952741020794,0.011827916136415323,0.920289455598555,0.012206163443696166,0.9190187285676603,0.012444793296179587 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.9090909090909091,0.0393400097055957,0.9071259709557582,0.0400938976349243,0.9096467391304348,0.03956758514624431 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,train,0.9829867674858223,0.005712427699137532,0.9825885657235016,0.005834601586498123,0.9834696210322693,0.0056027681443567914 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,test,0.8909090909090909,0.04032423957771133,0.8863636363636364,0.042786489845962436,0.8817934782608696,0.04366744834135569 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,train,0.8752362948960303,0.014022730180077377,0.8714317277949624,0.01451698768635156,0.869654737829362,0.014751154643700679 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,test,0.8909090909090909,0.04058312151703114,0.8879076086956521,0.041885923840686734,0.8879076086956521,0.04216694092273785 +flat_mae,patch,logistic,aabc_sex,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,49,166.81005372000556,test,0.8363636363636363,0.047862060752793846,0.8343927735028438,0.04817250193499985,0.8410326086956521,0.04748820110383062 +flat_mae,patch,logistic,aabc_sex,50,0.046415888336127774,train,0.9224952741020794,0.01128131065195158,0.9201850291269996,0.01166506840396556,0.918410563029397,0.011942675922216046 +flat_mae,patch,logistic,aabc_sex,50,0.046415888336127774,test,0.8545454545454545,0.04820247927507941,0.8484848484848485,0.0507837081955852,0.8444293478260869,0.05090977446513677 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,train,0.9130434782608695,0.011595257890140247,0.910738914810576,0.0119401649416894,0.9102406283888742,0.012186540766105692 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,test,0.8181818181818182,0.04529986773191422,0.8074229691876751,0.05073726921765426,0.8009510869565217,0.050273032794018735 +flat_mae,patch,logistic,aabc_sex,52,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,52,21.54434690031882,test,0.8363636363636363,0.04892133675690641,0.8307692307692308,0.051052479399323294,0.8288043478260869,0.05116841613459943 +flat_mae,patch,logistic,aabc_sex,53,0.3593813663804626,train,0.9810964083175804,0.005998668143578677,0.9806193030276386,0.0061502017493534735,0.9806193030276386,0.00622021430283144 +flat_mae,patch,logistic,aabc_sex,53,0.3593813663804626,test,0.8545454545454545,0.049437483687870154,0.8521505376344086,0.05005743999904591,0.8566576086956521,0.049228255619581746 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,train,0.8809073724007561,0.014199795752770005,0.8770244091437427,0.014763356969062115,0.8745566986136757,0.015012534847057559 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,test,0.8545454545454545,0.047307106380010025,0.8521505376344086,0.04786339862693188,0.8566576086956521,0.04702470759122328 +flat_mae,patch,logistic,aabc_sex,55,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,55,21.54434690031882,test,0.8,0.053532546676942294,0.7931623931623932,0.05615577862990481,0.7914402173913043,0.05628428399847978 +flat_mae,patch,logistic,aabc_sex,56,0.046415888336127774,train,0.9300567107750473,0.01153923980765489,0.9281579768393621,0.01187831115229994,0.9273791728948679,0.012100881028858139 +flat_mae,patch,logistic,aabc_sex,56,0.046415888336127774,test,0.8363636363636363,0.048303106345706434,0.8307692307692308,0.050494865110545385,0.8288043478260869,0.05078730095183825 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,train,0.9262759924385633,0.010633434151226888,0.9241777748376498,0.010966500221882669,0.9228948679621325,0.011173700982284691 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,test,0.8909090909090909,0.04187341806559225,0.8879076086956521,0.04324370316732738,0.8879076086956521,0.04343313034334395 +flat_mae,patch,logistic,aabc_sex,58,0.046415888336127774,train,0.9243856332703214,0.011444890109943479,0.9220798350272499,0.011829487554457478,0.9200445499575016,0.0120498928722987 +flat_mae,patch,logistic,aabc_sex,58,0.046415888336127774,test,0.8,0.05269962911720449,0.7975911676145868,0.052978822356543255,0.8036684782608696,0.05225068674519407 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,train,0.9168241965973535,0.012166120743473485,0.9145119586296058,0.012555386906840141,0.9135086022450833,0.012798129046671157 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,test,0.8727272727272727,0.04536135902463663,0.8699763593380614,0.04632550080479619,0.8722826086956521,0.04603412930281962 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,train,0.8809073724007561,0.013806404837909878,0.8773574837805116,0.01422266443461675,0.875773029690202,0.014268415174205667 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,test,0.8363636363636363,0.04786946437798012,0.8250265111346766,0.05401258850812746,0.8165760869565217,0.05336562533883418 +flat_mae,patch,logistic,aabc_sex,61,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,61,2.782559402207126,test,0.7636363636363637,0.05629291751173917,0.7585275244849713,0.057295239610100114,0.7601902173913043,0.05716227324842578 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,train,0.9792060491493384,0.006119888839228942,0.9786941127795048,0.00627515442143354,0.978985316099534,0.0063844501604304665 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,test,0.8909090909090909,0.042532383384471135,0.8879076086956521,0.043869281572619365,0.8879076086956521,0.04397386528961242 +flat_mae,patch,logistic,aabc_sex,63,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,63,2.782559402207126,test,0.9454545454545454,0.030001123945887918,0.9435897435897436,0.031403955628115855,0.9408967391304348,0.03285763773574193 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,train,0.9810964083175804,0.0062271128177261775,0.9806193030276386,0.0063854341297253925,0.9806193030276386,0.006470150763331976 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,test,0.8,0.05472460277389255,0.7975911676145868,0.05498243239251332,0.8036684782608696,0.05441472233104929 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,train,0.9168241965973535,0.012194560330175588,0.9144012944983819,0.012605951161707313,0.9129004367068203,0.012861589890578437 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,test,0.8727272727272727,0.045901497121733706,0.8683760683760684,0.04795800107377598,0.8661684782608696,0.048408551303511474 +flat_mae,patch,logistic,aabc_sex,66,0.005994842503189409,train,0.8827977315689981,0.013499480879100606,0.8790598542729874,0.013964036552584425,0.8767988510800433,0.01408346683722175 +flat_mae,patch,logistic,aabc_sex,66,0.005994842503189409,test,0.7818181818181819,0.05590187388748458,0.7758152173913043,0.057523772721999294,0.7758152173913043,0.05755525874797466 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,train,0.9130434782608695,0.01185416425812814,0.9105104442483083,0.012262764179998872,0.9090242973123479,0.012507610801665132 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,test,0.9090909090909091,0.03910915430450041,0.9071259709557582,0.03983721099297673,0.9096467391304348,0.0393662235498639 +flat_mae,patch,logistic,aabc_sex,68,0.046415888336127774,train,0.9243856332703214,0.01154680897149735,0.9225702576112412,0.011834050305909541,0.9230853776488174,0.011986265103417331 +flat_mae,patch,logistic,aabc_sex,68,0.046415888336127774,test,0.9090909090909091,0.03885738280362057,0.9045470322804582,0.042070079932440566,0.8974184782608696,0.043712359140952936 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,train,0.9149338374291115,0.01199138487422192,0.9125128171203651,0.012370957852902594,0.9112664497787157,0.012610897536273077 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,test,0.9818181818181818,0.018124236091054922,0.9811965811965813,0.019033310260698953,0.9782608695652174,0.02167028228278305 +flat_mae,patch,logistic,aabc_sex,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,70,2.782559402207126,test,0.8545454545454545,0.048198967788405656,0.8533333333333333,0.04815418032403406,0.8627717391304348,0.04631582983263305 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,train,0.9130434782608695,0.011907668750406795,0.9105104442483083,0.01228180814891195,0.9090242973123479,0.012471793063889338 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,test,0.8545454545454545,0.042705387044781905,0.8428571428571429,0.04959198270756321,0.8322010869565217,0.048915602288142374 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,train,0.9848771266540642,0.0051731653410044855,0.9845140515222482,0.005293019915856736,0.9851036079603739,0.005184221081145655 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,test,0.8727272727272727,0.042466375614728866,0.8663658451926415,0.04573044341950363,0.8600543478260869,0.04643619318417449 +flat_mae,patch,logistic,aabc_sex,73,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,73,21.54434690031882,test,0.9090909090909091,0.038264412830412,0.9045470322804582,0.041573816592526514,0.8974184782608696,0.04329736525477135 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,train,0.8790170132325141,0.013284897127159228,0.8753277360435999,0.013770957615428923,0.8735308772238342,0.01405747786262297 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,test,0.8727272727272727,0.04654447442149876,0.8699763593380614,0.047472654440359224,0.8722826086956521,0.04711054185814096 +flat_mae,patch,logistic,aabc_sex,75,0.3593813663804626,train,0.9810964083175804,0.005925574980472664,0.9806425644028103,0.006061830611257943,0.9812274685659017,0.0059428712821264825 +flat_mae,patch,logistic,aabc_sex,75,0.3593813663804626,test,0.8545454545454545,0.04482577475788485,0.8505434782608696,0.04628517780841475,0.8505434782608696,0.04650196342541968 +flat_mae,patch,logistic,aabc_sex,76,0.3593813663804626,train,0.9867674858223062,0.004809066293344714,0.9864577733405013,0.004912546492483836,0.9873457604267417,0.004667884311138566 +flat_mae,patch,logistic,aabc_sex,76,0.3593813663804626,test,0.8181818181818182,0.05271973613849365,0.8106060606060606,0.05573070000133594,0.8070652173913043,0.05548169430489548 +flat_mae,patch,logistic,aabc_sex,77,0.046415888336127774,train,0.9243856332703214,0.011434804952975448,0.9220798350272499,0.011836343839198599,0.9200445499575016,0.012127629908291459 +flat_mae,patch,logistic,aabc_sex,77,0.046415888336127774,test,0.8363636363636363,0.04852091991524362,0.8250265111346766,0.05446445629425495,0.8165760869565217,0.053927853743368455 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,train,0.9224952741020794,0.011497051946718318,0.9203912716328067,0.011855368162087213,0.9196268941059234,0.01214576630283103 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,test,0.9272727272727272,0.03654997710596111,0.9252717391304348,0.0376408995727102,0.9252717391304348,0.037813069227190715 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,train,0.9187145557655955,0.011964943200071017,0.9165079190295289,0.01232157112279228,0.9157507547114512,0.012554500918901236 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,test,0.8545454545454545,0.04746194299707766,0.84593837535014,0.05219844710407987,0.8383152173913043,0.05233849217449428 +flat_mae,patch,logistic,aabc_sex,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,80,2.782559402207126,test,0.7090909090909091,0.062484113683478934,0.696969696969697,0.06558017612631742,0.6949728260869565,0.06471991278065005 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,train,0.9168241965973535,0.01124217051953436,0.9144012944983819,0.011606435760512894,0.9129004367068203,0.011827257885670173 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,test,0.8909090909090909,0.041228650874028876,0.8863636363636364,0.04358952704766233,0.8817934782608696,0.044539619255488855 +flat_mae,patch,logistic,aabc_sex,82,0.046415888336127774,train,0.9243856332703214,0.011549707776792909,0.922283598754187,0.011917302347633335,0.9212608810340279,0.012219877492282878 +flat_mae,patch,logistic,aabc_sex,82,0.046415888336127774,test,0.8545454545454545,0.04423042593636641,0.84593837535014,0.04841159693018112,0.8383152173913043,0.048776578278426956 +flat_mae,patch,logistic,aabc_sex,83,0.046415888336127774,train,0.9262759924385633,0.010915301819398107,0.924637543514869,0.011126032755325742,0.9259356956534482,0.011074466032620429 +flat_mae,patch,logistic,aabc_sex,83,0.046415888336127774,test,0.8909090909090909,0.03819523573769183,0.8821428571428571,0.04486985290714586,0.8695652173913043,0.045668216642892404 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,train,0.9300567107750473,0.011492761897415023,0.9280660940767447,0.011862949940555215,0.9267710073566048,0.012102672412429915 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,test,0.8,0.0560218493385017,0.7931623931623932,0.05827916762984857,0.7914402173913043,0.058255216809721115 +flat_mae,patch,logistic,aabc_sex,85,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,85,2.782559402207126,test,0.8909090909090909,0.04269276749865502,0.8879076086956521,0.044144738763871746,0.8879076086956521,0.044726083601717544 +flat_mae,patch,logistic,aabc_sex,86,0.046415888336127774,train,0.9130434782608695,0.012600970303903957,0.9106261385673151,0.012971798846859171,0.9096324628506112,0.01318939373536892 +flat_mae,patch,logistic,aabc_sex,86,0.046415888336127774,test,0.8727272727272727,0.04418742949776365,0.8699763593380614,0.04506553745968865,0.8722826086956521,0.044936406075238904 +flat_mae,patch,logistic,aabc_sex,87,0.005994842503189409,train,0.8733459357277883,0.014605526608566781,0.8690327944572749,0.015250837150799971,0.866196254286468,0.015541183347975251 +flat_mae,patch,logistic,aabc_sex,87,0.005994842503189409,test,0.8363636363636363,0.04991475708852653,0.8328267477203647,0.05084546982440388,0.8349184782608696,0.05046261887237688 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,test,0.8181818181818182,0.05145016364252861,0.8106060606060606,0.05452648044088764,0.8070652173913043,0.05459618185922766 +flat_mae,patch,logistic,aabc_sex,89,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,89,2.782559402207126,test,0.8545454545454545,0.04382540868355924,0.84593837535014,0.04801965265036465,0.8383152173913043,0.04842113524321146 +flat_mae,patch,logistic,aabc_sex,90,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,90,166.81005372000556,test,0.8363636363636363,0.04808730352526097,0.8307692307692308,0.050320559157121116,0.8288043478260869,0.0504716924623748 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,train,0.9243856332703214,0.012274880994454413,0.922182994998529,0.012696916679473919,0.9206527154957649,0.013035118368354505 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,test,0.8363636363636363,0.04921847398912505,0.8328267477203647,0.05017049025434237,0.8349184782608696,0.05016582556230712 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,train,0.9149338374291115,0.012637726149938685,0.9123982027003654,0.01308055591951302,0.9106582842404525,0.01337527183095333 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,test,0.9090909090909091,0.037837477594889986,0.905982905982906,0.03944760922374126,0.9035326086956521,0.04011543204446846 +flat_mae,patch,logistic,aabc_sex,93,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,93,166.81005372000556,test,0.8181818181818182,0.04752462954324986,0.8035714285714286,0.054673392528910764,0.7948369565217391,0.05323249361949366 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,train,0.9111531190926276,0.012316593925511089,0.9087412138229735,0.012685549730581079,0.9079984759225066,0.01286583971074051 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,test,0.9090909090909091,0.038471197769906255,0.9071259709557582,0.039199159975394214,0.9096467391304348,0.03881062805028247 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,train,0.9206049149338374,0.011955376858895257,0.9183977786918963,0.0123151766397194,0.9173847416395557,0.012513533139835687 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,test,0.8909090909090909,0.04221785294680458,0.8879076086956521,0.04346054227662537,0.8879076086956521,0.043627257776545324 +flat_mae,patch,logistic,aabc_sex,96,0.046415888336127774,train,0.9168241965973535,0.0117937107280632,0.9147249333216096,0.012080897298253821,0.9147249333216096,0.012149202759156088 +flat_mae,patch,logistic,aabc_sex,96,0.046415888336127774,test,0.9272727272727272,0.034779000028396546,0.9242424242424243,0.03690396877201548,0.9191576086956521,0.0386411165712355 +flat_mae,patch,logistic,aabc_sex,97,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,97,21.54434690031882,test,0.8363636363636363,0.0499274051511188,0.8307692307692308,0.05211739005349969,0.8288043478260869,0.05203445604333177 +flat_mae,patch,logistic,aabc_sex,98,0.3593813663804626,train,0.9810964083175804,0.005912660317600871,0.9806425644028103,0.006046251952659396,0.9812274685659017,0.005921402391254824 +flat_mae,patch,logistic,aabc_sex,98,0.3593813663804626,test,0.8,0.05175989065861454,0.7861435136090491,0.057448597315347114,0.7792119565217391,0.05626817148987758 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,train,0.8752362948960303,0.014886755482489229,0.8717679379444085,0.01530706156831197,0.8708710689058883,0.01542111997727275 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,test,0.8727272727272727,0.04610954975395439,0.8663658451926415,0.05005989706526048,0.8600543478260869,0.050864338815354994 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,train,0.9243856332703214,0.011957094212854265,0.922283598754187,0.012335065093687186,0.9212608810340279,0.0126029765418789 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,test,0.8727272727272727,0.042902988780281856,0.8699763593380614,0.04380719510719203,0.8722826086956521,0.04361875269678864 diff --git a/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5821020824e0ae049d7e2e3f18a4b82eb2e31e2c --- /dev/null +++ b/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:24:33 time: 6.2439 data: 5.1290 max mem: 3205 +extract (train) [ 20/236] eta: 0:02:00 time: 0.2718 data: 0.0882 max mem: 3393 +extract (train) [ 40/236] eta: 0:01:14 time: 0.1938 data: 0.0549 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:56 time: 0.2028 data: 0.0637 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:47 time: 0.2412 data: 0.0853 max mem: 3393 +extract (train) [100/236] eta: 0:00:38 time: 0.2030 data: 0.0641 max mem: 3393 +extract (train) [120/236] eta: 0:00:31 time: 0.2039 data: 0.0645 max mem: 3393 +extract (train) [140/236] eta: 0:00:25 time: 0.2108 data: 0.0708 max mem: 3393 +extract (train) [160/236] eta: 0:00:19 time: 0.2120 data: 0.0720 max mem: 3393 +extract (train) [180/236] eta: 0:00:13 time: 0.2062 data: 0.0690 max mem: 3393 +extract (train) [200/236] eta: 0:00:08 time: 0.2141 data: 0.0733 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1917 data: 0.0613 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1737 data: 0.0523 max mem: 3393 +extract (train) Total time: 0:00:56 (0.2388 s / it) +extract (validation) [ 0/29] eta: 0:02:06 time: 4.3553 data: 4.1916 max mem: 3393 +extract (validation) [20/29] eta: 0:00:03 time: 0.2010 data: 0.0659 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1704 data: 0.0500 max mem: 3393 +extract (validation) Total time: 0:00:10 (0.3482 s / it) +extract (test) [ 0/28] eta: 0:02:04 time: 4.4628 data: 4.3233 max mem: 3393 +extract (test) [20/28] eta: 0:00:03 time: 0.1915 data: 0.0588 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1625 data: 0.0467 max mem: 3393 +extract (test) Total time: 0:00:09 (0.3503 s / it) +feature extraction time: 0:01:16 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 2.7826 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | aabc_sex | | 2.7826 | test | 0.89091 | 0.043863 | 0.88911 | 0.043926 | 0.90152 | 0.040231 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05265588898336156, "f1": 0.8166666666666667, "f1_std": 0.05265479976916016, "bacc": 0.8254076086956521, "bacc_std": 0.05117159420062498} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.051022263816481866, "f1": 0.8328267477203647, "f1_std": 0.052100542340955486, "bacc": 0.8349184782608696, "bacc_std": 0.052040162627840746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.0554105458829032, "f1": 0.7727272727272727, "f1_std": 0.05863437044538351, "bacc": 0.7697010869565217, "bacc_std": 0.05848903869876677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050275682964238864, "f1": 0.8166666666666667, "f1_std": 0.05041723963269043, "bacc": 0.8254076086956521, "bacc_std": 0.049211660530014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05273855678629085, "f1": 0.790003471017008, "f1_std": 0.056230055783621925, "bacc": 0.7853260869565217, "bacc_std": 0.05558520219851598} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03693972754466904, "f1": 0.9045470322804582, "f1_std": 0.03992807000558217, "bacc": 0.8974184782608696, "bacc_std": 0.04171778202044614} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04707285017014047, "f1": 0.8521505376344086, "f1_std": 0.04774533501656769, "bacc": 0.8566576086956521, "bacc_std": 0.047426358897281806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05253974174669985, "f1": 0.795677136102668, "f1_std": 0.053761764767330654, "bacc": 0.7975543478260869, "bacc_std": 0.05411603893049472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.046299049487771055, "f1": 0.8683760683760684, "f1_std": 0.048387140550290475, "bacc": 0.8661684782608696, "bacc_std": 0.04900389469866049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048698770497147895, "f1": 0.8505434782608696, "f1_std": 0.050083757649930345, "bacc": 0.8505434782608696, "bacc_std": 0.050190351568757355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03667531878885939, "f1": 0.9027925061859314, "f1_std": 0.04190960075868798, "bacc": 0.8913043478260869, "bacc_std": 0.04385092463885362} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05178686134787916, "f1": 0.7861435136090491, "f1_std": 0.057965069555226255, "bacc": 0.7792119565217391, "bacc_std": 0.05650578427288931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 166.81005372000556, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041913551330374846, "f1": 0.8879076086956521, "f1_std": 0.04308790390649044, "bacc": 0.8879076086956521, "bacc_std": 0.04317491706667154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03883382695154418, "f1": 0.8879076086956521, "f1_std": 0.03999668307543158, "bacc": 0.8879076086956521, "bacc_std": 0.04032041382000847} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05706815991455355, "f1": 0.7758152173913043, "f1_std": 0.058976953550242794, "bacc": 0.7758152173913043, "bacc_std": 0.059001860339285686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.051427027656220974, "f1": 0.8307692307692308, "f1_std": 0.05383410687411033, "bacc": 0.8288043478260869, "bacc_std": 0.054278706998561854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.8, "acc_std": 0.051782839622203425, "f1": 0.790003471017008, "f1_std": 0.0555870379546963, "bacc": 0.7853260869565217, "bacc_std": 0.05496704207452725} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05072647767379556, "f1": 0.8176392572944298, "f1_std": 0.05062105628133805, "bacc": 0.8315217391304348, "bacc_std": 0.047879830440684626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.054926378976207925, "f1": 0.7727272727272727, "f1_std": 0.05772363026484643, "bacc": 0.7697010869565217, "bacc_std": 0.0571900716828565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04610408633580652, "f1": 0.8250265111346766, "f1_std": 0.05230800624333397, "bacc": 0.8165760869565217, "bacc_std": 0.051851235104635476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.052523229020851894, "f1": 0.7727272727272727, "f1_std": 0.05581268638224436, "bacc": 0.7697010869565217, "bacc_std": 0.055373226476392326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0519799579939985, "f1": 0.8328267477203647, "f1_std": 0.053059633258707335, "bacc": 0.8349184782608696, "bacc_std": 0.05295779550098547} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.046282938683959336, "f1": 0.8699763593380614, "f1_std": 0.04729635012452601, "bacc": 0.8722826086956521, "bacc_std": 0.04700199204630469} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04799629462282125, "f1": 0.8699763593380614, "f1_std": 0.04910219932790519, "bacc": 0.8722826086956521, "bacc_std": 0.04885495493974446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05094568814429603, "f1": 0.8151881720430108, "f1_std": 0.05180721624321444, "bacc": 0.8192934782608696, "bacc_std": 0.05138047076561111} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05115633468606205, "f1": 0.7727272727272727, "f1_std": 0.05421765843013767, "bacc": 0.7697010869565217, "bacc_std": 0.053793701941407264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04253984422993856, "f1": 0.8699763593380614, "f1_std": 0.04326416544506314, "bacc": 0.8722826086956521, "bacc_std": 0.042897966358643964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.05296143614941217, "f1": 0.790003471017008, "f1_std": 0.057136675986417966, "bacc": 0.7853260869565217, "bacc_std": 0.05670747727149538} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04905835957946512, "f1": 0.8307692307692308, "f1_std": 0.051178039635536604, "bacc": 0.8288043478260869, "bacc_std": 0.05126013007047176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.059432144793123046, "f1": 0.7555555555555555, "f1_std": 0.061952149867716945, "bacc": 0.7540760869565217, "bacc_std": 0.061502168699851326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05530347231039635, "f1": 0.7782258064516129, "f1_std": 0.055718803885056456, "bacc": 0.7819293478260869, "bacc_std": 0.055179640142934576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04434598754758142, "f1": 0.8683760683760684, "f1_std": 0.046439782848170005, "bacc": 0.8661684782608696, "bacc_std": 0.04699345640291025} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.045888820007196636, "f1": 0.8505434782608696, "f1_std": 0.047194520021074556, "bacc": 0.8505434782608696, "bacc_std": 0.04709612353182535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049449344646318734, "f1": 0.8307692307692308, "f1_std": 0.05149415963006614, "bacc": 0.8288043478260869, "bacc_std": 0.0517093403774025} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043602577829624116, "f1": 0.8699763593380614, "f1_std": 0.044378928880580606, "bacc": 0.8722826086956521, "bacc_std": 0.04402821761910765} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04887976139856006, "f1": 0.8328267477203647, "f1_std": 0.04978049940641453, "bacc": 0.8349184782608696, "bacc_std": 0.04963764381180495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.052573267253919684, "f1": 0.8151881720430108, "f1_std": 0.053486615006010346, "bacc": 0.8192934782608696, "bacc_std": 0.053490187548087584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04203854980159861, "f1": 0.8639095086603039, "f1_std": 0.04749952680671482, "bacc": 0.8539402173913043, "bacc_std": 0.04816852800661158} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04533743459004501, "f1": 0.8505434782608696, "f1_std": 0.046700870489602, "bacc": 0.8505434782608696, "bacc_std": 0.046837935783746884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.049738489677229825, "f1": 0.8131793478260869, "f1_std": 0.05141701602857101, "bacc": 0.8131793478260869, "bacc_std": 0.05157697266027768} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038480287966483194, "f1": 0.905982905982906, "f1_std": 0.040362495966507414, "bacc": 0.9035326086956521, "bacc_std": 0.04107032746648314} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 166.81005372000556, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.055751941744341615, "f1": 0.7782258064516129, "f1_std": 0.05660795085808481, "bacc": 0.7819293478260869, "bacc_std": 0.05630541567402165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0439057743900889, "f1": 0.8521505376344086, "f1_std": 0.04445632866046411, "bacc": 0.8566576086956521, "bacc_std": 0.04384902873841091} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05146166349737786, "f1": 0.790003471017008, "f1_std": 0.0554262734827424, "bacc": 0.7853260869565217, "bacc_std": 0.05497276355871275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034506216950927315, "f1": 0.9266666666666667, "f1_std": 0.03439735271827876, "bacc": 0.9375, "bacc_std": 0.029653780192203154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.0393400097055957, "f1": 0.9071259709557582, "f1_std": 0.0400938976349243, "bacc": 0.9096467391304348, "bacc_std": 0.03956758514624431} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04032423957771133, "f1": 0.8863636363636364, "f1_std": 0.042786489845962436, "bacc": 0.8817934782608696, "bacc_std": 0.04366744834135569} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04058312151703114, "f1": 0.8879076086956521, "f1_std": 0.041885923840686734, "bacc": 0.8879076086956521, "bacc_std": 0.04216694092273785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.047862060752793846, "f1": 0.8343927735028438, "f1_std": 0.04817250193499985, "bacc": 0.8410326086956521, "bacc_std": 0.04748820110383062} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04820247927507941, "f1": 0.8484848484848485, "f1_std": 0.0507837081955852, "bacc": 0.8444293478260869, "bacc_std": 0.05090977446513677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04529986773191422, "f1": 0.8074229691876751, "f1_std": 0.05073726921765426, "bacc": 0.8009510869565217, "bacc_std": 0.050273032794018735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04892133675690641, "f1": 0.8307692307692308, "f1_std": 0.051052479399323294, "bacc": 0.8288043478260869, "bacc_std": 0.05116841613459943} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.049437483687870154, "f1": 0.8521505376344086, "f1_std": 0.05005743999904591, "bacc": 0.8566576086956521, "bacc_std": 0.049228255619581746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047307106380010025, "f1": 0.8521505376344086, "f1_std": 0.04786339862693188, "bacc": 0.8566576086956521, "bacc_std": 0.04702470759122328} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 21.54434690031882, "split": "test", "acc": 0.8, "acc_std": 0.053532546676942294, "f1": 0.7931623931623932, "f1_std": 0.05615577862990481, "bacc": 0.7914402173913043, "bacc_std": 0.05628428399847978} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048303106345706434, "f1": 0.8307692307692308, "f1_std": 0.050494865110545385, "bacc": 0.8288043478260869, "bacc_std": 0.05078730095183825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04187341806559225, "f1": 0.8879076086956521, "f1_std": 0.04324370316732738, "bacc": 0.8879076086956521, "bacc_std": 0.04343313034334395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05269962911720449, "f1": 0.7975911676145868, "f1_std": 0.052978822356543255, "bacc": 0.8036684782608696, "bacc_std": 0.05225068674519407} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04536135902463663, "f1": 0.8699763593380614, "f1_std": 0.04632550080479619, "bacc": 0.8722826086956521, "bacc_std": 0.04603412930281962} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04786946437798012, "f1": 0.8250265111346766, "f1_std": 0.05401258850812746, "bacc": 0.8165760869565217, "bacc_std": 0.05336562533883418} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 2.782559402207126, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05629291751173917, "f1": 0.7585275244849713, "f1_std": 0.057295239610100114, "bacc": 0.7601902173913043, "bacc_std": 0.05716227324842578} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042532383384471135, "f1": 0.8879076086956521, "f1_std": 0.043869281572619365, "bacc": 0.8879076086956521, "bacc_std": 0.04397386528961242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030001123945887918, "f1": 0.9435897435897436, "f1_std": 0.031403955628115855, "bacc": 0.9408967391304348, "bacc_std": 0.03285763773574193} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05472460277389255, "f1": 0.7975911676145868, "f1_std": 0.05498243239251332, "bacc": 0.8036684782608696, "bacc_std": 0.05441472233104929} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045901497121733706, "f1": 0.8683760683760684, "f1_std": 0.04795800107377598, "bacc": 0.8661684782608696, "bacc_std": 0.048408551303511474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05590187388748458, "f1": 0.7758152173913043, "f1_std": 0.057523772721999294, "bacc": 0.7758152173913043, "bacc_std": 0.05755525874797466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03910915430450041, "f1": 0.9071259709557582, "f1_std": 0.03983721099297673, "bacc": 0.9096467391304348, "bacc_std": 0.0393662235498639} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03885738280362057, "f1": 0.9045470322804582, "f1_std": 0.042070079932440566, "bacc": 0.8974184782608696, "bacc_std": 0.043712359140952936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.9818181818181818, "acc_std": 0.018124236091054922, "f1": 0.9811965811965813, "f1_std": 0.019033310260698953, "bacc": 0.9782608695652174, "bacc_std": 0.02167028228278305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048198967788405656, "f1": 0.8533333333333333, "f1_std": 0.04815418032403406, "bacc": 0.8627717391304348, "bacc_std": 0.04631582983263305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.042705387044781905, "f1": 0.8428571428571429, "f1_std": 0.04959198270756321, "bacc": 0.8322010869565217, "bacc_std": 0.048915602288142374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042466375614728866, "f1": 0.8663658451926415, "f1_std": 0.04573044341950363, "bacc": 0.8600543478260869, "bacc_std": 0.04643619318417449} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 21.54434690031882, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038264412830412, "f1": 0.9045470322804582, "f1_std": 0.041573816592526514, "bacc": 0.8974184782608696, "bacc_std": 0.04329736525477135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04654447442149876, "f1": 0.8699763593380614, "f1_std": 0.047472654440359224, "bacc": 0.8722826086956521, "bacc_std": 0.04711054185814096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04482577475788485, "f1": 0.8505434782608696, "f1_std": 0.04628517780841475, "bacc": 0.8505434782608696, "bacc_std": 0.04650196342541968} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05271973613849365, "f1": 0.8106060606060606, "f1_std": 0.05573070000133594, "bacc": 0.8070652173913043, "bacc_std": 0.05548169430489548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04852091991524362, "f1": 0.8250265111346766, "f1_std": 0.05446445629425495, "bacc": 0.8165760869565217, "bacc_std": 0.053927853743368455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03654997710596111, "f1": 0.9252717391304348, "f1_std": 0.0376408995727102, "bacc": 0.9252717391304348, "bacc_std": 0.037813069227190715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04746194299707766, "f1": 0.84593837535014, "f1_std": 0.05219844710407987, "bacc": 0.8383152173913043, "bacc_std": 0.05233849217449428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.7090909090909091, "acc_std": 0.062484113683478934, "f1": 0.696969696969697, "f1_std": 0.06558017612631742, "bacc": 0.6949728260869565, "bacc_std": 0.06471991278065005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041228650874028876, "f1": 0.8863636363636364, "f1_std": 0.04358952704766233, "bacc": 0.8817934782608696, "bacc_std": 0.044539619255488855} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04423042593636641, "f1": 0.84593837535014, "f1_std": 0.04841159693018112, "bacc": 0.8383152173913043, "bacc_std": 0.048776578278426956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03819523573769183, "f1": 0.8821428571428571, "f1_std": 0.04486985290714586, "bacc": 0.8695652173913043, "bacc_std": 0.045668216642892404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.0560218493385017, "f1": 0.7931623931623932, "f1_std": 0.05827916762984857, "bacc": 0.7914402173913043, "bacc_std": 0.058255216809721115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04269276749865502, "f1": 0.8879076086956521, "f1_std": 0.044144738763871746, "bacc": 0.8879076086956521, "bacc_std": 0.044726083601717544} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04418742949776365, "f1": 0.8699763593380614, "f1_std": 0.04506553745968865, "bacc": 0.8722826086956521, "bacc_std": 0.044936406075238904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04991475708852653, "f1": 0.8328267477203647, "f1_std": 0.05084546982440388, "bacc": 0.8349184782608696, "bacc_std": 0.05046261887237688} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05145016364252861, "f1": 0.8106060606060606, "f1_std": 0.05452648044088764, "bacc": 0.8070652173913043, "bacc_std": 0.05459618185922766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04382540868355924, "f1": 0.84593837535014, "f1_std": 0.04801965265036465, "bacc": 0.8383152173913043, "bacc_std": 0.04842113524321146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 166.81005372000556, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04808730352526097, "f1": 0.8307692307692308, "f1_std": 0.050320559157121116, "bacc": 0.8288043478260869, "bacc_std": 0.0504716924623748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04921847398912505, "f1": 0.8328267477203647, "f1_std": 0.05017049025434237, "bacc": 0.8349184782608696, "bacc_std": 0.05016582556230712} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.037837477594889986, "f1": 0.905982905982906, "f1_std": 0.03944760922374126, "bacc": 0.9035326086956521, "bacc_std": 0.04011543204446846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04752462954324986, "f1": 0.8035714285714286, "f1_std": 0.054673392528910764, "bacc": 0.7948369565217391, "bacc_std": 0.05323249361949366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038471197769906255, "f1": 0.9071259709557582, "f1_std": 0.039199159975394214, "bacc": 0.9096467391304348, "bacc_std": 0.03881062805028247} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04221785294680458, "f1": 0.8879076086956521, "f1_std": 0.04346054227662537, "bacc": 0.8879076086956521, "bacc_std": 0.043627257776545324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034779000028396546, "f1": 0.9242424242424243, "f1_std": 0.03690396877201548, "bacc": 0.9191576086956521, "bacc_std": 0.0386411165712355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0499274051511188, "f1": 0.8307692307692308, "f1_std": 0.05211739005349969, "bacc": 0.8288043478260869, "bacc_std": 0.05203445604333177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05175989065861454, "f1": 0.7861435136090491, "f1_std": 0.057448597315347114, "bacc": 0.7792119565217391, "bacc_std": 0.05626817148987758} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04610954975395439, "f1": 0.8663658451926415, "f1_std": 0.05005989706526048, "bacc": 0.8600543478260869, "bacc_std": 0.050864338815354994} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042902988780281856, "f1": 0.8699763593380614, "f1_std": 0.04380719510719203, "bacc": 0.8722826086956521, "bacc_std": 0.04361875269678864} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 11.514 | 39.71 | 0.94839 | 0.040725 | 0.94692 | 0.04194 | 0.94631 | 0.042678 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 11.514 | 39.71 | 0.84836 | 0.046317 | 0.84311 | 0.048044 | 0.84205 | 0.048153 | + + +done! total time: 0:05:14 diff --git a/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6ea3996e05726dd971a7887d4e80f99242ffdb19 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..39710dea40303dff2b542051558b7fb503bbb04e --- /dev/null +++ b/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.3593813663804626,train,0.9316239316239316,0.009578303089056401,0.9307242539123857,0.009733780399628968,0.9299445137566484,0.009901145254784898 +flat_mae,patch,logistic,abide_dx,,0.3593813663804626,test,0.5241935483870968,0.041284081559585684,0.5127539127539127,0.04184667249699166,0.5151872217858078,0.04119939857251283 +flat_mae,patch,logistic,abide_dx,1,0.005994842503189409,train,0.7165242165242165,0.017482637341670913,0.7095210667820004,0.018113957275255634,0.7080472499077151,0.01788823652685562 +flat_mae,patch,logistic,abide_dx,1,0.005994842503189409,test,0.6774193548387096,0.041113137346047476,0.6688034188034189,0.04295419279844657,0.6680672268907563,0.04203937864030787 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,train,0.9216524216524217,0.010031799424812969,0.920486676730254,0.010219301282402097,0.9188999630860096,0.010373358578822559 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,test,0.6209677419354839,0.04211467917333632,0.6179613241560145,0.042344171626269744,0.618172268907563,0.04239843385697718 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,train,0.8105413105413105,0.01409940941434186,0.8074463324142113,0.01432886945855722,0.806016980435585,0.014272212961785177 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,test,0.6048387096774194,0.04441990535419158,0.5989703649924097,0.04504116366441694,0.5987394957983193,0.04472068721015768 +flat_mae,patch,logistic,abide_dx,4,0.046415888336127774,train,0.8176638176638177,0.014569762408488346,0.8144774332080769,0.014924683905774245,0.8127722406792174,0.014932497700045554 +flat_mae,patch,logistic,abide_dx,4,0.046415888336127774,test,0.7016129032258065,0.03831011527471725,0.6944388944388944,0.039845593860878076,0.6932773109243697,0.03926182668976251 +flat_mae,patch,logistic,abide_dx,5,0.046415888336127774,train,0.8133903133903134,0.014677475114763954,0.8100566967190259,0.01498334369122216,0.8083056478405315,0.014953154093032708 +flat_mae,patch,logistic,abide_dx,5,0.046415888336127774,test,0.5967741935483871,0.04129690912688167,0.5915678524374176,0.04169908351233194,0.5913865546218487,0.04146902108947524 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,train,0.8205128205128205,0.014663587948363357,0.8170922355393434,0.015071834588745036,0.8150609080841639,0.015078872003588357 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,test,0.6370967741935484,0.04635304281882685,0.6342182890855457,0.046843179294802285,0.634453781512605,0.04677036162353719 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,train,0.7165242165242165,0.01711661469307375,0.7105322594347735,0.017686557922821953,0.7092284976005906,0.017546572760843048 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,test,0.5887096774193549,0.04462083000509283,0.5765651155005022,0.046826300397298085,0.5777310924369747,0.04541367312022607 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,train,0.9373219373219374,0.009215859718798448,0.9365785950705092,0.009340085595253544,0.9360649686231082,0.009454571985151825 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,test,0.5645161290322581,0.04229725774192688,0.5588932806324111,0.04293049709371854,0.5588235294117647,0.04267437159659311 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,train,0.9216524216524217,0.00987335121733457,0.920486676730254,0.01006350335407584,0.9188999630860096,0.01024171581578018 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,test,0.6048387096774194,0.04283920763410549,0.5972691721349506,0.04353892823111837,0.5971638655462186,0.04304172193901062 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,train,0.8304843304843305,0.01399679348998892,0.8278401187053696,0.014270933065766584,0.8264673311184939,0.014318802047782402 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,test,0.6612903225806451,0.040930955671154394,0.6502820306204673,0.043394048096207456,0.6502100840336134,0.042008753482431 +flat_mae,patch,logistic,abide_dx,11,0.046415888336127774,train,0.8076923076923077,0.014650219783339935,0.8039491513151771,0.015071280719662841,0.8019564414913252,0.015072563097944986 +flat_mae,patch,logistic,abide_dx,11,0.046415888336127774,test,0.6209677419354839,0.042008679916720464,0.6167554415729598,0.04245925815837763,0.6165966386554622,0.0423885638229868 +flat_mae,patch,logistic,abide_dx,12,0.005994842503189409,train,0.7193732193732194,0.01634174002689427,0.7129513739702229,0.016875859362168937,0.7115171650055371,0.016717499953353667 +flat_mae,patch,logistic,abide_dx,12,0.005994842503189409,test,0.5887096774193549,0.044167039376957366,0.5841388834089565,0.04458292141122282,0.5840336134453781,0.04437302500699791 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,train,0.9230769230769231,0.010035358872074964,0.9220128373930218,0.010211408943347216,0.9207825765965301,0.010391390340097752 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,test,0.5725806451612904,0.04249018678406638,0.5703170970905524,0.042903984753441864,0.5709033613445378,0.04313978178191481 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,train,0.9273504273504274,0.009652200911040482,0.9263204469917579,0.009831232730783478,0.9249538575119971,0.010009716910258716 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,test,0.6048387096774194,0.04310701056721079,0.5907590759075907,0.0453912057515939,0.592436974789916,0.04383432933102287 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,train,0.8219373219373219,0.014186975497978607,0.8191597885560604,0.014484624294715072,0.8178294573643411,0.01455401087225457 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,test,0.6532258064516129,0.039270305166193877,0.6480760345851759,0.039930931836775055,0.6475840336134454,0.03964505782892502 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,train,0.8105413105413105,0.014546443589820905,0.8077223273658869,0.014795612022782697,0.8066076042820229,0.014817358049660674 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,test,0.6129032258064516,0.04254916663914516,0.6112852664576802,0.042812067293305056,0.6123949579831933,0.04292760821989454 +flat_mae,patch,logistic,abide_dx,17,0.046415888336127774,train,0.8176638176638177,0.014606378047169018,0.8153846153846154,0.014829584234071961,0.8148394241417497,0.014887649285549748 +flat_mae,patch,logistic,abide_dx,17,0.046415888336127774,test,0.6129032258064516,0.043490570939756265,0.6045708211533352,0.04493554455811725,0.6045168067226891,0.04407857663726617 +flat_mae,patch,logistic,abide_dx,18,0.3593813663804626,train,0.9330484330484331,0.009340343630694485,0.9320992354629924,0.00951938682820236,0.9307124400147656,0.00975718669328957 +flat_mae,patch,logistic,abide_dx,18,0.3593813663804626,test,0.6290322580645161,0.043240642013598486,0.6242424242424243,0.04386617606615634,0.6239495798319328,0.043601468943845846 +flat_mae,patch,logistic,abide_dx,19,0.005994842503189409,train,0.7421652421652422,0.01589959930750264,0.7362649679624891,0.016402493074144674,0.7345514950166112,0.016262966153999667 +flat_mae,patch,logistic,abide_dx,19,0.005994842503189409,test,0.5725806451612904,0.04351731268865347,0.5478500171998624,0.04790838098260214,0.5551470588235294,0.04451250840553695 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,train,0.8176638176638177,0.014051374476050232,0.8151415404871627,0.014344345271512923,0.8142488002953119,0.01447380682295407 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,test,0.6532258064516129,0.03989847613008047,0.6408702094699266,0.04215016598399647,0.641281512605042,0.040701763083595376 +flat_mae,patch,logistic,abide_dx,21,0.000774263682681127,train,0.6723646723646723,0.017309744874948758,0.6579730038892702,0.018452832671581765,0.6582502768549281,0.01773169799557126 +flat_mae,patch,logistic,abide_dx,21,0.000774263682681127,test,0.5887096774193549,0.04177785200071881,0.5740553647201454,0.04370057909705704,0.576155462184874,0.04227325722468146 +flat_mae,patch,logistic,abide_dx,22,0.046415888336127774,train,0.8290598290598291,0.014248603536410246,0.8260725936325721,0.014600688490252695,0.8242894056847545,0.014642178289329923 +flat_mae,patch,logistic,abide_dx,22,0.046415888336127774,test,0.5967741935483871,0.043081544899482405,0.575109649122807,0.047269246976230334,0.5803571428571428,0.04413932884337094 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,train,0.9173789173789174,0.010538961392205707,0.9161787593567035,0.01073990595336288,0.9147286821705426,0.01094052398004128 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,test,0.5967741935483871,0.044992194952677395,0.58994708994709,0.046026024497452335,0.5898109243697479,0.04552468870036837 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,train,0.8148148148148148,0.014788005547585569,0.8112856398421797,0.015198838560204549,0.8093023255813954,0.015205868959525269 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,test,0.6048387096774194,0.04403463304477666,0.6017043592264831,0.04417293170351323,0.601890756302521,0.04416192525932249 +flat_mae,patch,logistic,abide_dx,25,0.005994842503189409,train,0.7264957264957265,0.016466748235548594,0.7205970149253731,0.01701122638613265,0.7191583610188261,0.016884888432620912 +flat_mae,patch,logistic,abide_dx,25,0.005994842503189409,test,0.6290322580645161,0.04344978083643979,0.6274817136886102,0.04353433023167341,0.6286764705882353,0.043653220708625845 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,train,0.7321937321937322,0.015719189846824414,0.724676018258134,0.01635662134398017,0.7228497600590624,0.016119348636000657 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,test,0.5725806451612904,0.04468924962737756,0.5623043623043623,0.04620564046687654,0.5630252100840336,0.0451854926690979 +flat_mae,patch,logistic,abide_dx,27,0.046415888336127774,train,0.8105413105413105,0.015038982038523297,0.8078552833314468,0.01527872189648326,0.8069029162052418,0.015296056643130312 +flat_mae,patch,logistic,abide_dx,27,0.046415888336127774,test,0.5725806451612904,0.045398212064891565,0.5643931861867832,0.04639725160756262,0.5646008403361344,0.045871421786329955 +flat_mae,patch,logistic,abide_dx,28,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,28,1291.5496650148827,test,0.6370967741935484,0.04268278317059871,0.6330637206549615,0.04332768949551267,0.6328781512605042,0.04321975378321963 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,train,0.8048433048433048,0.01456864732981116,0.8007181886561003,0.015033265286728471,0.7984865263935031,0.015007451949615315 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,test,0.6774193548387096,0.039879824024235046,0.6688034188034189,0.04188124334564985,0.6680672268907563,0.04090223241051902 +flat_mae,patch,logistic,abide_dx,30,0.005994842503189409,train,0.7122507122507122,0.01694076219616888,0.7052749719416387,0.017461167455087984,0.703875968992248,0.01725587200310047 +flat_mae,patch,logistic,abide_dx,30,0.005994842503189409,test,0.6370967741935484,0.04234016894566617,0.6342182890855457,0.04261392435327109,0.634453781512605,0.04252657852203198 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,train,0.8162393162393162,0.014769910854079187,0.8129565944790409,0.015115680384265761,0.8111849390919158,0.015132672063206037 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,test,0.6451612903225806,0.042695275809892956,0.6375232527238905,0.04452496015712942,0.6370798319327731,0.04367422892699733 +flat_mae,patch,logistic,abide_dx,32,0.046415888336127774,train,0.8205128205128205,0.01422023485822427,0.8177780341485925,0.014498082040798018,0.8165374677002584,0.014528849303628048 +flat_mae,patch,logistic,abide_dx,32,0.046415888336127774,test,0.6370967741935484,0.04212897050827861,0.6317074780542539,0.04261838304020082,0.6313025210084033,0.042234307116611074 +flat_mae,patch,logistic,abide_dx,33,0.046415888336127774,train,0.8190883190883191,0.013842097819626784,0.8161329640346229,0.014144024197177904,0.8146548541897379,0.01418329954816489 +flat_mae,patch,logistic,abide_dx,33,0.046415888336127774,test,0.5725806451612904,0.04284765986953694,0.5643931861867832,0.04379133700030574,0.5646008403361344,0.04322708815632735 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,train,0.9330484330484331,0.009739044857283777,0.9321450385918152,0.009898948424039381,0.9310077519379845,0.010028826459785688 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,test,0.5403225806451613,0.0432549582729253,0.537888198757764,0.04354100764742364,0.5383403361344539,0.04366090718453375 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,train,0.8091168091168092,0.014703339086436949,0.8062083855231064,0.014962704366723404,0.8050203026947212,0.0149667357376967 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,test,0.6693548387096774,0.04091259941439645,0.6667322189446083,0.04139352722244033,0.6670168067226891,0.04130047995845927 +flat_mae,patch,logistic,abide_dx,36,0.005994842503189409,train,0.717948717948718,0.01607809711807512,0.7113678649744168,0.01656608460047531,0.7099298634182355,0.01639043941145779 +flat_mae,patch,logistic,abide_dx,36,0.005994842503189409,test,0.6774193548387096,0.041091241335156826,0.6688034188034189,0.04277763648525594,0.6680672268907563,0.04185253933161877 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,train,0.8361823361823362,0.014120366328593998,0.833382522604706,0.014508518384888302,0.8316352897748247,0.014641372689916821 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,test,0.6532258064516129,0.0397184550451784,0.6448884448884449,0.04115653437550358,0.6444327731092437,0.040399868390830165 +flat_mae,patch,logistic,abide_dx,38,0.005994842503189409,train,0.7108262108262108,0.016777573135978704,0.703948229477205,0.017300964409583227,0.7025839793281654,0.01710826250197527 +flat_mae,patch,logistic,abide_dx,38,0.005994842503189409,test,0.6532258064516129,0.04019436389169068,0.6448884448884449,0.04151157851580024,0.6444327731092437,0.040804104287870346 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,train,0.8162393162393162,0.013534250630543096,0.8129565944790409,0.013822063312745285,0.8111849390919158,0.013818937940601222 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,test,0.5967741935483871,0.04396794310417141,0.5836690840719849,0.045798049335887776,0.5850840336134454,0.04444940641644906 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,train,0.8162393162393162,0.014752659445214882,0.81375978677328,0.014980307597183154,0.8129568106312293,0.015013710817275242 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,test,0.6532258064516129,0.03919338576894447,0.6331613347093223,0.042771518458555646,0.6365546218487395,0.04010720712249162 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,train,0.6467236467236467,0.01745606411531348,0.6312056737588653,0.01867543600115551,0.6323366555924695,0.017897132485508598 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,test,0.6290322580645161,0.03924210121545952,0.6119727891156463,0.04231364298836883,0.6144957983193278,0.04015843813684551 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,train,0.8048433048433048,0.013984618298275662,0.8013570034389814,0.01429406650128978,0.7996677740863787,0.014277481250283159 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,test,0.6290322580645161,0.04409677419354838,0.6255252100840336,0.044836666351665194,0.6255252100840336,0.044590379240014404 +flat_mae,patch,logistic,abide_dx,43,0.046415888336127774,train,0.8076923076923077,0.014073640373030877,0.8039491513151771,0.014460420101863084,0.8019564414913252,0.014452952581534347 +flat_mae,patch,logistic,abide_dx,43,0.046415888336127774,test,0.717741935483871,0.042134993773728924,0.710955710955711,0.04364201444051302,0.7095588235294117,0.04301362189664417 +flat_mae,patch,logistic,abide_dx,44,0.000774263682681127,train,0.6552706552706553,0.01728865706665264,0.640128117135667,0.018473523846953072,0.6409745293466224,0.01772114591718627 +flat_mae,patch,logistic,abide_dx,44,0.000774263682681127,test,0.6129032258064516,0.03997531431299166,0.5921052631578947,0.04397497149481937,0.5966386554621849,0.04103224726333098 +flat_mae,patch,logistic,abide_dx,45,0.046415888336127774,train,0.8233618233618234,0.014668026631684797,0.8205417886447037,0.015008825082479246,0.8191214470284238,0.015086857569313085 +flat_mae,patch,logistic,abide_dx,45,0.046415888336127774,test,0.5806451612903226,0.041908800443708635,0.5643243243243243,0.043704722143080695,0.5672268907563025,0.04222818336737672 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,train,0.8262108262108262,0.014397080084802706,0.8233065953654188,0.014697865822600197,0.8217054263565892,0.014741011759662897 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,test,0.5725806451612904,0.04469744223500621,0.5691904293674206,0.045167292227623355,0.569327731092437,0.045194090900702234 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,train,0.8148148148148148,0.015179620859587782,0.8119932098358494,0.01544736288400603,0.8107788851974899,0.015453680789704872 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,test,0.6129032258064516,0.04297369909299979,0.607905138339921,0.0437169917842997,0.6076680672268908,0.0434851656123859 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,train,0.8205128205128205,0.01398314136614389,0.8176473013647796,0.014258152616334653,0.8162421557770395,0.014264240159438838 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,test,0.6451612903225806,0.04267607524765116,0.6391534391534391,0.04381060237156751,0.6386554621848739,0.04334719713144787 +flat_mae,patch,logistic,abide_dx,49,0.000774263682681127,train,0.6709401709401709,0.017217957851632108,0.6571069709022399,0.018147002702927635,0.6572535991140642,0.01751083365900478 +flat_mae,patch,logistic,abide_dx,49,0.000774263682681127,test,0.6048387096774194,0.039736266353089376,0.578494623655914,0.04418238570013005,0.5861344537815126,0.04059818158704357 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,train,0.8162393162393162,0.013903051001574334,0.813098655791366,0.014229187660698616,0.8114802510151347,0.014255921715475118 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,test,0.5967741935483871,0.04465999427283362,0.5810810810810811,0.04713720256675513,0.5835084033613446,0.04528510431816933 +flat_mae,patch,logistic,abide_dx,51,0.046415888336127774,train,0.8262108262108262,0.014700204274575215,0.8231738035264484,0.015078082138637632,0.8214101144333703,0.015121153166270716 +flat_mae,patch,logistic,abide_dx,51,0.046415888336127774,test,0.5967741935483871,0.041211631163837505,0.5810810810810811,0.04356186821528533,0.5835084033613446,0.04182421273594904 +flat_mae,patch,logistic,abide_dx,52,0.005994842503189409,train,0.7222222222222222,0.016479023571547197,0.7134243994347622,0.017286387941185366,0.7117386489479514,0.01691431917440416 +flat_mae,patch,logistic,abide_dx,52,0.005994842503189409,test,0.6290322580645161,0.04385020856099628,0.6242424242424243,0.04462218152686849,0.6239495798319328,0.04452530415289145 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,train,0.8190883190883191,0.014420001852908272,0.8152643062724434,0.014827164070846322,0.8128829826504245,0.014779086244230381 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,test,0.6290322580645161,0.04276284029254295,0.6242424242424243,0.04342207004187337,0.6239495798319328,0.043176670429289805 +flat_mae,patch,logistic,abide_dx,54,0.005994842503189409,train,0.7250712250712251,0.016750247000801768,0.719719897806142,0.01716177287397828,0.7184569952011812,0.017046344699817206 +flat_mae,patch,logistic,abide_dx,54,0.005994842503189409,test,0.6209677419354839,0.04510232037057044,0.6137071651090342,0.0463636833031733,0.6134453781512605,0.04571655086801859 +flat_mae,patch,logistic,abide_dx,55,0.005994842503189409,train,0.7207977207977208,0.017173415793444505,0.7137698042870457,0.017810733704264333,0.712218530823182,0.01759028040594031 +flat_mae,patch,logistic,abide_dx,55,0.005994842503189409,test,0.6129032258064516,0.042043706509107076,0.6003223207091055,0.04395201151796931,0.6013655462184874,0.04262738162186557 +flat_mae,patch,logistic,abide_dx,56,0.005994842503189409,train,0.7165242165242165,0.016547743100567787,0.7084249084249084,0.01721236103531624,0.7068660022148394,0.016902526070245796 +flat_mae,patch,logistic,abide_dx,56,0.005994842503189409,test,0.6612903225806451,0.03989701558058263,0.6502820306204673,0.04235785137587265,0.6502100840336134,0.04101881706001485 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,train,0.8219373219373219,0.013685425911501679,0.8190285079080932,0.01401972479330757,0.8175341454411222,0.014095154826991082 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,test,0.6532258064516129,0.039863512587277675,0.6480760345851759,0.0406830702303827,0.6475840336134454,0.04038080695452615 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,train,0.8262108262108262,0.01400073049449309,0.8228988312365071,0.014405602454043664,0.8208194905869325,0.014446985523044019 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,test,0.6129032258064516,0.04346805912865296,0.6112852664576802,0.04353824942154756,0.6123949579831933,0.04371216285030784 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,train,0.811965811965812,0.015300966006035414,0.8088235294117647,0.015612507265701651,0.8073089700996677,0.01562627123933995 +flat_mae,patch,logistic,abide_dx,59,0.046415888336127774,test,0.6290322580645161,0.04456227155100645,0.6266038229903116,0.0448244940169691,0.6271008403361344,0.044791266846952324 +flat_mae,patch,logistic,abide_dx,60,0.046415888336127774,train,0.8133903133903134,0.015491394766894546,0.8103418762876817,0.015812811178201536,0.8088962716869693,0.015824577725537563 +flat_mae,patch,logistic,abide_dx,60,0.046415888336127774,test,0.5887096774193549,0.04398425541310865,0.5865315462569467,0.04417646002498987,0.5871848739495797,0.04429141821564067 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,train,0.8276353276353277,0.014651157190122673,0.8251916487451509,0.014888629797909905,0.8241786637135474,0.014922397315113632 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,test,0.5725806451612904,0.04423782512393834,0.5623043623043623,0.04515594725345051,0.5630252100840336,0.04431821774533121 +flat_mae,patch,logistic,abide_dx,62,0.000774263682681127,train,0.6709401709401709,0.01686058949891694,0.6594401496966376,0.017910652706293988,0.6590254706533776,0.01737571648037623 +flat_mae,patch,logistic,abide_dx,62,0.000774263682681127,test,0.6129032258064516,0.04186993636584508,0.5978378378378378,0.04441792401715795,0.5997899159663866,0.04274082997220342 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,train,0.8304843304843305,0.015123873594540265,0.8280810429807682,0.015380056755930108,0.8270579549649317,0.015418007125727665 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,test,0.6209677419354839,0.044036972452337696,0.6203504657677024,0.044150532010916364,0.6228991596638656,0.04416149355513142 +flat_mae,patch,logistic,abide_dx,64,0.046415888336127774,train,0.8091168091168092,0.015001372761538975,0.806069352245083,0.015261617600720697,0.8047249907715024,0.015257651478396738 +flat_mae,patch,logistic,abide_dx,64,0.046415888336127774,test,0.6451612903225806,0.04257984786643588,0.6391534391534391,0.04374301919437287,0.6386554621848739,0.0432648698667387 +flat_mae,patch,logistic,abide_dx,65,0.046415888336127774,train,0.8219373219373219,0.014542471530607202,0.8190285079080932,0.014897386934271454,0.8175341454411222,0.014987215102487492 +flat_mae,patch,logistic,abide_dx,65,0.046415888336127774,test,0.6048387096774194,0.044480467249134785,0.5931704050887178,0.04665746390703013,0.5940126050420168,0.04535424956033638 +flat_mae,patch,logistic,abide_dx,66,0.046415888336127774,train,0.8290598290598291,0.014302485491162328,0.8266951942067149,0.014526288140236232,0.825765965300849,0.014546364991278857 +flat_mae,patch,logistic,abide_dx,66,0.046415888336127774,test,0.6129032258064516,0.04579966726224051,0.6092436974789917,0.046368699932556025,0.6092436974789917,0.04625389298721849 +flat_mae,patch,logistic,abide_dx,67,0.046415888336127774,train,0.8176638176638177,0.014825843747029742,0.8143348320550119,0.015204025511261092,0.8124769287559985,0.015246896670638412 +flat_mae,patch,logistic,abide_dx,67,0.046415888336127774,test,0.6532258064516129,0.04194602845096986,0.6448884448884449,0.043576932006045295,0.6444327731092437,0.04268664970847627 +flat_mae,patch,logistic,abide_dx,68,0.000774263682681127,train,0.6581196581196581,0.01681654092652144,0.6443768996960486,0.017853916458865103,0.6447397563676633,0.017221780109342926 +flat_mae,patch,logistic,abide_dx,68,0.000774263682681127,test,0.5967741935483871,0.03919394331408622,0.5810810810810811,0.04189347456698179,0.5835084033613446,0.039982221811747945 +flat_mae,patch,logistic,abide_dx,69,0.005994842503189409,train,0.7051282051282052,0.016007562072030423,0.6957889778615167,0.016815559434444613,0.6944629014396456,0.016458173363628197 +flat_mae,patch,logistic,abide_dx,69,0.005994842503189409,test,0.6854838709677419,0.0407685020527538,0.6721578198088265,0.043651566533400986,0.6722689075630253,0.041829512970162006 +flat_mae,patch,logistic,abide_dx,70,0.046415888336127774,train,0.8205128205128205,0.015012298429850175,0.8176473013647796,0.015351469695808152,0.8162421557770395,0.01541843014295104 +flat_mae,patch,logistic,abide_dx,70,0.046415888336127774,test,0.6209677419354839,0.04453546506193729,0.6153389215233318,0.045388848751803205,0.6150210084033614,0.04509356542582331 +flat_mae,patch,logistic,abide_dx,71,0.046415888336127774,train,0.8190883190883191,0.014368339097894181,0.8158564922390557,0.0146592371191468,0.8140642303433001,0.014616513646363013 +flat_mae,patch,logistic,abide_dx,71,0.046415888336127774,test,0.6532258064516129,0.03931641103787403,0.6429862738533645,0.04107719866164188,0.6428571428571428,0.0399598841213838 +flat_mae,patch,logistic,abide_dx,72,0.3593813663804626,train,0.9145299145299145,0.010308706102825698,0.9134615384615385,0.010453765351307034,0.9127353266888151,0.010528649519402139 +flat_mae,patch,logistic,abide_dx,72,0.3593813663804626,test,0.5241935483870968,0.04360950012968361,0.5234186697934988,0.043613506635374236,0.5252100840336134,0.043709194454083354 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,train,0.8048433048433048,0.015033486699902647,0.8020764948602122,0.015237878080858051,0.8011443337024733,0.015212191593777348 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,test,0.6290322580645161,0.03969965283177649,0.6266038229903116,0.039870109268220065,0.6271008403361344,0.0398788773951557 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,train,0.7136752136752137,0.016454233127379835,0.7060589332027873,0.017123270082534062,0.7045773348098929,0.01688693708699315 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,test,0.6290322580645161,0.04281820340911307,0.6210470369386127,0.04412702440007403,0.6207983193277311,0.04343186865320686 +flat_mae,patch,logistic,abide_dx,75,0.046415888336127774,train,0.8148148148148148,0.015245762282913695,0.8118583268049313,0.015617056117240872,0.810483573274271,0.015699314896695623 +flat_mae,patch,logistic,abide_dx,75,0.046415888336127774,test,0.6451612903225806,0.0409200891258643,0.6356837606837606,0.04256190347344885,0.6355042016806722,0.04166504903969842 +flat_mae,patch,logistic,abide_dx,76,0.046415888336127774,train,0.8076923076923077,0.014371618590603886,0.8041047668460186,0.014817505863391412,0.8022517534145441,0.014886670526488574 +flat_mae,patch,logistic,abide_dx,76,0.046415888336127774,test,0.6693548387096774,0.04075176751591489,0.6575739206573719,0.04300275716343429,0.657563025210084,0.041464770753086654 +flat_mae,patch,logistic,abide_dx,77,0.046415888336127774,train,0.8276353276353277,0.013373413290946419,0.8251916487451509,0.013594323436510304,0.8241786637135474,0.013626169788903388 +flat_mae,patch,logistic,abide_dx,77,0.046415888336127774,test,0.532258064516129,0.04507058575514828,0.5311603650586701,0.045220500564835,0.532563025210084,0.04539670070997022 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,train,0.8262108262108262,0.013563234251677646,0.8238067807768268,0.013759774345971107,0.8228866740494647,0.01375667232252695 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,test,0.6129032258064516,0.042833860369720776,0.5978378378378378,0.044704063706268816,0.5997899159663866,0.04322538013830674 +flat_mae,patch,logistic,abide_dx,79,0.046415888336127774,train,0.8319088319088319,0.014053904368009714,0.8290998217468806,0.014396525460207378,0.8274640088593577,0.014464465460585416 +flat_mae,patch,logistic,abide_dx,79,0.046415888336127774,test,0.6048387096774194,0.043808123275625055,0.5931704050887178,0.04613649124232754,0.5940126050420168,0.04466083182381372 +flat_mae,patch,logistic,abide_dx,80,0.3593813663804626,train,0.9358974358974359,0.008763054829346162,0.9351168164313222,0.008881332956650568,0.9344776670358066,0.008995773116624847 +flat_mae,patch,logistic,abide_dx,80,0.3593813663804626,test,0.5967741935483871,0.04270167001732451,0.5941345902068604,0.042936531142214185,0.5945378151260504,0.0431073123937748 +flat_mae,patch,logistic,abide_dx,81,0.046415888336127774,train,0.8162393162393162,0.015089676822395159,0.8128112216528621,0.015435759159426497,0.810889627168697,0.015421296653370555 +flat_mae,patch,logistic,abide_dx,81,0.046415888336127774,test,0.6129032258064516,0.04402541598737713,0.5951020408163266,0.047541045925511,0.5982142857142857,0.04497705880162714 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,train,0.7136752136752137,0.017119025154253457,0.7073756467819036,0.017652347001438858,0.7060538944259874,0.01749311908083878 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,test,0.6129032258064516,0.04179572243844966,0.6003223207091055,0.04422265172952331,0.6013655462184874,0.04276559544832569 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,train,0.8133903133903134,0.014838885496827731,0.8100566967190259,0.01518374206501488,0.8083056478405315,0.015176256060351298 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,test,0.6129032258064516,0.04304336801014306,0.6092436974789917,0.043478191221444244,0.6092436974789917,0.04344047080429191 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,train,0.9188034188034188,0.010225572507099332,0.9175368635815636,0.010439908646266306,0.9157253599114064,0.010623231528864805 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,test,0.6048387096774194,0.04528888656854025,0.6004471624909581,0.04571977995275497,0.6003151260504203,0.04551317451707635 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,train,0.8105413105413105,0.013941817185813424,0.8078552833314468,0.014201135637855347,0.8069029162052418,0.014259065477644377 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,test,0.6290322580645161,0.04127939308473816,0.6210470369386127,0.04241401945253636,0.6207983193277311,0.04176528047883907 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,train,0.8290598290598291,0.013715115990033631,0.8259389050515737,0.014088501609144785,0.8239940937615357,0.014165452617888272 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,test,0.6290322580645161,0.04500658698466618,0.6266038229903116,0.045471849995903604,0.6271008403361344,0.04546393078776404 +flat_mae,patch,logistic,abide_dx,87,0.046415888336127774,train,0.8005698005698005,0.015280797754368148,0.7967691505992704,0.01573963982841928,0.7949058693244739,0.015761117124632976 +flat_mae,patch,logistic,abide_dx,87,0.046415888336127774,test,0.6612903225806451,0.04085137142552884,0.6569169960474308,0.04127147306952389,0.6565126050420168,0.04102570972715738 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,train,0.811965811965812,0.014432497292878455,0.8088235294117647,0.01475480324954852,0.8073089700996677,0.014779863346403386 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,test,0.5887096774193549,0.04596567907086079,0.5873947935016637,0.04615058046713721,0.5887605042016807,0.04619853584836723 +flat_mae,patch,logistic,abide_dx,89,0.046415888336127774,train,0.8290598290598291,0.01478972096761629,0.8264552706177072,0.015092206581189978,0.8251753414544112,0.015153844737883159 +flat_mae,patch,logistic,abide_dx,89,0.046415888336127774,test,0.5967741935483871,0.04131666246557779,0.5963541666666667,0.041389998778551144,0.5992647058823529,0.04156590435483708 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,train,0.8048433048433048,0.014621829826821172,0.8017991282574423,0.014890026892427537,0.8005537098560354,0.014892245759094211 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,test,0.6451612903225806,0.041285962471406985,0.6313513513513513,0.04354651373630016,0.6323529411764706,0.04198956385720837 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,train,0.8105413105413105,0.014931052598046111,0.8074463324142113,0.015206590857396491,0.806016980435585,0.015184422468709031 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,test,0.6129032258064516,0.04043778381713843,0.6063492063492064,0.041450922687933905,0.60609243697479,0.04103487482293122 +flat_mae,patch,logistic,abide_dx,92,0.046415888336127774,train,0.8233618233618234,0.014454517523663973,0.8207959682798489,0.014722767280470804,0.8197120708748615,0.014774491465283795 +flat_mae,patch,logistic,abide_dx,92,0.046415888336127774,test,0.5483870967741935,0.04340927601949774,0.5441176470588236,0.04347175593474291,0.5441176470588236,0.04334445608826659 +flat_mae,patch,logistic,abide_dx,93,0.046415888336127774,train,0.8148148148148148,0.014187863704510126,0.8114338138058714,0.014535431737444465,0.8095976375046142,0.014556876803373767 +flat_mae,patch,logistic,abide_dx,93,0.046415888336127774,test,0.5564516129032258,0.043894855332508066,0.5529334644378892,0.04385612774890223,0.553046218487395,0.043792677830717164 +flat_mae,patch,logistic,abide_dx,94,0.000774263682681127,train,0.6538461538461539,0.01751019401447506,0.6361057129265603,0.019263152126182344,0.6379106681432263,0.018184910487316137 +flat_mae,patch,logistic,abide_dx,94,0.000774263682681127,test,0.5967741935483871,0.042042280264808704,0.5836690840719849,0.044153889673427024,0.5850840336134454,0.04273000063675474 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,train,0.8048433048433048,0.015115483622675157,0.802210006108057,0.015328926219235674,0.8014396456256921,0.015315812436326133 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,test,0.6048387096774194,0.043058326847732004,0.5880957223239103,0.04592739007657337,0.5908613445378151,0.04388065219861184 +flat_mae,patch,logistic,abide_dx,96,0.046415888336127774,train,0.8105413105413105,0.014946129350932863,0.8075860150236482,0.015288111700606288,0.806312292358804,0.015368376733031314 +flat_mae,patch,logistic,abide_dx,96,0.046415888336127774,test,0.6693548387096774,0.042988549040357406,0.6630211440312852,0.04456023013336834,0.6622899159663866,0.04383846685208054 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,train,0.9344729344729344,0.009165564078313005,0.9333778966131907,0.00939403075863723,0.9311184939091917,0.00971089743687654 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,test,0.6370967741935484,0.04334339746787591,0.6330637206549615,0.04419768212279903,0.6328781512605042,0.04400716331604586 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,train,0.8162393162393162,0.015386270618986444,0.8126625223678359,0.015839818444453348,0.810594315245478,0.015849388975963694 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,test,0.6693548387096774,0.04248585490260742,0.6595915634415801,0.04456710340963384,0.6591386554621849,0.04342151480565184 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,train,0.8034188034188035,0.01468735742719744,0.799983482677458,0.015013861622428811,0.7983757844222961,0.01499287997269589 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,test,0.6693548387096774,0.03895119214359001,0.6575739206573719,0.041270982845458284,0.657563025210084,0.039917357671463004 +flat_mae,patch,logistic,abide_dx,100,0.000774263682681127,train,0.6666666666666666,0.01644684311805553,0.6515966472105335,0.01787485829874074,0.6521963824289405,0.017021809165914788 +flat_mae,patch,logistic,abide_dx,100,0.000774263682681127,test,0.6370967741935484,0.04103809998482146,0.6190346145968457,0.04485951415033147,0.6218487394957983,0.042330725111534864 diff --git a/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..1cb24ed3d092ae853d1c3041900166e84fb929b0 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:52 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:16:23 time: 3.4023 data: 2.6918 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:29 time: 0.1803 data: 0.0530 max mem: 2851 +extract (train) [ 40/289] eta: 0:01:04 time: 0.1770 data: 0.0452 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:53 time: 0.1797 data: 0.0487 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:45 time: 0.1676 data: 0.0424 max mem: 2851 +extract (train) [100/289] eta: 0:00:39 time: 0.1686 data: 0.0437 max mem: 2851 +extract (train) [120/289] eta: 0:00:34 time: 0.1766 data: 0.0463 max mem: 2851 +extract (train) [140/289] eta: 0:00:29 time: 0.1932 data: 0.0536 max mem: 2851 +extract (train) [160/289] eta: 0:00:25 time: 0.1727 data: 0.0450 max mem: 2851 +extract (train) [180/289] eta: 0:00:21 time: 0.1597 data: 0.0433 max mem: 2851 +extract (train) [200/289] eta: 0:00:16 time: 0.1661 data: 0.0456 max mem: 2851 +extract (train) [220/289] eta: 0:00:13 time: 0.1785 data: 0.0487 max mem: 2851 +extract (train) [240/289] eta: 0:00:09 time: 0.1681 data: 0.0451 max mem: 2851 +extract (train) [260/289] eta: 0:00:05 time: 0.1658 data: 0.0455 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1333 data: 0.0318 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1321 data: 0.0319 max mem: 2851 +extract (train) Total time: 0:00:52 (0.1818 s / it) +extract (validation) [ 0/62] eta: 0:03:20 time: 3.2295 data: 3.1127 max mem: 2851 +extract (validation) [20/62] eta: 0:00:13 time: 0.1841 data: 0.0550 max mem: 2851 +extract (validation) [40/62] eta: 0:00:05 time: 0.1425 data: 0.0385 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1363 data: 0.0345 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1363 data: 0.0342 max mem: 2851 +extract (validation) Total time: 0:00:12 (0.2081 s / it) +extract (test) [ 0/62] eta: 0:03:25 time: 3.3141 data: 3.1140 max mem: 2851 +extract (test) [20/62] eta: 0:00:14 time: 0.1921 data: 0.0552 max mem: 2851 +extract (test) [40/62] eta: 0:00:05 time: 0.1538 data: 0.0410 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1322 data: 0.0317 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1322 data: 0.0317 max mem: 2851 +extract (test) Total time: 0:00:13 (0.2137 s / it) +feature extraction time: 0:01:18 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.35938 | train | 0.93162 | 0.0095783 | 0.93072 | 0.0097338 | 0.92994 | 0.0099011 | +| flat_mae | patch | logistic | abide_dx | | 0.35938 | test | 0.52419 | 0.041284 | 0.51275 | 0.041847 | 0.51519 | 0.041199 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.041113137346047476, "f1": 0.6688034188034189, "f1_std": 0.04295419279844657, "bacc": 0.6680672268907563, "bacc_std": 0.04203937864030787} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04211467917333632, "f1": 0.6179613241560145, "f1_std": 0.042344171626269744, "bacc": 0.618172268907563, "bacc_std": 0.04239843385697718} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04441990535419158, "f1": 0.5989703649924097, "f1_std": 0.04504116366441694, "bacc": 0.5987394957983193, "bacc_std": 0.04472068721015768} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.7016129032258065, "acc_std": 0.03831011527471725, "f1": 0.6944388944388944, "f1_std": 0.039845593860878076, "bacc": 0.6932773109243697, "bacc_std": 0.03926182668976251} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04129690912688167, "f1": 0.5915678524374176, "f1_std": 0.04169908351233194, "bacc": 0.5913865546218487, "bacc_std": 0.04146902108947524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04635304281882685, "f1": 0.6342182890855457, "f1_std": 0.046843179294802285, "bacc": 0.634453781512605, "bacc_std": 0.04677036162353719} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04462083000509283, "f1": 0.5765651155005022, "f1_std": 0.046826300397298085, "bacc": 0.5777310924369747, "bacc_std": 0.04541367312022607} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04229725774192688, "f1": 0.5588932806324111, "f1_std": 0.04293049709371854, "bacc": 0.5588235294117647, "bacc_std": 0.04267437159659311} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04283920763410549, "f1": 0.5972691721349506, "f1_std": 0.04353892823111837, "bacc": 0.5971638655462186, "bacc_std": 0.04304172193901062} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.040930955671154394, "f1": 0.6502820306204673, "f1_std": 0.043394048096207456, "bacc": 0.6502100840336134, "bacc_std": 0.042008753482431} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.042008679916720464, "f1": 0.6167554415729598, "f1_std": 0.04245925815837763, "bacc": 0.6165966386554622, "bacc_std": 0.0423885638229868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.044167039376957366, "f1": 0.5841388834089565, "f1_std": 0.04458292141122282, "bacc": 0.5840336134453781, "bacc_std": 0.04437302500699791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04249018678406638, "f1": 0.5703170970905524, "f1_std": 0.042903984753441864, "bacc": 0.5709033613445378, "bacc_std": 0.04313978178191481} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04310701056721079, "f1": 0.5907590759075907, "f1_std": 0.0453912057515939, "bacc": 0.592436974789916, "bacc_std": 0.04383432933102287} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.039270305166193877, "f1": 0.6480760345851759, "f1_std": 0.039930931836775055, "bacc": 0.6475840336134454, "bacc_std": 0.03964505782892502} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04254916663914516, "f1": 0.6112852664576802, "f1_std": 0.042812067293305056, "bacc": 0.6123949579831933, "bacc_std": 0.04292760821989454} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.043490570939756265, "f1": 0.6045708211533352, "f1_std": 0.04493554455811725, "bacc": 0.6045168067226891, "bacc_std": 0.04407857663726617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.043240642013598486, "f1": 0.6242424242424243, "f1_std": 0.04386617606615634, "bacc": 0.6239495798319328, "bacc_std": 0.043601468943845846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04351731268865347, "f1": 0.5478500171998624, "f1_std": 0.04790838098260214, "bacc": 0.5551470588235294, "bacc_std": 0.04451250840553695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.03989847613008047, "f1": 0.6408702094699266, "f1_std": 0.04215016598399647, "bacc": 0.641281512605042, "bacc_std": 0.040701763083595376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.000774263682681127, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04177785200071881, "f1": 0.5740553647201454, "f1_std": 0.04370057909705704, "bacc": 0.576155462184874, "bacc_std": 0.04227325722468146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.043081544899482405, "f1": 0.575109649122807, "f1_std": 0.047269246976230334, "bacc": 0.5803571428571428, "bacc_std": 0.04413932884337094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044992194952677395, "f1": 0.58994708994709, "f1_std": 0.046026024497452335, "bacc": 0.5898109243697479, "bacc_std": 0.04552468870036837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04403463304477666, "f1": 0.6017043592264831, "f1_std": 0.04417293170351323, "bacc": 0.601890756302521, "bacc_std": 0.04416192525932249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04344978083643979, "f1": 0.6274817136886102, "f1_std": 0.04353433023167341, "bacc": 0.6286764705882353, "bacc_std": 0.043653220708625845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04468924962737756, "f1": 0.5623043623043623, "f1_std": 0.04620564046687654, "bacc": 0.5630252100840336, "bacc_std": 0.0451854926690979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.045398212064891565, "f1": 0.5643931861867832, "f1_std": 0.04639725160756262, "bacc": 0.5646008403361344, "bacc_std": 0.045871421786329955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 1291.5496650148827, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04268278317059871, "f1": 0.6330637206549615, "f1_std": 0.04332768949551267, "bacc": 0.6328781512605042, "bacc_std": 0.04321975378321963} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.039879824024235046, "f1": 0.6688034188034189, "f1_std": 0.04188124334564985, "bacc": 0.6680672268907563, "bacc_std": 0.04090223241051902} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04234016894566617, "f1": 0.6342182890855457, "f1_std": 0.04261392435327109, "bacc": 0.634453781512605, "bacc_std": 0.04252657852203198} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.042695275809892956, "f1": 0.6375232527238905, "f1_std": 0.04452496015712942, "bacc": 0.6370798319327731, "bacc_std": 0.04367422892699733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04212897050827861, "f1": 0.6317074780542539, "f1_std": 0.04261838304020082, "bacc": 0.6313025210084033, "bacc_std": 0.042234307116611074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04284765986953694, "f1": 0.5643931861867832, "f1_std": 0.04379133700030574, "bacc": 0.5646008403361344, "bacc_std": 0.04322708815632735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.0432549582729253, "f1": 0.537888198757764, "f1_std": 0.04354100764742364, "bacc": 0.5383403361344539, "bacc_std": 0.04366090718453375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04091259941439645, "f1": 0.6667322189446083, "f1_std": 0.04139352722244033, "bacc": 0.6670168067226891, "bacc_std": 0.04130047995845927} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.041091241335156826, "f1": 0.6688034188034189, "f1_std": 0.04277763648525594, "bacc": 0.6680672268907563, "bacc_std": 0.04185253933161877} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.0397184550451784, "f1": 0.6448884448884449, "f1_std": 0.04115653437550358, "bacc": 0.6444327731092437, "bacc_std": 0.040399868390830165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04019436389169068, "f1": 0.6448884448884449, "f1_std": 0.04151157851580024, "bacc": 0.6444327731092437, "bacc_std": 0.040804104287870346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04396794310417141, "f1": 0.5836690840719849, "f1_std": 0.045798049335887776, "bacc": 0.5850840336134454, "bacc_std": 0.04444940641644906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.03919338576894447, "f1": 0.6331613347093223, "f1_std": 0.042771518458555646, "bacc": 0.6365546218487395, "bacc_std": 0.04010720712249162} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.03924210121545952, "f1": 0.6119727891156463, "f1_std": 0.04231364298836883, "bacc": 0.6144957983193278, "bacc_std": 0.04015843813684551} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04409677419354838, "f1": 0.6255252100840336, "f1_std": 0.044836666351665194, "bacc": 0.6255252100840336, "bacc_std": 0.044590379240014404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.717741935483871, "acc_std": 0.042134993773728924, "f1": 0.710955710955711, "f1_std": 0.04364201444051302, "bacc": 0.7095588235294117, "bacc_std": 0.04301362189664417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.000774263682681127, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.03997531431299166, "f1": 0.5921052631578947, "f1_std": 0.04397497149481937, "bacc": 0.5966386554621849, "bacc_std": 0.04103224726333098} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.041908800443708635, "f1": 0.5643243243243243, "f1_std": 0.043704722143080695, "bacc": 0.5672268907563025, "bacc_std": 0.04222818336737672} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04469744223500621, "f1": 0.5691904293674206, "f1_std": 0.045167292227623355, "bacc": 0.569327731092437, "bacc_std": 0.045194090900702234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04297369909299979, "f1": 0.607905138339921, "f1_std": 0.0437169917842997, "bacc": 0.6076680672268908, "bacc_std": 0.0434851656123859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04267607524765116, "f1": 0.6391534391534391, "f1_std": 0.04381060237156751, "bacc": 0.6386554621848739, "bacc_std": 0.04334719713144787} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.000774263682681127, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.039736266353089376, "f1": 0.578494623655914, "f1_std": 0.04418238570013005, "bacc": 0.5861344537815126, "bacc_std": 0.04059818158704357} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04465999427283362, "f1": 0.5810810810810811, "f1_std": 0.04713720256675513, "bacc": 0.5835084033613446, "bacc_std": 0.04528510431816933} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.041211631163837505, "f1": 0.5810810810810811, "f1_std": 0.04356186821528533, "bacc": 0.5835084033613446, "bacc_std": 0.04182421273594904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04385020856099628, "f1": 0.6242424242424243, "f1_std": 0.04462218152686849, "bacc": 0.6239495798319328, "bacc_std": 0.04452530415289145} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04276284029254295, "f1": 0.6242424242424243, "f1_std": 0.04342207004187337, "bacc": 0.6239495798319328, "bacc_std": 0.043176670429289805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04510232037057044, "f1": 0.6137071651090342, "f1_std": 0.0463636833031733, "bacc": 0.6134453781512605, "bacc_std": 0.04571655086801859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.042043706509107076, "f1": 0.6003223207091055, "f1_std": 0.04395201151796931, "bacc": 0.6013655462184874, "bacc_std": 0.04262738162186557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.03989701558058263, "f1": 0.6502820306204673, "f1_std": 0.04235785137587265, "bacc": 0.6502100840336134, "bacc_std": 0.04101881706001485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.039863512587277675, "f1": 0.6480760345851759, "f1_std": 0.0406830702303827, "bacc": 0.6475840336134454, "bacc_std": 0.04038080695452615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04346805912865296, "f1": 0.6112852664576802, "f1_std": 0.04353824942154756, "bacc": 0.6123949579831933, "bacc_std": 0.04371216285030784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04456227155100645, "f1": 0.6266038229903116, "f1_std": 0.0448244940169691, "bacc": 0.6271008403361344, "bacc_std": 0.044791266846952324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04398425541310865, "f1": 0.5865315462569467, "f1_std": 0.04417646002498987, "bacc": 0.5871848739495797, "bacc_std": 0.04429141821564067} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04423782512393834, "f1": 0.5623043623043623, "f1_std": 0.04515594725345051, "bacc": 0.5630252100840336, "bacc_std": 0.04431821774533121} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.000774263682681127, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04186993636584508, "f1": 0.5978378378378378, "f1_std": 0.04441792401715795, "bacc": 0.5997899159663866, "bacc_std": 0.04274082997220342} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.044036972452337696, "f1": 0.6203504657677024, "f1_std": 0.044150532010916364, "bacc": 0.6228991596638656, "bacc_std": 0.04416149355513142} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04257984786643588, "f1": 0.6391534391534391, "f1_std": 0.04374301919437287, "bacc": 0.6386554621848739, "bacc_std": 0.0432648698667387} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044480467249134785, "f1": 0.5931704050887178, "f1_std": 0.04665746390703013, "bacc": 0.5940126050420168, "bacc_std": 0.04535424956033638} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04579966726224051, "f1": 0.6092436974789917, "f1_std": 0.046368699932556025, "bacc": 0.6092436974789917, "bacc_std": 0.04625389298721849} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04194602845096986, "f1": 0.6448884448884449, "f1_std": 0.043576932006045295, "bacc": 0.6444327731092437, "bacc_std": 0.04268664970847627} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.000774263682681127, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.03919394331408622, "f1": 0.5810810810810811, "f1_std": 0.04189347456698179, "bacc": 0.5835084033613446, "bacc_std": 0.039982221811747945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.0407685020527538, "f1": 0.6721578198088265, "f1_std": 0.043651566533400986, "bacc": 0.6722689075630253, "bacc_std": 0.041829512970162006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04453546506193729, "f1": 0.6153389215233318, "f1_std": 0.045388848751803205, "bacc": 0.6150210084033614, "bacc_std": 0.04509356542582331} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.03931641103787403, "f1": 0.6429862738533645, "f1_std": 0.04107719866164188, "bacc": 0.6428571428571428, "bacc_std": 0.0399598841213838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04360950012968361, "f1": 0.5234186697934988, "f1_std": 0.043613506635374236, "bacc": 0.5252100840336134, "bacc_std": 0.043709194454083354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.03969965283177649, "f1": 0.6266038229903116, "f1_std": 0.039870109268220065, "bacc": 0.6271008403361344, "bacc_std": 0.0398788773951557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04281820340911307, "f1": 0.6210470369386127, "f1_std": 0.04412702440007403, "bacc": 0.6207983193277311, "bacc_std": 0.04343186865320686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0409200891258643, "f1": 0.6356837606837606, "f1_std": 0.04256190347344885, "bacc": 0.6355042016806722, "bacc_std": 0.04166504903969842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04075176751591489, "f1": 0.6575739206573719, "f1_std": 0.04300275716343429, "bacc": 0.657563025210084, "bacc_std": 0.041464770753086654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04507058575514828, "f1": 0.5311603650586701, "f1_std": 0.045220500564835, "bacc": 0.532563025210084, "bacc_std": 0.04539670070997022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.042833860369720776, "f1": 0.5978378378378378, "f1_std": 0.044704063706268816, "bacc": 0.5997899159663866, "bacc_std": 0.04322538013830674} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043808123275625055, "f1": 0.5931704050887178, "f1_std": 0.04613649124232754, "bacc": 0.5940126050420168, "bacc_std": 0.04466083182381372} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04270167001732451, "f1": 0.5941345902068604, "f1_std": 0.042936531142214185, "bacc": 0.5945378151260504, "bacc_std": 0.0431073123937748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04402541598737713, "f1": 0.5951020408163266, "f1_std": 0.047541045925511, "bacc": 0.5982142857142857, "bacc_std": 0.04497705880162714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04179572243844966, "f1": 0.6003223207091055, "f1_std": 0.04422265172952331, "bacc": 0.6013655462184874, "bacc_std": 0.04276559544832569} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04304336801014306, "f1": 0.6092436974789917, "f1_std": 0.043478191221444244, "bacc": 0.6092436974789917, "bacc_std": 0.04344047080429191} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04528888656854025, "f1": 0.6004471624909581, "f1_std": 0.04571977995275497, "bacc": 0.6003151260504203, "bacc_std": 0.04551317451707635} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04127939308473816, "f1": 0.6210470369386127, "f1_std": 0.04241401945253636, "bacc": 0.6207983193277311, "bacc_std": 0.04176528047883907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04500658698466618, "f1": 0.6266038229903116, "f1_std": 0.045471849995903604, "bacc": 0.6271008403361344, "bacc_std": 0.04546393078776404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04085137142552884, "f1": 0.6569169960474308, "f1_std": 0.04127147306952389, "bacc": 0.6565126050420168, "bacc_std": 0.04102570972715738} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04596567907086079, "f1": 0.5873947935016637, "f1_std": 0.04615058046713721, "bacc": 0.5887605042016807, "bacc_std": 0.04619853584836723} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04131666246557779, "f1": 0.5963541666666667, "f1_std": 0.041389998778551144, "bacc": 0.5992647058823529, "bacc_std": 0.04156590435483708} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.041285962471406985, "f1": 0.6313513513513513, "f1_std": 0.04354651373630016, "bacc": 0.6323529411764706, "bacc_std": 0.04198956385720837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04043778381713843, "f1": 0.6063492063492064, "f1_std": 0.041450922687933905, "bacc": 0.60609243697479, "bacc_std": 0.04103487482293122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04340927601949774, "f1": 0.5441176470588236, "f1_std": 0.04347175593474291, "bacc": 0.5441176470588236, "bacc_std": 0.04334445608826659} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.043894855332508066, "f1": 0.5529334644378892, "f1_std": 0.04385612774890223, "bacc": 0.553046218487395, "bacc_std": 0.043792677830717164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.000774263682681127, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.042042280264808704, "f1": 0.5836690840719849, "f1_std": 0.044153889673427024, "bacc": 0.5850840336134454, "bacc_std": 0.04273000063675474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043058326847732004, "f1": 0.5880957223239103, "f1_std": 0.04592739007657337, "bacc": 0.5908613445378151, "bacc_std": 0.04388065219861184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.042988549040357406, "f1": 0.6630211440312852, "f1_std": 0.04456023013336834, "bacc": 0.6622899159663866, "bacc_std": 0.04383846685208054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04334339746787591, "f1": 0.6330637206549615, "f1_std": 0.04419768212279903, "bacc": 0.6328781512605042, "bacc_std": 0.04400716331604586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04248585490260742, "f1": 0.6595915634415801, "f1_std": 0.04456710340963384, "bacc": 0.6591386554621849, "bacc_std": 0.04342151480565184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.03895119214359001, "f1": 0.6575739206573719, "f1_std": 0.041270982845458284, "bacc": 0.657563025210084, "bacc_std": 0.039917357671463004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.000774263682681127, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04103809998482146, "f1": 0.6190346145968457, "f1_std": 0.04485951415033147, "bacc": 0.6218487394957983, "bacc_std": 0.042330725111534864} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 12.989 | 129.15 | 0.80406 | 0.071932 | 0.79974 | 0.075084 | 0.79846 | 0.074815 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 12.989 | 129.15 | 0.61944 | 0.035969 | 0.61111 | 0.036134 | 0.61183 | 0.035412 | + + +done! total time: 0:05:23 diff --git a/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5c53bdae865e0c42ae27a9a155ee9506c5a3488c --- /dev/null +++ b/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..3bec874c8642e8f8f3b471ec77614ed76f91e565 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.046415888336127774,train,0.8657534246575342,0.017078188059986704,0.8614950940532335,0.01786542857265286,0.857391463638029,0.018112886528454195 +flat_mae,patch,logistic,adhd200_dx,,0.046415888336127774,test,0.676923076923077,0.059504947832663864,0.6719538572458543,0.06047265227632608,0.6727799227799228,0.06038152931158118 +flat_mae,patch,logistic,adhd200_dx,1,0.000774263682681127,train,0.6767123287671233,0.022587077014058984,0.6510403163080114,0.025500796987109806,0.6526073151370825,0.02357514448176547 +flat_mae,patch,logistic,adhd200_dx,1,0.000774263682681127,test,0.5538461538461539,0.0572655890767544,0.5250692869740489,0.06076585750307687,0.5299227799227799,0.05785183843593267 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,train,0.863013698630137,0.018246061122376967,0.8585271317829457,0.01910003251673321,0.8542468095499787,0.019267217692187515 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,test,0.5692307692307692,0.05671171570517729,0.5376016260162602,0.06225838701724851,0.5434362934362934,0.0579687388284011 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,train,0.7753424657534247,0.022369737351692617,0.7648787078934138,0.024009055738663534,0.7607925749526775,0.023508700049786866 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,test,0.6,0.05724673888027191,0.5775,0.06147789908968846,0.5791505791505791,0.058772705740167124 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,train,0.7506849315068493,0.023079824113354856,0.7411650107149814,0.02446588668211456,0.7382304451364718,0.024122255251734163 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,test,0.6153846153846154,0.058280724431296206,0.6018132810585641,0.061335942045065636,0.6013513513513513,0.059766919147401495 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,train,0.7808219178082192,0.02078671573710714,0.7736434108527132,0.022026284546400397,0.7706692312389326,0.021989550001872284 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,test,0.5076923076923077,0.06166973690315324,0.5019157088122606,0.06265548856678507,0.5024131274131274,0.06307831677995987 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7753424657534247,0.020005385651299848,0.7643010143010143,0.021738756019797873,0.7600751053306467,0.021251549075675173 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.676923076923077,0.05564570085522007,0.6612062546537603,0.06027805632457064,0.6597490347490347,0.058029226425179735 +flat_mae,patch,logistic,adhd200_dx,7,0.000774263682681127,train,0.6904109589041096,0.02384004430609179,0.6739551465996316,0.026013751162567152,0.6726354033095194,0.02499823872913839 +flat_mae,patch,logistic,adhd200_dx,7,0.000774263682681127,test,0.6,0.053583809265342605,0.570630081300813,0.05875139503905504,0.5748069498069498,0.05497776935868238 +flat_mae,patch,logistic,adhd200_dx,8,0.000774263682681127,train,0.6575342465753424,0.022877677610674274,0.6332071163848894,0.02618067205423451,0.6348995542529157,0.024211013993169033 +flat_mae,patch,logistic,adhd200_dx,8,0.000774263682681127,test,0.6307692307692307,0.052420466665642984,0.577922077922078,0.06534726334508818,0.5931467181467182,0.05533943784487845 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,train,0.863013698630137,0.017203730824674596,0.8590777118853472,0.0178951782948399,0.8556817487940405,0.018128698401832573 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,test,0.6,0.06166942986506083,0.588206627680312,0.06414252962711496,0.5878378378378378,0.0631285088032032 +flat_mae,patch,logistic,adhd200_dx,10,0.3593813663804626,train,0.9808219178082191,0.007018189914722619,0.9804551539491299,0.007165759005496705,0.9794223606277096,0.0074775484731877655 +flat_mae,patch,logistic,adhd200_dx,10,0.3593813663804626,test,0.5384615384615384,0.059654952625754924,0.5192307692307693,0.06180309132185452,0.5207528957528957,0.060215730060988015 +flat_mae,patch,logistic,adhd200_dx,11,0.046415888336127774,train,0.873972602739726,0.015797057857953677,0.8703514949345195,0.016402000539652333,0.8668254259021799,0.016596147741691346 +flat_mae,patch,logistic,adhd200_dx,11,0.046415888336127774,test,0.5692307692307692,0.05821271745394455,0.5608108108108107,0.05943390770577114,0.5608108108108107,0.05902766828279475 +flat_mae,patch,logistic,adhd200_dx,12,0.3593813663804626,train,0.9863013698630136,0.0058550169507943515,0.9860175757157852,0.006003312751636345,0.9842767295597484,0.006720381091320536 +flat_mae,patch,logistic,adhd200_dx,12,0.3593813663804626,test,0.5846153846153846,0.05862002808177161,0.5830363506771205,0.058889472645809154,0.5873552123552124,0.05932128623455292 +flat_mae,patch,logistic,adhd200_dx,13,0.046415888336127774,train,0.8904109589041096,0.01657785557762633,0.8874707115550623,0.017187596993291695,0.88425841118642,0.017500511778493447 +flat_mae,patch,logistic,adhd200_dx,13,0.046415888336127774,test,0.6,0.06020512471061013,0.5921814671814671,0.06129344119325076,0.5921814671814671,0.060771826774628876 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,train,0.8493150684931506,0.01846539489916572,0.8454116324377604,0.019064076766956806,0.8428283568419125,0.01917790528908459 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,test,0.6461538461538462,0.05890901619743036,0.6336682185738789,0.062146457074397456,0.6327220077220077,0.060796763063037065 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,train,0.7452054794520548,0.021742253092199054,0.7316622002988118,0.023653793230580165,0.7283537888502167,0.022795503203861693 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,test,0.6307692307692307,0.05951964719625667,0.6235521235521235,0.06047272103557194,0.6235521235521235,0.060358604493063656 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,train,0.7698630136986301,0.02056704208476135,0.7602739726027397,0.022036332541953082,0.7566556756426696,0.021706631290715647 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,test,0.5538461538461539,0.06091686059971773,0.5381034060279344,0.06258601657950763,0.5386100386100386,0.06138737497131656 +flat_mae,patch,logistic,adhd200_dx,17,0.046415888336127774,train,0.8684931506849315,0.01760480580593876,0.8652058841632302,0.01813228314639139,0.862688526592172,0.01819513376921124 +flat_mae,patch,logistic,adhd200_dx,17,0.046415888336127774,test,0.5846153846153846,0.05871990680869055,0.5699583435432491,0.06145209366131349,0.5699806949806949,0.060034387646530685 +flat_mae,patch,logistic,adhd200_dx,18,0.046415888336127774,train,0.8602739726027397,0.017702704480348722,0.8558418325860186,0.018563732784502077,0.8518196250839593,0.018808476561670995 +flat_mae,patch,logistic,adhd200_dx,18,0.046415888336127774,test,0.6153846153846154,0.059894629368741695,0.606060606060606,0.06123124537723429,0.6056949806949807,0.060530000381377465 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,train,0.8712328767123287,0.01752891699825019,0.8666029035544047,0.018552076117333956,0.8615283629480369,0.018863359931584694 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,test,0.676923076923077,0.06036583539420486,0.6655231560891939,0.06391764659581159,0.6640926640926641,0.06276648339768712 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,train,0.7424657534246575,0.02145089729027852,0.7329212853406402,0.02267758029029618,0.7302314221163827,0.022317700473933284 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,test,0.6153846153846154,0.05593973934395675,0.5966741126830479,0.05956773634875029,0.597007722007722,0.05736341308013188 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,train,0.7671232876712328,0.0213903512603398,0.7571409560935898,0.022703998105863116,0.7535110215546192,0.02227510118001808 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,test,0.7076923076923077,0.05511287180916318,0.6934723256391164,0.05935428871932758,0.6911196911196911,0.05751842221042249 +flat_mae,patch,logistic,adhd200_dx,22,0.046415888336127774,train,0.8410958904109589,0.01852075544991967,0.8362165005879805,0.01938356587541209,0.8326769249557306,0.019487613328053925 +flat_mae,patch,logistic,adhd200_dx,22,0.046415888336127774,test,0.6153846153846154,0.05756622119338048,0.6094688776736361,0.059000555408504046,0.61003861003861,0.05916352132013179 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7917808219178082,0.01943952905610274,0.7831050228310502,0.020821796423450743,0.7789430298589486,0.020593756305295988 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.6153846153846154,0.05582575596967586,0.5905769715293525,0.06092880156668017,0.5926640926640927,0.05723420917198336 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.7452054794520548,0.02150419039492512,0.7336581691500129,0.023225537391161698,0.7305061977163094,0.022753184600145818 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.6615384615384615,0.05450640481338385,0.6299171842650104,0.06302193643219771,0.6332046332046332,0.057101951438083956 +flat_mae,patch,logistic,adhd200_dx,25,0.005994842503189409,train,0.7616438356164383,0.020492957291593516,0.7476336933457312,0.022551349053612554,0.7436343652683641,0.021706554413297257 +flat_mae,patch,logistic,adhd200_dx,25,0.005994842503189409,test,0.6461538461538462,0.05497279370140541,0.6167649320687003,0.0619366915297259,0.6196911196911197,0.05722116357302214 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,train,0.7561643835616438,0.021106356414677466,0.7451137317672167,0.02276757266686839,0.7416498748244489,0.02223659459559254 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,test,0.6153846153846154,0.0551576546265822,0.5834401435529352,0.06222831061513817,0.5883204633204633,0.05713123455936296 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,train,0.7589041095890411,0.021694467568080304,0.7488584474885844,0.023092882223134936,0.7455119985345301,0.022705384174726586 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,test,0.7230769230769231,0.05303444454024486,0.7176640926640927,0.05405444556692365,0.7176640926640927,0.05377801209791008 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,train,0.7671232876712328,0.020626755894595308,0.7576948008840918,0.02190316980927576,0.7542284911766501,0.021540367710327007 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,test,0.6153846153846154,0.0541713258502937,0.5834401435529352,0.06113224527527147,0.5883204633204633,0.05613889901634252 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,train,0.7698630136986301,0.0215035509206162,0.7608125819134994,0.022827928711384598,0.7573731452647005,0.022512442760103606 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,test,0.5692307692307692,0.06031255082470189,0.545,0.06448818718183073,0.5477799227799228,0.06153564747621803 +flat_mae,patch,logistic,adhd200_dx,30,0.046415888336127774,train,0.873972602739726,0.018006069742447978,0.8705913182883216,0.018731876710168793,0.8675428955242108,0.019059334448234537 +flat_mae,patch,logistic,adhd200_dx,30,0.046415888336127774,test,0.6,0.054693119111150615,0.5833333333333333,0.0571336811586876,0.5834942084942085,0.0555374175286818 +flat_mae,patch,logistic,adhd200_dx,31,0.3593813663804626,train,0.9753424657534246,0.0083709171364992,0.9749085326265458,0.008526210055046533,0.9745679916956708,0.00867276297049639 +flat_mae,patch,logistic,adhd200_dx,31,0.3593813663804626,test,0.6307692307692307,0.059494446063696034,0.6235521235521235,0.060752597031315435,0.6235521235521235,0.06019783739826568 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,train,0.8493150684931506,0.018935942227686766,0.8454116324377604,0.0195802287188209,0.8428283568419125,0.019716087889845704 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,test,0.6461538461538462,0.05741953155112201,0.6289401836684041,0.06168051084712195,0.6283783783783784,0.05967790280427032 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,train,0.7671232876712328,0.02182961709234036,0.7587499319600937,0.023022149306424974,0.7556634304207119,0.022789463321737636 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,test,0.5692307692307692,0.06055749676300353,0.564176245210728,0.06119702477144015,0.5651544401544402,0.061196796584670324 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,train,0.873972602739726,0.01717838456398579,0.8714631308182264,0.017623491710827794,0.8704127740123344,0.01788683430789088 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,test,0.5692307692307692,0.06031380659742999,0.564176245210728,0.06124768065242325,0.5651544401544402,0.06163327286376625 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7342465753424657,0.021562612291352177,0.7178457296323746,0.023684312368223034,0.7150577028759846,0.022611843677091154 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.6307692307692307,0.057498826332246115,0.6036585365853658,0.06332847127867035,0.6061776061776062,0.05911397772664288 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,train,0.7616438356164383,0.02291267782318527,0.7540831261761494,0.02397005393991643,0.751526531110704,0.023827791822369087 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,test,0.6153846153846154,0.05808497346140251,0.6018132810585641,0.061038667690533496,0.6013513513513513,0.05966645370475177 +flat_mae,patch,logistic,adhd200_dx,37,0.000774263682681127,train,0.6575342465753424,0.02096302296353183,0.6219371732087072,0.024623448237372194,0.6284423276546376,0.02194461059330252 +flat_mae,patch,logistic,adhd200_dx,37,0.000774263682681127,test,0.6153846153846154,0.04606058269395328,0.554672513017265,0.05930753190875318,0.5752895752895753,0.04843900617142849 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.7342465753424657,0.023001444225953283,0.7240989674654198,0.02441805742465556,0.7215149294742627,0.023990316266928532 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.5692307692307692,0.05746423842899906,0.5512820512820513,0.06012947737164356,0.5521235521235521,0.05820206915826109 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,train,0.7726027397260274,0.02038158482091978,0.7628552865384465,0.021818746316566745,0.759082860108689,0.021491237705092245 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,test,0.6307692307692307,0.0615423075721229,0.6153846153846154,0.06404714127278875,0.6148648648648649,0.06243158787689593 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,train,0.7561643835616438,0.022598059457321323,0.7468536917981687,0.023885210398192535,0.7438022836905416,0.02352146265362123 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,test,0.6615384615384615,0.04774166180799899,0.6130952380952381,0.06002177730440167,0.6245173745173745,0.05071997773002365 +flat_mae,patch,logistic,adhd200_dx,41,0.046415888336127774,train,0.863013698630137,0.01789663530565597,0.8590777118853472,0.018640560725927097,0.8556817487940405,0.018831121553825235 +flat_mae,patch,logistic,adhd200_dx,41,0.046415888336127774,test,0.6461538461538462,0.05978969850515453,0.644808743169399,0.05982158801541909,0.6500965250965252,0.059381385241960516 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,train,0.7917808219178082,0.020232828298696496,0.7835923360169756,0.02123527764317484,0.7796604994809794,0.020959728647286076 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,test,0.6307692307692307,0.06271468972810364,0.6235521235521235,0.06377761357442285,0.6235521235521235,0.06339578516973475 +flat_mae,patch,logistic,adhd200_dx,43,0.000774263682681127,train,0.6657534246575343,0.022568342444463242,0.6403534276668605,0.025380538033943204,0.6421811076509739,0.023502473998959248 +flat_mae,patch,logistic,adhd200_dx,43,0.000774263682681127,test,0.5846153846153846,0.060512243369694504,0.5501153550371699,0.06653549857759201,0.556949806949807,0.061573446806877014 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,train,0.8712328767123287,0.01711640058759064,0.8683513801809545,0.017647832420832163,0.8665506503022532,0.017941781861312157 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,test,0.6153846153846154,0.05764464903293434,0.606060606060606,0.05921578486629486,0.6056949806949807,0.05872212325340343 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7780821917808219,0.022036380639370943,0.7680248570016242,0.02381929600954055,0.7639372290407278,0.02338048202951626 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.5076923076923077,0.05950008701699171,0.4871794871794872,0.06236568104858205,0.48938223938223935,0.06038077148270777 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,train,0.7479452054794521,0.021043395993003866,0.736833855799373,0.022497110144429934,0.7336508518043597,0.022001931833103797 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,test,0.6153846153846154,0.059431141598626364,0.5966741126830479,0.06273333213340375,0.597007722007722,0.06045994029541409 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,train,0.7780821917808219,0.021339051197830805,0.7685696169833032,0.022857905959625176,0.7646546986627587,0.02248936072595075 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,test,0.5692307692307692,0.06130707267325128,0.5565302144249512,0.06265016787306586,0.5564671814671815,0.06192038296137542 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,train,0.8684931506849315,0.01770759771118233,0.8644550349693632,0.018494065637778712,0.8605361177260793,0.01873217203779221 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,test,0.6307692307692307,0.060914148529912664,0.6264367816091954,0.06175308554170438,0.627895752895753,0.061938043329468624 +flat_mae,patch,logistic,adhd200_dx,49,0.046415888336127774,train,0.863013698630137,0.017342101318424116,0.8588073280930866,0.018117311580660808,0.8549642791720096,0.018291489179414932 +flat_mae,patch,logistic,adhd200_dx,49,0.046415888336127774,test,0.5538461538461539,0.06234080032061976,0.5500119360229172,0.06306192727957428,0.5516409266409266,0.06359814774523685 +flat_mae,patch,logistic,adhd200_dx,50,0.3593813663804626,train,0.9753424657534246,0.008364901787247829,0.9748709122203099,0.00854967290561261,0.9738505220736399,0.008947496248503129 +flat_mae,patch,logistic,adhd200_dx,50,0.3593813663804626,test,0.5692307692307692,0.058078836443265204,0.5565302144249512,0.06045777555896192,0.5564671814671815,0.05953511430094472 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,train,0.873972602739726,0.017171881513996033,0.8710445468509984,0.01767797475103027,0.8689778347682726,0.017893644739776993 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,test,0.6,0.06084209069386094,0.5921814671814671,0.06265787776687684,0.5921814671814671,0.06242020102102711 +flat_mae,patch,logistic,adhd200_dx,52,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,52,166.81005372000556,test,0.5230769230769231,0.06048955311045832,0.5115151515151515,0.06166501453032125,0.5115830115830116,0.061061613368097144 +flat_mae,patch,logistic,adhd200_dx,53,0.046415888336127774,train,0.8712328767123287,0.01686582463246114,0.8671483555204486,0.017702712573942035,0.8629633021920987,0.0179582989295863 +flat_mae,patch,logistic,adhd200_dx,53,0.046415888336127774,test,0.5230769230769231,0.05926792041680342,0.49987589972697943,0.06221983316200884,0.502895752895753,0.05999061139233182 +flat_mae,patch,logistic,adhd200_dx,54,0.000774263682681127,train,0.6794520547945205,0.021622128181495368,0.6596695914122457,0.024302370226241755,0.6593393173352873,0.022912587776226175 +flat_mae,patch,logistic,adhd200_dx,54,0.000774263682681127,test,0.5384615384615384,0.05798408269649392,0.5125,0.06153215536539919,0.5164092664092664,0.05870489862503548 +flat_mae,patch,logistic,adhd200_dx,55,0.046415888336127774,train,0.8931506849315068,0.015940105572410695,0.8905749140972719,0.016409197507246027,0.8881205348965012,0.01666344198910492 +flat_mae,patch,logistic,adhd200_dx,55,0.046415888336127774,test,0.5692307692307692,0.06375048253675135,0.5666666666666667,0.06380417795717837,0.5694980694980695,0.06425679767514877 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,train,0.7726027397260274,0.021767964500068952,0.7617212386248339,0.023420409153067163,0.7576479208646272,0.02289742657845494 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,test,0.5846153846153846,0.05939442724840168,0.5745454545454545,0.06070472188012568,0.5743243243243243,0.060147054191353515 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,train,0.7945205479452054,0.020907293604799145,0.7866744593804793,0.022166814176650108,0.7828051535690297,0.021965267417927148 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,test,0.6153846153846154,0.059952320305882836,0.5905769715293525,0.06525933957683168,0.5926640926640927,0.061561379891102906 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7863013698630137,0.020597215085571424,0.7797394318252151,0.021579319056509432,0.7769585394150332,0.021568410830982366 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.6615384615384615,0.0565710194064628,0.6515594541910331,0.058946245373878714,0.6505791505791505,0.05777064500738074 +flat_mae,patch,logistic,adhd200_dx,59,0.046415888336127774,train,0.8821917808219178,0.017601838898116118,0.8786916162342229,0.018382647533215588,0.8748244489222691,0.018699544783355525 +flat_mae,patch,logistic,adhd200_dx,59,0.046415888336127774,test,0.5076923076923077,0.05877225123128174,0.48,0.062216358119852586,0.48503861003861004,0.05948481241158543 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7972602739726027,0.020075186877502173,0.7888127853881278,0.021429790625384876,0.7845148684130183,0.021151749741149483 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6307692307692307,0.05591638728372842,0.61,0.06102715186358429,0.6105212355212355,0.05797635992295799 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7835616438356164,0.020567869062099665,0.773752638310226,0.02198777375099392,0.7695090675947975,0.021584886168634444 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.5538461538461539,0.05920342225835084,0.5250692869740489,0.06373726101759931,0.5299227799227799,0.06050718983622143 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,train,0.8767123287671232,0.017367234579230093,0.8732883317261331,0.018102303134223993,0.8699700799902302,0.01841306453083887 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,test,0.5846153846153846,0.0586200603828582,0.578226387887527,0.0595344030671501,0.5786679536679536,0.05937859601291822 +flat_mae,patch,logistic,adhd200_dx,63,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,63,2.782559402207126,test,0.5384615384615384,0.060720245105569795,0.5330459770114943,0.061116563758247026,0.5337837837837838,0.061119656790161986 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,train,0.7616438356164383,0.020729991019745736,0.7502379248236211,0.02245687988910006,0.7465042437564877,0.021940240232594787 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,test,0.6153846153846154,0.05838481605316686,0.5966741126830479,0.06258611262848957,0.597007722007722,0.060116132732405014 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7616438356164383,0.02161347341507988,0.751426625648733,0.02308981811094439,0.7479391830005495,0.022711279961858486 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.6923076923076923,0.053164698880935946,0.6697154471544715,0.0600745987168088,0.6689189189189189,0.056129481992877664 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7616438356164383,0.021225600874478022,0.751426625648733,0.022618830583177298,0.7479391830005495,0.022166094850227663 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.676923076923077,0.054646846842452725,0.6612062546537603,0.058706828966698724,0.6597490347490347,0.056793099000891065 +flat_mae,patch,logistic,adhd200_dx,67,0.046415888336127774,train,0.8684931506849315,0.018039887416489733,0.8652058841632302,0.018652586025564053,0.862688526592172,0.018915955493297932 +flat_mae,patch,logistic,adhd200_dx,67,0.046415888336127774,test,0.6,0.05899682871830119,0.5775,0.06346597861177079,0.5791505791505791,0.06037054928764662 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7589041095890411,0.022723267851648438,0.745079365079365,0.02509744053733582,0.7412071808023448,0.024153722491621045 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.6461538461538462,0.0559226092457431,0.6233308138070043,0.061367562292631336,0.6240347490347491,0.05793770442040621 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,train,0.7753424657534247,0.020495315979667136,0.7675086999751429,0.021706199137629327,0.764379923062832,0.021594916171311655 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,test,0.5538461538461539,0.06140196400305084,0.5381034060279344,0.06317904633758856,0.5386100386100386,0.061911850483780756 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,train,0.8767123287671232,0.016998785514001365,0.8732883317261331,0.017715720377473303,0.8699700799902302,0.018007545352588153 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,test,0.6307692307692307,0.06161260149289181,0.6235521235521235,0.06304160345581517,0.6235521235521235,0.06284827736084889 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,train,0.873972602739726,0.01719617932053133,0.8701027418456397,0.017967125645906724,0.866107956280149,0.018244569176593773 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,test,0.5538461538461539,0.06153762307121098,0.5469838981014179,0.06248177938605528,0.5472972972972974,0.06253792977805431 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.7780821917808219,0.02136403432979856,0.7685696169833032,0.022747195386145634,0.7646546986627587,0.022351699712148475 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.7384615384615385,0.05286726932309323,0.7257383966244726,0.05691980971726144,0.7224903474903475,0.05542362361576988 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,train,0.6767123287671233,0.02237375080510943,0.6542672745954277,0.025059124460097493,0.6547597240031752,0.023446675949477837 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,test,0.5538461538461539,0.0594706113090109,0.5250692869740489,0.06318208609007908,0.5299227799227799,0.0602026684615873 +flat_mae,patch,logistic,adhd200_dx,74,0.046415888336127774,train,0.863013698630137,0.01819109670276617,0.8588073280930866,0.019064519507832518,0.8549642791720096,0.019281632874448642 +flat_mae,patch,logistic,adhd200_dx,74,0.046415888336127774,test,0.676923076923077,0.060229179662687066,0.6741465743614228,0.06069938990241908,0.6771235521235521,0.060708661915514814 +flat_mae,patch,logistic,adhd200_dx,75,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,75,10000.0,test,0.5076923076923077,0.06054993733475372,0.4980694980694981,0.061499305600829515,0.4980694980694981,0.061220136756844686 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.7835616438356164,0.02102046005775313,0.773752638310226,0.022457636024097387,0.7695090675947975,0.022043890054682106 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.5384615384615384,0.05453932732306175,0.4846723044397463,0.06020184380218335,0.5033783783783784,0.05457176405896156 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.873972602739726,0.01807682693349379,0.8703514949345195,0.018848966802993663,0.8668254259021799,0.019156041010845178 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.6461538461538462,0.05900652055032241,0.6289401836684041,0.06280969185824659,0.6283783783783784,0.06058208559538486 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,train,0.7808219178082192,0.022545938424987193,0.7740917249489385,0.02356348697234715,0.7713867008609635,0.02349825986415514 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,test,0.5538461538461539,0.050594047408129894,0.48342011510002736,0.06223730091284569,0.5125482625482626,0.05192318018313542 +flat_mae,patch,logistic,adhd200_dx,79,0.046415888336127774,train,0.873972602739726,0.016502854288341317,0.8705913182883216,0.017065307867979107,0.8675428955242108,0.01719226615072895 +flat_mae,patch,logistic,adhd200_dx,79,0.046415888336127774,test,0.47692307692307695,0.06153966152676176,0.4615009746588694,0.06252886367415189,0.4623552123552124,0.061813106623232986 +flat_mae,patch,logistic,adhd200_dx,80,0.3593813663804626,train,0.9835616438356164,0.006457043123000468,0.9832844843377908,0.006564177392921705,0.9832844843377908,0.006593717959021876 +flat_mae,patch,logistic,adhd200_dx,80,0.3593813663804626,test,0.5384615384615384,0.06052609589279378,0.5374762808349146,0.060904132016181785,0.5424710424710424,0.06113460717550788 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,train,0.7808219178082192,0.02118241569018412,0.7726989662473533,0.022333665419904555,0.7692342919948708,0.02214673640288412 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,test,0.5692307692307692,0.05393571452919808,0.5289855072463768,0.06095492702591165,0.5390926640926641,0.055289992352924845 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,train,0.8712328767123287,0.01709457318423848,0.8676567020250723,0.017759778104266606,0.8643982414361605,0.01798953182968937 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,test,0.6153846153846154,0.060524406539694785,0.6150201374081972,0.06055672266851132,0.6230694980694981,0.06057061405854022 +flat_mae,patch,logistic,adhd200_dx,83,0.3593813663804626,train,0.9780821917808219,0.007591475797573075,0.9776101091890566,0.007803641307762668,0.9755602369176284,0.00847845219890788 +flat_mae,patch,logistic,adhd200_dx,83,0.3593813663804626,test,0.6615384615384615,0.056686669169553984,0.6515594541910331,0.058706671054663685,0.6505791505791505,0.05817856203597878 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,train,0.7726027397260274,0.02023911924547642,0.7622970756930223,0.021660106539710292,0.7583653904866581,0.02120036665696275 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,test,0.5846153846153846,0.059838836807984026,0.5644080416976918,0.06381067756323514,0.5656370656370656,0.061261480436377226 +flat_mae,patch,logistic,adhd200_dx,85,0.046415888336127774,train,0.8657534246575342,0.017509557961403622,0.8612156531050912,0.018383948980915284,0.8566739940159981,0.01855224208333624 +flat_mae,patch,logistic,adhd200_dx,85,0.046415888336127774,test,0.6153846153846154,0.06100857248911634,0.606060606060606,0.062405489865537025,0.6056949806949807,0.06197521869229974 +flat_mae,patch,logistic,adhd200_dx,86,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,86,2.782559402207126,test,0.5538461538461539,0.05874369964958548,0.543030303030303,0.06038817988207976,0.542953667953668,0.06003518819625919 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,train,0.7616438356164383,0.022047394997964827,0.7508415130758186,0.02370557919364191,0.7472217133785186,0.023198210544904178 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,test,0.5846153846153846,0.051181903236738976,0.5308740978348035,0.061058728869043725,0.5482625482625483,0.05280501421462855 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,train,0.7698630136986301,0.020909589041095893,0.761333914559721,0.02208567893630436,0.7580906148867314,0.021882564990543425 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,test,0.6307692307692307,0.05859980410735277,0.6036585365853658,0.06512271871877104,0.6061776061776062,0.06055959629257867 +flat_mae,patch,logistic,adhd200_dx,89,0.046415888336127774,train,0.9013698630136986,0.015405369092786093,0.8992454913507545,0.01583441568764217,0.8975544971606522,0.01609625458655278 +flat_mae,patch,logistic,adhd200_dx,89,0.046415888336127774,test,0.5538461538461539,0.06292066198111723,0.5381034060279344,0.06561990331086198,0.5386100386100386,0.06416884384509056 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7698630136986301,0.02126099137329366,0.7602739726027397,0.02247265665382327,0.7566556756426696,0.02206959894410583 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.6615384615384615,0.054909448858684574,0.6425000000000001,0.060229103255400214,0.6418918918918919,0.05716372688395238 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,train,0.8657534246575342,0.017635388942161222,0.8612156531050912,0.018568613660985164,0.8566739940159981,0.01883123683622368 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,test,0.6307692307692307,0.05632554571943104,0.5962732919254659,0.06466910122485398,0.6018339768339769,0.05857590421093464 +flat_mae,patch,logistic,adhd200_dx,92,0.046415888336127774,train,0.8657534246575342,0.017651352095874557,0.8614950940532335,0.01844464192480178,0.857391463638029,0.018638468995892117 +flat_mae,patch,logistic,adhd200_dx,92,0.046415888336127774,test,0.6,0.05649653076221301,0.5626293995859213,0.06274212693020575,0.5704633204633205,0.05757478748821781 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7479452054794521,0.022117535873869845,0.7348891555611696,0.023896859868020308,0.731498442938267,0.023223929220007954 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.6307692307692307,0.046047580229263016,0.5666666666666667,0.062174406709198125,0.5888030888030888,0.04940116775797494 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,train,0.7726027397260274,0.020976816770162478,0.763396099686819,0.022224541583159917,0.7598003297307199,0.021874359696556205 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,test,0.6153846153846154,0.055571062240881104,0.5834401435529352,0.06236305682430676,0.5883204633204633,0.05741727355833475 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,train,0.7808219178082192,0.021342318743229068,0.7740917249489385,0.02227124658306682,0.7713867008609635,0.022186512911127085 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,test,0.5538461538461539,0.056726780042273654,0.5250692869740489,0.06176104818236881,0.5299227799227799,0.05795150249710536 +flat_mae,patch,logistic,adhd200_dx,96,0.000774263682681127,train,0.673972602739726,0.02346361512540719,0.6528671328671329,0.02577544145624314,0.6530500091591867,0.024367868763157507 +flat_mae,patch,logistic,adhd200_dx,96,0.000774263682681127,test,0.6,0.0572460773571523,0.5775,0.06183627874640533,0.5791505791505791,0.058789709270174274 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,train,0.7671232876712328,0.020705093321999216,0.7565692943844204,0.022212989638314983,0.7527935519325883,0.02174798170772359 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,test,0.5692307692307692,0.05417485606428317,0.5190274841437632,0.06157909892866873,0.5347490347490347,0.05486393676243797 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,train,0.7561643835616438,0.02184022982838441,0.7462922032786373,0.022916735774417902,0.7430848140685107,0.022504599003110436 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,test,0.6461538461538462,0.05957084390695656,0.6289401836684041,0.0631125443973189,0.6283783783783784,0.06077644959066037 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7835616438356164,0.021480153748672674,0.7762456448020858,0.022511469044813166,0.773096415704952,0.0222900131862803 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.5846153846153846,0.055063505026458466,0.5411764705882354,0.06385498223982913,0.5526061776061776,0.05683970398529639 +flat_mae,patch,logistic,adhd200_dx,100,0.000774263682681127,train,0.673972602739726,0.022060997340963267,0.6497431638026272,0.02472617323407717,0.650897600293094,0.023015836470489808 +flat_mae,patch,logistic,adhd200_dx,100,0.000774263682681127,test,0.5692307692307692,0.057503494105310915,0.545,0.061292643112078674,0.5477799227799228,0.058572480030164895 diff --git a/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f450bab2231775e89a04b993ca0c114515581d0e --- /dev/null +++ b/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:52 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:13:18 time: 5.2900 data: 4.2381 max mem: 2698 +extract (train) [ 20/151] eta: 0:01:02 time: 0.2346 data: 0.0604 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:39 time: 0.2212 data: 0.0568 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:26 time: 0.1806 data: 0.0411 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:19 time: 0.1849 data: 0.0446 max mem: 2851 +extract (train) [100/151] eta: 0:00:12 time: 0.1852 data: 0.0443 max mem: 2851 +extract (train) [120/151] eta: 0:00:07 time: 0.1945 data: 0.0506 max mem: 2851 +extract (train) [140/151] eta: 0:00:02 time: 0.1410 data: 0.0328 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1364 data: 0.0320 max mem: 2851 +extract (train) Total time: 0:00:33 (0.2233 s / it) +extract (validation) [ 0/32] eta: 0:01:56 time: 3.6309 data: 3.4524 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.1815 data: 0.0476 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1419 data: 0.0320 max mem: 2851 +extract (validation) Total time: 0:00:09 (0.2842 s / it) +extract (test) [ 0/33] eta: 0:01:45 time: 3.2064 data: 3.0594 max mem: 2851 +extract (test) [20/33] eta: 0:00:04 time: 0.1743 data: 0.0467 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1434 data: 0.0357 max mem: 2851 +extract (test) Total time: 0:00:08 (0.2625 s / it) +feature extraction time: 0:00:51 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.046416 | train | 0.86575 | 0.017078 | 0.8615 | 0.017865 | 0.85739 | 0.018113 | +| flat_mae | patch | logistic | adhd200_dx | | 0.046416 | test | 0.67692 | 0.059505 | 0.67195 | 0.060473 | 0.67278 | 0.060382 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.0572655890767544, "f1": 0.5250692869740489, "f1_std": 0.06076585750307687, "bacc": 0.5299227799227799, "bacc_std": 0.05785183843593267} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05671171570517729, "f1": 0.5376016260162602, "f1_std": 0.06225838701724851, "bacc": 0.5434362934362934, "bacc_std": 0.0579687388284011} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05724673888027191, "f1": 0.5775, "f1_std": 0.06147789908968846, "bacc": 0.5791505791505791, "bacc_std": 0.058772705740167124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.058280724431296206, "f1": 0.6018132810585641, "f1_std": 0.061335942045065636, "bacc": 0.6013513513513513, "bacc_std": 0.059766919147401495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06166973690315324, "f1": 0.5019157088122606, "f1_std": 0.06265548856678507, "bacc": 0.5024131274131274, "bacc_std": 0.06307831677995987} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05564570085522007, "f1": 0.6612062546537603, "f1_std": 0.06027805632457064, "bacc": 0.6597490347490347, "bacc_std": 0.058029226425179735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.053583809265342605, "f1": 0.570630081300813, "f1_std": 0.05875139503905504, "bacc": 0.5748069498069498, "bacc_std": 0.05497776935868238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.052420466665642984, "f1": 0.577922077922078, "f1_std": 0.06534726334508818, "bacc": 0.5931467181467182, "bacc_std": 0.05533943784487845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06166942986506083, "f1": 0.588206627680312, "f1_std": 0.06414252962711496, "bacc": 0.5878378378378378, "bacc_std": 0.0631285088032032} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.059654952625754924, "f1": 0.5192307692307693, "f1_std": 0.06180309132185452, "bacc": 0.5207528957528957, "bacc_std": 0.060215730060988015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05821271745394455, "f1": 0.5608108108108107, "f1_std": 0.05943390770577114, "bacc": 0.5608108108108107, "bacc_std": 0.05902766828279475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05862002808177161, "f1": 0.5830363506771205, "f1_std": 0.058889472645809154, "bacc": 0.5873552123552124, "bacc_std": 0.05932128623455292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06020512471061013, "f1": 0.5921814671814671, "f1_std": 0.06129344119325076, "bacc": 0.5921814671814671, "bacc_std": 0.060771826774628876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05890901619743036, "f1": 0.6336682185738789, "f1_std": 0.062146457074397456, "bacc": 0.6327220077220077, "bacc_std": 0.060796763063037065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05951964719625667, "f1": 0.6235521235521235, "f1_std": 0.06047272103557194, "bacc": 0.6235521235521235, "bacc_std": 0.060358604493063656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06091686059971773, "f1": 0.5381034060279344, "f1_std": 0.06258601657950763, "bacc": 0.5386100386100386, "bacc_std": 0.06138737497131656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05871990680869055, "f1": 0.5699583435432491, "f1_std": 0.06145209366131349, "bacc": 0.5699806949806949, "bacc_std": 0.060034387646530685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.059894629368741695, "f1": 0.606060606060606, "f1_std": 0.06123124537723429, "bacc": 0.6056949806949807, "bacc_std": 0.060530000381377465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.06036583539420486, "f1": 0.6655231560891939, "f1_std": 0.06391764659581159, "bacc": 0.6640926640926641, "bacc_std": 0.06276648339768712} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05593973934395675, "f1": 0.5966741126830479, "f1_std": 0.05956773634875029, "bacc": 0.597007722007722, "bacc_std": 0.05736341308013188} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05511287180916318, "f1": 0.6934723256391164, "f1_std": 0.05935428871932758, "bacc": 0.6911196911196911, "bacc_std": 0.05751842221042249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05756622119338048, "f1": 0.6094688776736361, "f1_std": 0.059000555408504046, "bacc": 0.61003861003861, "bacc_std": 0.05916352132013179} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05582575596967586, "f1": 0.5905769715293525, "f1_std": 0.06092880156668017, "bacc": 0.5926640926640927, "bacc_std": 0.05723420917198336} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05450640481338385, "f1": 0.6299171842650104, "f1_std": 0.06302193643219771, "bacc": 0.6332046332046332, "bacc_std": 0.057101951438083956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05497279370140541, "f1": 0.6167649320687003, "f1_std": 0.0619366915297259, "bacc": 0.6196911196911197, "bacc_std": 0.05722116357302214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0551576546265822, "f1": 0.5834401435529352, "f1_std": 0.06222831061513817, "bacc": 0.5883204633204633, "bacc_std": 0.05713123455936296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05303444454024486, "f1": 0.7176640926640927, "f1_std": 0.05405444556692365, "bacc": 0.7176640926640927, "bacc_std": 0.05377801209791008} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0541713258502937, "f1": 0.5834401435529352, "f1_std": 0.06113224527527147, "bacc": 0.5883204633204633, "bacc_std": 0.05613889901634252} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06031255082470189, "f1": 0.545, "f1_std": 0.06448818718183073, "bacc": 0.5477799227799228, "bacc_std": 0.06153564747621803} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.054693119111150615, "f1": 0.5833333333333333, "f1_std": 0.0571336811586876, "bacc": 0.5834942084942085, "bacc_std": 0.0555374175286818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059494446063696034, "f1": 0.6235521235521235, "f1_std": 0.060752597031315435, "bacc": 0.6235521235521235, "bacc_std": 0.06019783739826568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05741953155112201, "f1": 0.6289401836684041, "f1_std": 0.06168051084712195, "bacc": 0.6283783783783784, "bacc_std": 0.05967790280427032} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06055749676300353, "f1": 0.564176245210728, "f1_std": 0.06119702477144015, "bacc": 0.5651544401544402, "bacc_std": 0.061196796584670324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06031380659742999, "f1": 0.564176245210728, "f1_std": 0.06124768065242325, "bacc": 0.5651544401544402, "bacc_std": 0.06163327286376625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057498826332246115, "f1": 0.6036585365853658, "f1_std": 0.06332847127867035, "bacc": 0.6061776061776062, "bacc_std": 0.05911397772664288} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05808497346140251, "f1": 0.6018132810585641, "f1_std": 0.061038667690533496, "bacc": 0.6013513513513513, "bacc_std": 0.05966645370475177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.04606058269395328, "f1": 0.554672513017265, "f1_std": 0.05930753190875318, "bacc": 0.5752895752895753, "bacc_std": 0.04843900617142849} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05746423842899906, "f1": 0.5512820512820513, "f1_std": 0.06012947737164356, "bacc": 0.5521235521235521, "bacc_std": 0.05820206915826109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0615423075721229, "f1": 0.6153846153846154, "f1_std": 0.06404714127278875, "bacc": 0.6148648648648649, "bacc_std": 0.06243158787689593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.04774166180799899, "f1": 0.6130952380952381, "f1_std": 0.06002177730440167, "bacc": 0.6245173745173745, "bacc_std": 0.05071997773002365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05978969850515453, "f1": 0.644808743169399, "f1_std": 0.05982158801541909, "bacc": 0.6500965250965252, "bacc_std": 0.059381385241960516} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06271468972810364, "f1": 0.6235521235521235, "f1_std": 0.06377761357442285, "bacc": 0.6235521235521235, "bacc_std": 0.06339578516973475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.060512243369694504, "f1": 0.5501153550371699, "f1_std": 0.06653549857759201, "bacc": 0.556949806949807, "bacc_std": 0.061573446806877014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05764464903293434, "f1": 0.606060606060606, "f1_std": 0.05921578486629486, "bacc": 0.6056949806949807, "bacc_std": 0.05872212325340343} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.05950008701699171, "f1": 0.4871794871794872, "f1_std": 0.06236568104858205, "bacc": 0.48938223938223935, "bacc_std": 0.06038077148270777} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.059431141598626364, "f1": 0.5966741126830479, "f1_std": 0.06273333213340375, "bacc": 0.597007722007722, "bacc_std": 0.06045994029541409} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06130707267325128, "f1": 0.5565302144249512, "f1_std": 0.06265016787306586, "bacc": 0.5564671814671815, "bacc_std": 0.06192038296137542} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.060914148529912664, "f1": 0.6264367816091954, "f1_std": 0.06175308554170438, "bacc": 0.627895752895753, "bacc_std": 0.061938043329468624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06234080032061976, "f1": 0.5500119360229172, "f1_std": 0.06306192727957428, "bacc": 0.5516409266409266, "bacc_std": 0.06359814774523685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.058078836443265204, "f1": 0.5565302144249512, "f1_std": 0.06045777555896192, "bacc": 0.5564671814671815, "bacc_std": 0.05953511430094472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06084209069386094, "f1": 0.5921814671814671, "f1_std": 0.06265787776687684, "bacc": 0.5921814671814671, "bacc_std": 0.06242020102102711} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 166.81005372000556, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06048955311045832, "f1": 0.5115151515151515, "f1_std": 0.06166501453032125, "bacc": 0.5115830115830116, "bacc_std": 0.061061613368097144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05926792041680342, "f1": 0.49987589972697943, "f1_std": 0.06221983316200884, "bacc": 0.502895752895753, "bacc_std": 0.05999061139233182} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05798408269649392, "f1": 0.5125, "f1_std": 0.06153215536539919, "bacc": 0.5164092664092664, "bacc_std": 0.05870489862503548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06375048253675135, "f1": 0.5666666666666667, "f1_std": 0.06380417795717837, "bacc": 0.5694980694980695, "bacc_std": 0.06425679767514877} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05939442724840168, "f1": 0.5745454545454545, "f1_std": 0.06070472188012568, "bacc": 0.5743243243243243, "bacc_std": 0.060147054191353515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.059952320305882836, "f1": 0.5905769715293525, "f1_std": 0.06525933957683168, "bacc": 0.5926640926640927, "bacc_std": 0.061561379891102906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0565710194064628, "f1": 0.6515594541910331, "f1_std": 0.058946245373878714, "bacc": 0.6505791505791505, "bacc_std": 0.05777064500738074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.05877225123128174, "f1": 0.48, "f1_std": 0.062216358119852586, "bacc": 0.48503861003861004, "bacc_std": 0.05948481241158543} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05591638728372842, "f1": 0.61, "f1_std": 0.06102715186358429, "bacc": 0.6105212355212355, "bacc_std": 0.05797635992295799} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05920342225835084, "f1": 0.5250692869740489, "f1_std": 0.06373726101759931, "bacc": 0.5299227799227799, "bacc_std": 0.06050718983622143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.0586200603828582, "f1": 0.578226387887527, "f1_std": 0.0595344030671501, "bacc": 0.5786679536679536, "bacc_std": 0.05937859601291822} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.060720245105569795, "f1": 0.5330459770114943, "f1_std": 0.061116563758247026, "bacc": 0.5337837837837838, "bacc_std": 0.061119656790161986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05838481605316686, "f1": 0.5966741126830479, "f1_std": 0.06258611262848957, "bacc": 0.597007722007722, "bacc_std": 0.060116132732405014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.053164698880935946, "f1": 0.6697154471544715, "f1_std": 0.0600745987168088, "bacc": 0.6689189189189189, "bacc_std": 0.056129481992877664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.054646846842452725, "f1": 0.6612062546537603, "f1_std": 0.058706828966698724, "bacc": 0.6597490347490347, "bacc_std": 0.056793099000891065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05899682871830119, "f1": 0.5775, "f1_std": 0.06346597861177079, "bacc": 0.5791505791505791, "bacc_std": 0.06037054928764662} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.0559226092457431, "f1": 0.6233308138070043, "f1_std": 0.061367562292631336, "bacc": 0.6240347490347491, "bacc_std": 0.05793770442040621} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06140196400305084, "f1": 0.5381034060279344, "f1_std": 0.06317904633758856, "bacc": 0.5386100386100386, "bacc_std": 0.061911850483780756} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06161260149289181, "f1": 0.6235521235521235, "f1_std": 0.06304160345581517, "bacc": 0.6235521235521235, "bacc_std": 0.06284827736084889} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06153762307121098, "f1": 0.5469838981014179, "f1_std": 0.06248177938605528, "bacc": 0.5472972972972974, "bacc_std": 0.06253792977805431} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.05286726932309323, "f1": 0.7257383966244726, "f1_std": 0.05691980971726144, "bacc": 0.7224903474903475, "bacc_std": 0.05542362361576988} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.0594706113090109, "f1": 0.5250692869740489, "f1_std": 0.06318208609007908, "bacc": 0.5299227799227799, "bacc_std": 0.0602026684615873} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.060229179662687066, "f1": 0.6741465743614228, "f1_std": 0.06069938990241908, "bacc": 0.6771235521235521, "bacc_std": 0.060708661915514814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 10000.0, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06054993733475372, "f1": 0.4980694980694981, "f1_std": 0.061499305600829515, "bacc": 0.4980694980694981, "bacc_std": 0.061220136756844686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05453932732306175, "f1": 0.4846723044397463, "f1_std": 0.06020184380218335, "bacc": 0.5033783783783784, "bacc_std": 0.05457176405896156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05900652055032241, "f1": 0.6289401836684041, "f1_std": 0.06280969185824659, "bacc": 0.6283783783783784, "bacc_std": 0.06058208559538486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.050594047408129894, "f1": 0.48342011510002736, "f1_std": 0.06223730091284569, "bacc": 0.5125482625482626, "bacc_std": 0.05192318018313542} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.06153966152676176, "f1": 0.4615009746588694, "f1_std": 0.06252886367415189, "bacc": 0.4623552123552124, "bacc_std": 0.061813106623232986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06052609589279378, "f1": 0.5374762808349146, "f1_std": 0.060904132016181785, "bacc": 0.5424710424710424, "bacc_std": 0.06113460717550788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05393571452919808, "f1": 0.5289855072463768, "f1_std": 0.06095492702591165, "bacc": 0.5390926640926641, "bacc_std": 0.055289992352924845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060524406539694785, "f1": 0.6150201374081972, "f1_std": 0.06055672266851132, "bacc": 0.6230694980694981, "bacc_std": 0.06057061405854022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.3593813663804626, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.056686669169553984, "f1": 0.6515594541910331, "f1_std": 0.058706671054663685, "bacc": 0.6505791505791505, "bacc_std": 0.05817856203597878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.059838836807984026, "f1": 0.5644080416976918, "f1_std": 0.06381067756323514, "bacc": 0.5656370656370656, "bacc_std": 0.061261480436377226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06100857248911634, "f1": 0.606060606060606, "f1_std": 0.062405489865537025, "bacc": 0.6056949806949807, "bacc_std": 0.06197521869229974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05874369964958548, "f1": 0.543030303030303, "f1_std": 0.06038817988207976, "bacc": 0.542953667953668, "bacc_std": 0.06003518819625919} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.051181903236738976, "f1": 0.5308740978348035, "f1_std": 0.061058728869043725, "bacc": 0.5482625482625483, "bacc_std": 0.05280501421462855} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05859980410735277, "f1": 0.6036585365853658, "f1_std": 0.06512271871877104, "bacc": 0.6061776061776062, "bacc_std": 0.06055959629257867} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06292066198111723, "f1": 0.5381034060279344, "f1_std": 0.06561990331086198, "bacc": 0.5386100386100386, "bacc_std": 0.06416884384509056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.054909448858684574, "f1": 0.6425000000000001, "f1_std": 0.060229103255400214, "bacc": 0.6418918918918919, "bacc_std": 0.05716372688395238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05632554571943104, "f1": 0.5962732919254659, "f1_std": 0.06466910122485398, "bacc": 0.6018339768339769, "bacc_std": 0.05857590421093464} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05649653076221301, "f1": 0.5626293995859213, "f1_std": 0.06274212693020575, "bacc": 0.5704633204633205, "bacc_std": 0.05757478748821781} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.046047580229263016, "f1": 0.5666666666666667, "f1_std": 0.062174406709198125, "bacc": 0.5888030888030888, "bacc_std": 0.04940116775797494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.055571062240881104, "f1": 0.5834401435529352, "f1_std": 0.06236305682430676, "bacc": 0.5883204633204633, "bacc_std": 0.05741727355833475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.056726780042273654, "f1": 0.5250692869740489, "f1_std": 0.06176104818236881, "bacc": 0.5299227799227799, "bacc_std": 0.05795150249710536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.0572460773571523, "f1": 0.5775, "f1_std": 0.06183627874640533, "bacc": 0.5791505791505791, "bacc_std": 0.058789709270174274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05417485606428317, "f1": 0.5190274841437632, "f1_std": 0.06157909892866873, "bacc": 0.5347490347490347, "bacc_std": 0.05486393676243797} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05957084390695656, "f1": 0.6289401836684041, "f1_std": 0.0631125443973189, "bacc": 0.6283783783783784, "bacc_std": 0.06077644959066037} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.055063505026458466, "f1": 0.5411764705882354, "f1_std": 0.06385498223982913, "bacc": 0.5526061776061776, "bacc_std": 0.05683970398529639} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.057503494105310915, "f1": 0.545, "f1_std": 0.061292643112078674, "bacc": 0.5477799227799228, "bacc_std": 0.058572480030164895} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 101.76 | 999.96 | 0.81411 | 0.08431 | 0.80593 | 0.090002 | 0.80322 | 0.089851 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 101.76 | 999.96 | 0.60062 | 0.04962 | 0.58054 | 0.052554 | 0.58356 | 0.05055 | + + +done! total time: 0:04:31 diff --git a/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d468d8fbb465f8eca61375e94f5f9efa7ba8b40e --- /dev/null +++ b/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..25c8ac9fb8504a3c609030fd6d89ffcaf28ccbe3 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,test,0.5853658536585366,0.07386635197164572,0.4558938329430133,0.07245003296675631,0.4548611111111111,0.07947699372970114 +flat_mae,patch,logistic,adni_ad_vs_cn,1,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,1,21.54434690031882,test,0.8048780487804879,0.05391018595692995,0.7152777777777778,0.08597702690792597,0.7016129032258065,0.0834766569027455 +flat_mae,patch,logistic,adni_ad_vs_cn,2,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,2,21.54434690031882,test,0.7560975609756098,0.061981767002958564,0.6693548387096775,0.0847810777401297,0.6693548387096775,0.08654595352042534 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,test,0.7560975609756098,0.05325102462895257,0.6117424242424243,0.09015788579022892,0.6016129032258064,0.07713430210946796 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.046415888336127774,train,0.924119241192412,0.012935588153713175,0.8862014274385408,0.020871005067042907,0.861492316542033,0.024073938862968727 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.046415888336127774,test,0.8536585365853658,0.05470161339513611,0.8136363636363637,0.06720795718216024,0.8354838709677419,0.07056314719409477 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.046415888336127774,train,0.926829268292683,0.012334338164017772,0.8897471366126266,0.02002614236975282,0.8632591009943298,0.023785347749360104 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.046415888336127774,test,0.6829268292682927,0.04635265590127199,0.4696517412935323,0.065422983266204,0.4854838709677419,0.05345639630513192 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,test,0.7073170731707317,0.06410170012993327,0.5729166666666666,0.08631320225809522,0.5693548387096774,0.08067822186854992 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,train,0.991869918699187,0.004641668090191123,0.9885825675299359,0.0065639564923437215,0.986605308570959,0.008639465232023502 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,test,0.7073170731707317,0.05869847532392946,0.5729166666666666,0.08395517558019032,0.5693548387096774,0.07826616690763738 +flat_mae,patch,logistic,adni_ad_vs_cn,8,0.3593813663804626,train,0.991869918699187,0.00461317621811351,0.9885825675299359,0.006501085858554967,0.986605308570959,0.008000841641525334 +flat_mae,patch,logistic,adni_ad_vs_cn,8,0.3593813663804626,test,0.7560975609756098,0.06170290160622114,0.6693548387096775,0.08213433376507209,0.6693548387096775,0.08344518591907935 +flat_mae,patch,logistic,adni_ad_vs_cn,9,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,166.81005372000556,test,0.7073170731707317,0.06964300913228409,0.6272727272727273,0.08186535399146078,0.6370967741935484,0.0869084747678408 +flat_mae,patch,logistic,adni_ad_vs_cn,10,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,10,1291.5496650148827,test,0.7073170731707317,0.07018173061640827,0.6272727272727273,0.0858037201099534,0.6370967741935484,0.09160772015097365 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,test,0.7804878048780488,0.061380722538478046,0.6917293233082706,0.08542095474280705,0.685483870967742,0.08496856069118341 +flat_mae,patch,logistic,adni_ad_vs_cn,12,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,12,21.54434690031882,test,0.6585365853658537,0.06313378710611696,0.5017361111111112,0.08091185453150397,0.5032258064516129,0.07471761504437113 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.3593813663804626,train,0.997289972899729,0.002755280701176509,0.9961941891766453,0.0039028219143826424,0.9941860465116279,0.005911038248454255 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.3593813663804626,test,0.7804878048780488,0.054031607732385134,0.6660633484162897,0.08691143769468333,0.6516129032258065,0.08175133891987646 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,train,0.9105691056910569,0.01353761047142437,0.8612481626234888,0.023349795026764732,0.8283753800640973,0.025975696495811385 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,test,0.8292682926829268,0.0509403776016267,0.7402714932126697,0.08921050106114684,0.717741935483871,0.0842606625964113 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,train,0.986449864498645,0.005850957923652251,0.9808134274809954,0.008401970857523429,0.9749774015942148,0.011271733415687477 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,test,0.7804878048780488,0.05665143325588912,0.6660633484162897,0.09083147691761732,0.6516129032258065,0.08354267479538048 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.3593813663804626,train,0.991869918699187,0.004753838668632086,0.9885825675299359,0.006704088940573208,0.986605308570959,0.008353730666905649 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.3593813663804626,test,0.8536585365853658,0.036985904995716565,0.7415966386554622,0.09010351692403558,0.7,0.07582110524121896 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.046415888336127774,train,0.924119241192412,0.012287246136675815,0.8862014274385408,0.0195801745994442,0.861492316542033,0.02263412954812497 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.046415888336127774,test,0.7073170731707317,0.05518830454863606,0.5340909090909092,0.08348630064649379,0.535483870967742,0.06999235360348482 +flat_mae,patch,logistic,adni_ad_vs_cn,18,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,2.782559402207126,test,0.8048780487804879,0.06311846410995944,0.7515151515151515,0.07703548038342849,0.7693548387096774,0.08152995300018441 +flat_mae,patch,logistic,adni_ad_vs_cn,19,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,21.54434690031882,test,0.7560975609756098,0.06460590271082287,0.6693548387096775,0.08362620796164759,0.6693548387096775,0.08453113853423398 +flat_mae,patch,logistic,adni_ad_vs_cn,20,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,20,2.782559402207126,test,0.7804878048780488,0.06609250273148327,0.7410526315789474,0.0707631977471325,0.7870967741935484,0.07397149512867292 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,test,0.7804878048780488,0.05172572152902073,0.6660633484162897,0.08751576640814474,0.6516129032258065,0.07990633245682943 +flat_mae,patch,logistic,adni_ad_vs_cn,22,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,22,2.782559402207126,test,0.8536585365853658,0.047627220907025335,0.7670454545454546,0.08740481102412913,0.7338709677419355,0.0833820450831452 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,test,0.7317073170731707,0.06721340550688852,0.6479313036690086,0.08291295410040263,0.6532258064516129,0.08648400181674835 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,train,0.989159891598916,0.0055988504024470686,0.9847141673570836,0.007967143085526405,0.9807913550825869,0.010281544556214377 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,test,0.7317073170731707,0.059689360443455255,0.5918552036199095,0.09116953488845253,0.5854838709677419,0.08148273592548415 +flat_mae,patch,logistic,adni_ad_vs_cn,25,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,25,166.81005372000556,test,0.7560975609756098,0.06146873767116401,0.6693548387096775,0.08531968628628803,0.6693548387096775,0.08700309292766682 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,test,0.8048780487804879,0.05987373189942769,0.7354838709677419,0.07956328975010646,0.7354838709677419,0.08143188109767019 +flat_mae,patch,logistic,adni_ad_vs_cn,27,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,27,21.54434690031882,test,0.8292682926829268,0.053336416726256615,0.7402714932126697,0.08857955623317884,0.717741935483871,0.08549314490169431 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.3593813663804626,train,0.997289972899729,0.0026771501688070956,0.9961941891766453,0.0037882972519350617,0.9941860465116279,0.0057434210016849814 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.3593813663804626,test,0.7560975609756098,0.060003152802352695,0.6440972222222222,0.08843170797091617,0.635483870967742,0.08470727584242786 +flat_mae,patch,logistic,adni_ad_vs_cn,29,0.3593813663804626,train,0.991869918699187,0.004462834581547718,0.9885825675299359,0.006297733073871022,0.986605308570959,0.007968638096503606 +flat_mae,patch,logistic,adni_ad_vs_cn,29,0.3593813663804626,test,0.8048780487804879,0.056361542193498454,0.7354838709677419,0.07796743322014178,0.7354838709677419,0.08159971709108084 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.046415888336127774,train,0.9105691056910569,0.012992135604913918,0.8612481626234888,0.022285895763517225,0.8283753800640973,0.025010091535329418 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.046415888336127774,test,0.7560975609756098,0.04731797836700164,0.569327731092437,0.09281233422854243,0.567741935483871,0.06965098990984576 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.3593813663804626,train,0.994579945799458,0.0038069119557094374,0.9923570836785418,0.005437462520314869,0.9883720930232558,0.008167154137539464 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.3593813663804626,test,0.7317073170731707,0.05560599045872572,0.5918552036199095,0.08649327137234365,0.5854838709677419,0.07762647100725746 +flat_mae,patch,logistic,adni_ad_vs_cn,32,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,32,21.54434690031882,test,0.6585365853658537,0.06743637360820451,0.5651515151515152,0.07810186683862563,0.5709677419354839,0.0826876448939389 +flat_mae,patch,logistic,adni_ad_vs_cn,33,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,33,21.54434690031882,test,0.6097560975609756,0.07298609224308505,0.5287356321839081,0.0797788950736887,0.5387096774193548,0.08881179219496932 +flat_mae,patch,logistic,adni_ad_vs_cn,34,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,34,21.54434690031882,test,0.8292682926829268,0.05463841446742919,0.7885040530582166,0.0654277858466808,0.8193548387096774,0.06988346855973626 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,test,0.7317073170731707,0.06486409165844072,0.6232247284878863,0.0884525389368805,0.6193548387096774,0.08697701419892165 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,test,0.7073170731707317,0.05625426680889042,0.5340909090909092,0.08541830736460779,0.535483870967742,0.07257608150527094 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,test,0.6829268292682927,0.06537711952424433,0.5547201336675021,0.08495254629098649,0.5532258064516129,0.08168736880745213 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,train,0.997289972899729,0.0026877130120796697,0.9961941891766453,0.0038054863671016896,0.9941860465116279,0.0057660819852174226 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,test,0.7317073170731707,0.049525313653171074,0.5512437810945273,0.0841628563860447,0.5516129032258065,0.06724606305857803 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.3593813663804626,train,0.997289972899729,0.0025873819640399344,0.9961941891766453,0.0036602690193837874,0.9941860465116279,0.005550836887969385 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.3593813663804626,test,0.6341463414634146,0.06571821663451059,0.48621553884711777,0.07695666036352872,0.48709677419354835,0.07387955295935043 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,test,0.7317073170731707,0.0623072351548938,0.6232247284878863,0.0848774926356396,0.6193548387096774,0.08417209317529728 +flat_mae,patch,logistic,adni_ad_vs_cn,41,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,41,21.54434690031882,test,0.8536585365853658,0.05149470785951873,0.8016129032258064,0.07164623516925737,0.8016129032258064,0.07698034797937726 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,train,0.991869918699187,0.004800647596485512,0.9885825675299359,0.00678634214967772,0.986605308570959,0.008688890813461672 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,test,0.7804878048780488,0.05854737705401343,0.6917293233082706,0.08368967241507448,0.685483870967742,0.08413581038398465 +flat_mae,patch,logistic,adni_ad_vs_cn,43,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,166.81005372000556,test,0.6829268292682927,0.0726202872984208,0.5839188134270101,0.08901748428394309,0.5870967741935484,0.09259364520394478 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,test,0.8048780487804879,0.04308827732770946,0.6554621848739496,0.09582669010765707,0.6338709677419355,0.07503657769165208 +flat_mae,patch,logistic,adni_ad_vs_cn,45,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,166.81005372000556,test,0.8536585365853658,0.04911601678978526,0.7864583333333333,0.07799161096707341,0.7677419354838709,0.08022909730334937 +flat_mae,patch,logistic,adni_ad_vs_cn,46,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,166.81005372000556,test,0.7804878048780488,0.04019497395189424,0.5886287625418061,0.09275681193760191,0.5838709677419355,0.06625664312009948 +flat_mae,patch,logistic,adni_ad_vs_cn,47,0.3593813663804626,train,0.989159891598916,0.005433422538166638,0.9847141673570836,0.007736839807458125,0.9807913550825869,0.010034075272180615 +flat_mae,patch,logistic,adni_ad_vs_cn,47,0.3593813663804626,test,0.8048780487804879,0.05041137138839583,0.6893939393939394,0.08964154258823055,0.667741935483871,0.08086889969948656 +flat_mae,patch,logistic,adni_ad_vs_cn,48,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,48,166.81005372000556,test,0.6829268292682927,0.072823550328523,0.6072218128224024,0.08509614734598585,0.6209677419354839,0.09329840342238152 +flat_mae,patch,logistic,adni_ad_vs_cn,49,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,49,21.54434690031882,test,0.8048780487804879,0.05952169056594976,0.7354838709677419,0.08261690067169539,0.7354838709677419,0.0857223029053736 +flat_mae,patch,logistic,adni_ad_vs_cn,50,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,50,166.81005372000556,test,0.7317073170731707,0.06637237443426919,0.6479313036690086,0.08253963685421709,0.6532258064516129,0.08374469470825835 +flat_mae,patch,logistic,adni_ad_vs_cn,51,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,51,21.54434690031882,test,0.6341463414634146,0.06873549817308175,0.5199063231850116,0.0820297925501416,0.5209677419354839,0.08429176424331608 +flat_mae,patch,logistic,adni_ad_vs_cn,52,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,52,21.54434690031882,test,0.6829268292682927,0.0695735458830013,0.5839188134270101,0.08550833222013729,0.5870967741935484,0.08871766972794255 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,test,0.8048780487804879,0.042022061569330284,0.6554621848739496,0.09436140665069609,0.6338709677419355,0.07330387170306715 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,train,0.926829268292683,0.011968598981472596,0.887597730067579,0.020169343428268784,0.8551647629221792,0.0235200137614075 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.046415888336127774,test,0.7804878048780488,0.05957970980413577,0.6660633484162897,0.09420053828039614,0.6516129032258065,0.08611389259230211 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.3593813663804626,train,0.991869918699187,0.004925442755702336,0.9885825675299359,0.006955378621758862,0.986605308570959,0.008713627881419795 +flat_mae,patch,logistic,adni_ad_vs_cn,55,0.3593813663804626,test,0.8048780487804879,0.05776472826087685,0.7354838709677419,0.08057535054223001,0.7354838709677419,0.08376707560405186 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,train,0.997289972899729,0.0026833374080187755,0.9961941891766453,0.0038008554975543407,0.9941860465116279,0.005756694788133296 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,test,0.7073170731707317,0.054980846808360684,0.5340909090909092,0.08040178397247043,0.535483870967742,0.0686516922523801 +flat_mae,patch,logistic,adni_ad_vs_cn,57,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,57,21.54434690031882,test,0.7560975609756098,0.06530341029071804,0.6693548387096775,0.0903257246699847,0.6693548387096775,0.09142580713451595 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,test,0.8048780487804879,0.05515593599451731,0.7152777777777778,0.08583845250229447,0.7016129032258065,0.08385552115824027 +flat_mae,patch,logistic,adni_ad_vs_cn,59,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,166.81005372000556,test,0.7804878048780488,0.049204281043947334,0.6328358208955224,0.09244720167071581,0.6177419354838709,0.07575783004901666 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.046415888336127774,train,0.9186991869918699,0.012684106910101056,0.8744897959183674,0.02154979871481047,0.8417700714931383,0.02473750607127376 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.046415888336127774,test,0.7804878048780488,0.05625765067939033,0.6660633484162897,0.09245433308035948,0.6516129032258065,0.0844890670168005 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,train,0.991869918699187,0.00466337318352436,0.9885825675299359,0.006602379486343818,0.986605308570959,0.008649567506473712 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,test,0.7317073170731707,0.05640214238550867,0.5918552036199095,0.08738230100677158,0.5854838709677419,0.0781524575308169 +flat_mae,patch,logistic,adni_ad_vs_cn,62,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,62,21.54434690031882,test,0.6585365853658537,0.06223019575361796,0.5017361111111112,0.07837861446662327,0.5032258064516129,0.07363439587572747 +flat_mae,patch,logistic,adni_ad_vs_cn,63,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,21.54434690031882,test,0.7560975609756098,0.060238747681116124,0.6693548387096775,0.083269184975022,0.6693548387096775,0.08488400262820564 +flat_mae,patch,logistic,adni_ad_vs_cn,64,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,64,166.81005372000556,test,0.7560975609756098,0.06029511006244203,0.6440972222222222,0.08617357999023835,0.635483870967742,0.08060846907217784 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.046415888336127774,train,0.926829268292683,0.012239690915627453,0.887597730067579,0.020758968378944132,0.8551647629221792,0.024296919374212883 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.046415888336127774,test,0.7560975609756098,0.06233596677932339,0.6440972222222222,0.09274211761519914,0.635483870967742,0.08992350842424744 +flat_mae,patch,logistic,adni_ad_vs_cn,66,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,66,2.782559402207126,test,0.7073170731707317,0.06096304260738657,0.5729166666666666,0.0871052016458449,0.5693548387096774,0.08034549511209382 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,166.81005372000556,test,0.8048780487804879,0.056125210577278394,0.7152777777777778,0.08812599671116532,0.7016129032258065,0.08497914318392903 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.046415888336127774,train,0.926829268292683,0.012950299408582198,0.8897471366126266,0.02096213295936407,0.8632591009943298,0.024241733144302547 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.046415888336127774,test,0.7560975609756098,0.06491940731982912,0.6893939393939394,0.07727275671658856,0.7032258064516128,0.0831321600548176 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,test,0.7804878048780488,0.06038297367483309,0.6917293233082706,0.08572546880720444,0.685483870967742,0.08539629256940474 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,train,0.9132791327913279,0.013213014977961608,0.8661224489795918,0.023014966495215682,0.8341893335524694,0.026255686930354082 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,test,0.7804878048780488,0.037534326941996675,0.5886287625418061,0.08634322435858609,0.5838709677419355,0.06174752148863623 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,train,0.9295392953929539,0.01249628024647272,0.8922893838692294,0.02091377701784035,0.8609787164105513,0.024753402067189283 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,test,0.7073170731707317,0.05568542135155785,0.5340909090909092,0.08611944502630993,0.535483870967742,0.07329033244831828 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,72,166.81005372000556,test,0.7804878048780488,0.05474594397684203,0.6660633484162897,0.08821744239812374,0.6516129032258065,0.08136827413298017 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.3593813663804626,train,0.991869918699187,0.0046030877304059775,0.9885825675299359,0.006494663373041181,0.986605308570959,0.008107414530145076 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.3593813663804626,test,0.8292682926829268,0.05580359882552229,0.7602339181286549,0.08268777083551411,0.7516129032258064,0.08496527263906467 +flat_mae,patch,logistic,adni_ad_vs_cn,74,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,74,166.81005372000556,test,0.7560975609756098,0.05823777391461187,0.6440972222222222,0.0856649423359408,0.635483870967742,0.08176734405366246 +flat_mae,patch,logistic,adni_ad_vs_cn,75,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,75,166.81005372000556,test,0.8536585365853658,0.053442113435003324,0.8016129032258064,0.07369994504183223,0.8016129032258064,0.07947899047647251 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,test,0.6585365853658537,0.04357176279185874,0.39705882352941174,0.015967585899545497,0.43548387096774194,0.028813585072035616 +flat_mae,patch,logistic,adni_ad_vs_cn,77,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,77,166.81005372000556,test,0.7317073170731707,0.05990148565222108,0.6232247284878863,0.08474017033244868,0.6193548387096774,0.0828688440692745 +flat_mae,patch,logistic,adni_ad_vs_cn,78,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,166.81005372000556,test,0.7317073170731707,0.06827916242467993,0.6232247284878863,0.09537404928292492,0.6193548387096774,0.0922470177798827 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,test,0.9024390243902439,0.046890102762323665,0.8757575757575757,0.05690882441074128,0.9016129032258065,0.055331638400796954 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,test,0.6341463414634146,0.06349930557452856,0.48621553884711777,0.07700010581437944,0.48709677419354835,0.0742686326943568 +flat_mae,patch,logistic,adni_ad_vs_cn,81,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,81,21.54434690031882,test,0.7804878048780488,0.0541150425909677,0.6660633484162897,0.09247558794321739,0.6516129032258065,0.08325339627712096 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,test,0.7073170731707317,0.06313203447944146,0.603225806451613,0.08263421425543989,0.603225806451613,0.08322627056634782 +flat_mae,patch,logistic,adni_ad_vs_cn,83,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,2.782559402207126,test,0.7317073170731707,0.06509956681152483,0.6232247284878863,0.09001409711109588,0.6193548387096774,0.08814094497486052 +flat_mae,patch,logistic,adni_ad_vs_cn,84,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,84,2.782559402207126,test,0.6829268292682927,0.06655880824792237,0.5547201336675021,0.08524929216672607,0.5532258064516129,0.08219143006334956 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,test,0.8048780487804879,0.05791398573919672,0.7354838709677419,0.07732404140369112,0.7354838709677419,0.08040211446476905 +flat_mae,patch,logistic,adni_ad_vs_cn,86,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,21.54434690031882,test,0.6097560975609756,0.06897945620075109,0.5030303030303029,0.07609141856558463,0.5048387096774194,0.08024937582622983 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.3593813663804626,train,0.991869918699187,0.004531823320695356,0.9885825675299359,0.006399847326545155,0.986605308570959,0.008215456002119838 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.3593813663804626,test,0.8780487804878049,0.04664659492996655,0.8144796380090498,0.08183768703725718,0.7838709677419355,0.08338934644747208 +flat_mae,patch,logistic,adni_ad_vs_cn,88,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,88,2.782559402207126,test,0.7560975609756098,0.06213722795680428,0.6693548387096775,0.08127940765455609,0.6693548387096775,0.08238885516096608 +flat_mae,patch,logistic,adni_ad_vs_cn,89,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,89,166.81005372000556,test,0.7804878048780488,0.06501681473812182,0.7280766396462786,0.07623836691415395,0.7532258064516129,0.08150183338399926 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.3593813663804626,train,0.983739837398374,0.006601374588596787,0.9768796992481203,0.009570674458254123,0.9691634481058427,0.012887383869980625 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.3593813663804626,test,0.7560975609756098,0.0619282461725146,0.6440972222222222,0.09439593471415984,0.635483870967742,0.08958584324017421 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.3593813663804626,train,0.994579945799458,0.003758470529218197,0.9924192620593311,0.005257372648629068,0.9924192620593311,0.005985416770038435 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.3593813663804626,test,0.7073170731707317,0.06478526282724706,0.603225806451613,0.08404953414526098,0.603225806451613,0.08625785527274546 +flat_mae,patch,logistic,adni_ad_vs_cn,92,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,92,2.782559402207126,test,0.6585365853658537,0.056398746088727233,0.4564393939393939,0.06952246989062644,0.4693548387096774,0.061472894376107486 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.046415888336127774,train,0.940379403794038,0.010816505469148143,0.9088602478893479,0.018096460568637056,0.8761401922918892,0.022387356718131784 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.046415888336127774,test,0.7073170731707317,0.06771329136501665,0.603225806451613,0.0861114929051163,0.603225806451613,0.0858741970624694 +flat_mae,patch,logistic,adni_ad_vs_cn,94,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,94,10000.0,test,0.6829268292682927,0.06245980980255294,0.5547201336675021,0.08223894148352015,0.5532258064516129,0.07924385177226353 +flat_mae,patch,logistic,adni_ad_vs_cn,95,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,95,21.54434690031882,test,0.7317073170731707,0.05993055668048907,0.6232247284878863,0.08259845078475109,0.6193548387096774,0.0803052274945738 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,test,0.7560975609756098,0.061250424612157114,0.6893939393939394,0.0754613459593449,0.7032258064516128,0.08066997063549053 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.3593813663804626,train,0.991869918699187,0.0048159735975308576,0.9885825675299359,0.006804175807948498,0.986605308570959,0.008667941043457045 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.3593813663804626,test,0.7804878048780488,0.040695207832269396,0.5886287625418061,0.09217418692311527,0.5838709677419355,0.06582024357369012 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.005994842503189409,train,0.8319783197831978,0.013373674842603676,0.695576964019587,0.030307143767250615,0.6638178979373819,0.02508040687479159 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.005994842503189409,test,0.7804878048780488,0.048083285864591145,0.6328358208955224,0.09401663382734154,0.6177419354838709,0.07778912842782644 +flat_mae,patch,logistic,adni_ad_vs_cn,99,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,99,166.81005372000556,test,0.7317073170731707,0.0640586249934384,0.6232247284878863,0.08839824034097298,0.6193548387096774,0.08582491015383371 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,train,0.9186991869918699,0.013166452961292803,0.8732249198350893,0.023130337592401583,0.837722902457063,0.02681260064564242 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.046415888336127774,test,0.6829268292682927,0.0563032496317555,0.5176470588235295,0.07654181812916153,0.5193548387096775,0.06803311085594083 diff --git a/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..111fe051b64d2e29b22af6a501bc793c208d94bb --- /dev/null +++ b/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:11:34 time: 4.2346 data: 3.4810 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:57 time: 0.2070 data: 0.0702 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:36 time: 0.1929 data: 0.0554 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:27 time: 0.1948 data: 0.0513 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:21 time: 0.2073 data: 0.0564 max mem: 2851 +extract (train) [100/164] eta: 0:00:15 time: 0.2011 data: 0.0535 max mem: 2851 +extract (train) [120/164] eta: 0:00:10 time: 0.2009 data: 0.0547 max mem: 2851 +extract (train) [140/164] eta: 0:00:05 time: 0.1686 data: 0.0426 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1476 data: 0.0401 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1497 data: 0.0406 max mem: 2851 +extract (train) Total time: 0:00:35 (0.2168 s / it) +extract (validation) [ 0/21] eta: 0:01:35 time: 4.5584 data: 4.4337 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1444 data: 0.0385 max mem: 2851 +extract (validation) Total time: 0:00:07 (0.3697 s / it) +extract (test) [ 0/21] eta: 0:01:33 time: 4.4379 data: 4.3180 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1477 data: 0.0367 max mem: 2851 +extract (test) Total time: 0:00:07 (0.3679 s / it) +feature extraction time: 0:00:51 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | test | 0.58537 | 0.073866 | 0.45589 | 0.07245 | 0.45486 | 0.079477 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05391018595692995, "f1": 0.7152777777777778, "f1_std": 0.08597702690792597, "bacc": 0.7016129032258065, "bacc_std": 0.0834766569027455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.061981767002958564, "f1": 0.6693548387096775, "f1_std": 0.0847810777401297, "bacc": 0.6693548387096775, "bacc_std": 0.08654595352042534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05325102462895257, "f1": 0.6117424242424243, "f1_std": 0.09015788579022892, "bacc": 0.6016129032258064, "bacc_std": 0.07713430210946796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.05470161339513611, "f1": 0.8136363636363637, "f1_std": 0.06720795718216024, "bacc": 0.8354838709677419, "bacc_std": 0.07056314719409477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.04635265590127199, "f1": 0.4696517412935323, "f1_std": 0.065422983266204, "bacc": 0.4854838709677419, "bacc_std": 0.05345639630513192} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06410170012993327, "f1": 0.5729166666666666, "f1_std": 0.08631320225809522, "bacc": 0.5693548387096774, "bacc_std": 0.08067822186854992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05869847532392946, "f1": 0.5729166666666666, "f1_std": 0.08395517558019032, "bacc": 0.5693548387096774, "bacc_std": 0.07826616690763738} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06170290160622114, "f1": 0.6693548387096775, "f1_std": 0.08213433376507209, "bacc": 0.6693548387096775, "bacc_std": 0.08344518591907935} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06964300913228409, "f1": 0.6272727272727273, "f1_std": 0.08186535399146078, "bacc": 0.6370967741935484, "bacc_std": 0.0869084747678408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 1291.5496650148827, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07018173061640827, "f1": 0.6272727272727273, "f1_std": 0.0858037201099534, "bacc": 0.6370967741935484, "bacc_std": 0.09160772015097365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.061380722538478046, "f1": 0.6917293233082706, "f1_std": 0.08542095474280705, "bacc": 0.685483870967742, "bacc_std": 0.08496856069118341} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06313378710611696, "f1": 0.5017361111111112, "f1_std": 0.08091185453150397, "bacc": 0.5032258064516129, "bacc_std": 0.07471761504437113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.054031607732385134, "f1": 0.6660633484162897, "f1_std": 0.08691143769468333, "bacc": 0.6516129032258065, "bacc_std": 0.08175133891987646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.0509403776016267, "f1": 0.7402714932126697, "f1_std": 0.08921050106114684, "bacc": 0.717741935483871, "bacc_std": 0.0842606625964113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05665143325588912, "f1": 0.6660633484162897, "f1_std": 0.09083147691761732, "bacc": 0.6516129032258065, "bacc_std": 0.08354267479538048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.036985904995716565, "f1": 0.7415966386554622, "f1_std": 0.09010351692403558, "bacc": 0.7, "bacc_std": 0.07582110524121896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05518830454863606, "f1": 0.5340909090909092, "f1_std": 0.08348630064649379, "bacc": 0.535483870967742, "bacc_std": 0.06999235360348482} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06311846410995944, "f1": 0.7515151515151515, "f1_std": 0.07703548038342849, "bacc": 0.7693548387096774, "bacc_std": 0.08152995300018441} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06460590271082287, "f1": 0.6693548387096775, "f1_std": 0.08362620796164759, "bacc": 0.6693548387096775, "bacc_std": 0.08453113853423398} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06609250273148327, "f1": 0.7410526315789474, "f1_std": 0.0707631977471325, "bacc": 0.7870967741935484, "bacc_std": 0.07397149512867292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05172572152902073, "f1": 0.6660633484162897, "f1_std": 0.08751576640814474, "bacc": 0.6516129032258065, "bacc_std": 0.07990633245682943} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 2.782559402207126, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.047627220907025335, "f1": 0.7670454545454546, "f1_std": 0.08740481102412913, "bacc": 0.7338709677419355, "bacc_std": 0.0833820450831452} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06721340550688852, "f1": 0.6479313036690086, "f1_std": 0.08291295410040263, "bacc": 0.6532258064516129, "bacc_std": 0.08648400181674835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.059689360443455255, "f1": 0.5918552036199095, "f1_std": 0.09116953488845253, "bacc": 0.5854838709677419, "bacc_std": 0.08148273592548415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06146873767116401, "f1": 0.6693548387096775, "f1_std": 0.08531968628628803, "bacc": 0.6693548387096775, "bacc_std": 0.08700309292766682} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05987373189942769, "f1": 0.7354838709677419, "f1_std": 0.07956328975010646, "bacc": 0.7354838709677419, "bacc_std": 0.08143188109767019} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.053336416726256615, "f1": 0.7402714932126697, "f1_std": 0.08857955623317884, "bacc": 0.717741935483871, "bacc_std": 0.08549314490169431} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060003152802352695, "f1": 0.6440972222222222, "f1_std": 0.08843170797091617, "bacc": 0.635483870967742, "bacc_std": 0.08470727584242786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.056361542193498454, "f1": 0.7354838709677419, "f1_std": 0.07796743322014178, "bacc": 0.7354838709677419, "bacc_std": 0.08159971709108084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04731797836700164, "f1": 0.569327731092437, "f1_std": 0.09281233422854243, "bacc": 0.567741935483871, "bacc_std": 0.06965098990984576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05560599045872572, "f1": 0.5918552036199095, "f1_std": 0.08649327137234365, "bacc": 0.5854838709677419, "bacc_std": 0.07762647100725746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06743637360820451, "f1": 0.5651515151515152, "f1_std": 0.07810186683862563, "bacc": 0.5709677419354839, "bacc_std": 0.0826876448939389} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07298609224308505, "f1": 0.5287356321839081, "f1_std": 0.0797788950736887, "bacc": 0.5387096774193548, "bacc_std": 0.08881179219496932} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05463841446742919, "f1": 0.7885040530582166, "f1_std": 0.0654277858466808, "bacc": 0.8193548387096774, "bacc_std": 0.06988346855973626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06486409165844072, "f1": 0.6232247284878863, "f1_std": 0.0884525389368805, "bacc": 0.6193548387096774, "bacc_std": 0.08697701419892165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05625426680889042, "f1": 0.5340909090909092, "f1_std": 0.08541830736460779, "bacc": 0.535483870967742, "bacc_std": 0.07257608150527094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06537711952424433, "f1": 0.5547201336675021, "f1_std": 0.08495254629098649, "bacc": 0.5532258064516129, "bacc_std": 0.08168736880745213} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.049525313653171074, "f1": 0.5512437810945273, "f1_std": 0.0841628563860447, "bacc": 0.5516129032258065, "bacc_std": 0.06724606305857803} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06571821663451059, "f1": 0.48621553884711777, "f1_std": 0.07695666036352872, "bacc": 0.48709677419354835, "bacc_std": 0.07387955295935043} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0623072351548938, "f1": 0.6232247284878863, "f1_std": 0.0848774926356396, "bacc": 0.6193548387096774, "bacc_std": 0.08417209317529728} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 21.54434690031882, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.05149470785951873, "f1": 0.8016129032258064, "f1_std": 0.07164623516925737, "bacc": 0.8016129032258064, "bacc_std": 0.07698034797937726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05854737705401343, "f1": 0.6917293233082706, "f1_std": 0.08368967241507448, "bacc": 0.685483870967742, "bacc_std": 0.08413581038398465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0726202872984208, "f1": 0.5839188134270101, "f1_std": 0.08901748428394309, "bacc": 0.5870967741935484, "bacc_std": 0.09259364520394478} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04308827732770946, "f1": 0.6554621848739496, "f1_std": 0.09582669010765707, "bacc": 0.6338709677419355, "bacc_std": 0.07503657769165208} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 166.81005372000556, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04911601678978526, "f1": 0.7864583333333333, "f1_std": 0.07799161096707341, "bacc": 0.7677419354838709, "bacc_std": 0.08022909730334937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04019497395189424, "f1": 0.5886287625418061, "f1_std": 0.09275681193760191, "bacc": 0.5838709677419355, "bacc_std": 0.06625664312009948} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05041137138839583, "f1": 0.6893939393939394, "f1_std": 0.08964154258823055, "bacc": 0.667741935483871, "bacc_std": 0.08086889969948656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.072823550328523, "f1": 0.6072218128224024, "f1_std": 0.08509614734598585, "bacc": 0.6209677419354839, "bacc_std": 0.09329840342238152} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05952169056594976, "f1": 0.7354838709677419, "f1_std": 0.08261690067169539, "bacc": 0.7354838709677419, "bacc_std": 0.0857223029053736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06637237443426919, "f1": 0.6479313036690086, "f1_std": 0.08253963685421709, "bacc": 0.6532258064516129, "bacc_std": 0.08374469470825835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06873549817308175, "f1": 0.5199063231850116, "f1_std": 0.0820297925501416, "bacc": 0.5209677419354839, "bacc_std": 0.08429176424331608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0695735458830013, "f1": 0.5839188134270101, "f1_std": 0.08550833222013729, "bacc": 0.5870967741935484, "bacc_std": 0.08871766972794255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.042022061569330284, "f1": 0.6554621848739496, "f1_std": 0.09436140665069609, "bacc": 0.6338709677419355, "bacc_std": 0.07330387170306715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05957970980413577, "f1": 0.6660633484162897, "f1_std": 0.09420053828039614, "bacc": 0.6516129032258065, "bacc_std": 0.08611389259230211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05776472826087685, "f1": 0.7354838709677419, "f1_std": 0.08057535054223001, "bacc": 0.7354838709677419, "bacc_std": 0.08376707560405186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.054980846808360684, "f1": 0.5340909090909092, "f1_std": 0.08040178397247043, "bacc": 0.535483870967742, "bacc_std": 0.0686516922523801} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06530341029071804, "f1": 0.6693548387096775, "f1_std": 0.0903257246699847, "bacc": 0.6693548387096775, "bacc_std": 0.09142580713451595} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05515593599451731, "f1": 0.7152777777777778, "f1_std": 0.08583845250229447, "bacc": 0.7016129032258065, "bacc_std": 0.08385552115824027} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.049204281043947334, "f1": 0.6328358208955224, "f1_std": 0.09244720167071581, "bacc": 0.6177419354838709, "bacc_std": 0.07575783004901666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05625765067939033, "f1": 0.6660633484162897, "f1_std": 0.09245433308035948, "bacc": 0.6516129032258065, "bacc_std": 0.0844890670168005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05640214238550867, "f1": 0.5918552036199095, "f1_std": 0.08738230100677158, "bacc": 0.5854838709677419, "bacc_std": 0.0781524575308169} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06223019575361796, "f1": 0.5017361111111112, "f1_std": 0.07837861446662327, "bacc": 0.5032258064516129, "bacc_std": 0.07363439587572747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060238747681116124, "f1": 0.6693548387096775, "f1_std": 0.083269184975022, "bacc": 0.6693548387096775, "bacc_std": 0.08488400262820564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06029511006244203, "f1": 0.6440972222222222, "f1_std": 0.08617357999023835, "bacc": 0.635483870967742, "bacc_std": 0.08060846907217784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06233596677932339, "f1": 0.6440972222222222, "f1_std": 0.09274211761519914, "bacc": 0.635483870967742, "bacc_std": 0.08992350842424744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06096304260738657, "f1": 0.5729166666666666, "f1_std": 0.0871052016458449, "bacc": 0.5693548387096774, "bacc_std": 0.08034549511209382} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.056125210577278394, "f1": 0.7152777777777778, "f1_std": 0.08812599671116532, "bacc": 0.7016129032258065, "bacc_std": 0.08497914318392903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06491940731982912, "f1": 0.6893939393939394, "f1_std": 0.07727275671658856, "bacc": 0.7032258064516128, "bacc_std": 0.0831321600548176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06038297367483309, "f1": 0.6917293233082706, "f1_std": 0.08572546880720444, "bacc": 0.685483870967742, "bacc_std": 0.08539629256940474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.037534326941996675, "f1": 0.5886287625418061, "f1_std": 0.08634322435858609, "bacc": 0.5838709677419355, "bacc_std": 0.06174752148863623} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05568542135155785, "f1": 0.5340909090909092, "f1_std": 0.08611944502630993, "bacc": 0.535483870967742, "bacc_std": 0.07329033244831828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05474594397684203, "f1": 0.6660633484162897, "f1_std": 0.08821744239812374, "bacc": 0.6516129032258065, "bacc_std": 0.08136827413298017} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05580359882552229, "f1": 0.7602339181286549, "f1_std": 0.08268777083551411, "bacc": 0.7516129032258064, "bacc_std": 0.08496527263906467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05823777391461187, "f1": 0.6440972222222222, "f1_std": 0.0856649423359408, "bacc": 0.635483870967742, "bacc_std": 0.08176734405366246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 166.81005372000556, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.053442113435003324, "f1": 0.8016129032258064, "f1_std": 0.07369994504183223, "bacc": 0.8016129032258064, "bacc_std": 0.07947899047647251} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.04357176279185874, "f1": 0.39705882352941174, "f1_std": 0.015967585899545497, "bacc": 0.43548387096774194, "bacc_std": 0.028813585072035616} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05990148565222108, "f1": 0.6232247284878863, "f1_std": 0.08474017033244868, "bacc": 0.6193548387096774, "bacc_std": 0.0828688440692745} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06827916242467993, "f1": 0.6232247284878863, "f1_std": 0.09537404928292492, "bacc": 0.6193548387096774, "bacc_std": 0.0922470177798827} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 166.81005372000556, "split": "test", "acc": 0.9024390243902439, "acc_std": 0.046890102762323665, "f1": 0.8757575757575757, "f1_std": 0.05690882441074128, "bacc": 0.9016129032258065, "bacc_std": 0.055331638400796954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06349930557452856, "f1": 0.48621553884711777, "f1_std": 0.07700010581437944, "bacc": 0.48709677419354835, "bacc_std": 0.0742686326943568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0541150425909677, "f1": 0.6660633484162897, "f1_std": 0.09247558794321739, "bacc": 0.6516129032258065, "bacc_std": 0.08325339627712096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06313203447944146, "f1": 0.603225806451613, "f1_std": 0.08263421425543989, "bacc": 0.603225806451613, "bacc_std": 0.08322627056634782} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06509956681152483, "f1": 0.6232247284878863, "f1_std": 0.09001409711109588, "bacc": 0.6193548387096774, "bacc_std": 0.08814094497486052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06655880824792237, "f1": 0.5547201336675021, "f1_std": 0.08524929216672607, "bacc": 0.5532258064516129, "bacc_std": 0.08219143006334956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05791398573919672, "f1": 0.7354838709677419, "f1_std": 0.07732404140369112, "bacc": 0.7354838709677419, "bacc_std": 0.08040211446476905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06897945620075109, "f1": 0.5030303030303029, "f1_std": 0.07609141856558463, "bacc": 0.5048387096774194, "bacc_std": 0.08024937582622983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.04664659492996655, "f1": 0.8144796380090498, "f1_std": 0.08183768703725718, "bacc": 0.7838709677419355, "bacc_std": 0.08338934644747208} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06213722795680428, "f1": 0.6693548387096775, "f1_std": 0.08127940765455609, "bacc": 0.6693548387096775, "bacc_std": 0.08238885516096608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06501681473812182, "f1": 0.7280766396462786, "f1_std": 0.07623836691415395, "bacc": 0.7532258064516129, "bacc_std": 0.08150183338399926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0619282461725146, "f1": 0.6440972222222222, "f1_std": 0.09439593471415984, "bacc": 0.635483870967742, "bacc_std": 0.08958584324017421} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06478526282724706, "f1": 0.603225806451613, "f1_std": 0.08404953414526098, "bacc": 0.603225806451613, "bacc_std": 0.08625785527274546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.056398746088727233, "f1": 0.4564393939393939, "f1_std": 0.06952246989062644, "bacc": 0.4693548387096774, "bacc_std": 0.061472894376107486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06771329136501665, "f1": 0.603225806451613, "f1_std": 0.0861114929051163, "bacc": 0.603225806451613, "bacc_std": 0.0858741970624694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 10000.0, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06245980980255294, "f1": 0.5547201336675021, "f1_std": 0.08223894148352015, "bacc": 0.5532258064516129, "bacc_std": 0.07924385177226353} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05993055668048907, "f1": 0.6232247284878863, "f1_std": 0.08259845078475109, "bacc": 0.6193548387096774, "bacc_std": 0.0803052274945738} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.061250424612157114, "f1": 0.6893939393939394, "f1_std": 0.0754613459593449, "bacc": 0.7032258064516128, "bacc_std": 0.08066997063549053} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.040695207832269396, "f1": 0.5886287625418061, "f1_std": 0.09217418692311527, "bacc": 0.5838709677419355, "bacc_std": 0.06582024357369012} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.048083285864591145, "f1": 0.6328358208955224, "f1_std": 0.09401663382734154, "bacc": 0.6177419354838709, "bacc_std": 0.07778912842782644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0640586249934384, "f1": 0.6232247284878863, "f1_std": 0.08839824034097298, "bacc": 0.6193548387096774, "bacc_std": 0.08582491015383371} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0563032496317555, "f1": 0.5176470588235295, "f1_std": 0.07654181812916153, "bacc": 0.5193548387096775, "bacc_std": 0.06803311085594083} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 164.81 | 1003.7 | 0.98672 | 0.030189 | 0.97939 | 0.048896 | 0.97455 | 0.05885 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 164.81 | 1003.7 | 0.75122 | 0.060741 | 0.63988 | 0.089256 | 0.63768 | 0.087864 | + + +done! total time: 0:04:37 diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..89fc6d5ea46e2e47de71635e7aa3504febda575c --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..9182fbf2bd1a79fda5d9b6576d3348971caac774 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 8, "eval/id_best": 32, "eval/lr_best": 0.0011099999999999999, "eval/wd_best": 0.05, "eval/train/loss": 0.0005137641564942896, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.0740802213549614, "eval/validation/acc": 0.9818948412698413, "eval/validation/acc_std": 0.002097045764527702, "eval/validation/f1": 0.9781444390549351, "eval/validation/f1_std": 0.002774058814578552, "eval/test/loss": 0.0744621604681015, "eval/test/acc": 0.9805555555555555, "eval/test/acc_std": 0.001995141609403247, "eval/test/f1": 0.9784635221118831, "eval/test/f1_std": 0.0024018065907962607} diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..2c7ce80de5b58a201de28c32c1ed5550c5663866 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 8, "eval/best/id_best": 32, "eval/best/lr_best": 0.0011099999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.0005137641564942896, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.0740802213549614, "eval/best/validation/acc": 0.9818948412698413, "eval/best/validation/acc_std": 0.002097045764527702, "eval/best/validation/f1": 0.9781444390549351, "eval/best/validation/f1_std": 0.002774058814578552, "eval/best/test/loss": 0.0744621604681015, "eval/best/test/acc": 0.9805555555555555, "eval/best/test/acc_std": 0.001995141609403247, "eval/best/test/f1": 0.9784635221118831, "eval/best/test/f1_std": 0.0024018065907962607} diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..f4cb276e98aa60334e56b0f279cafaeb768e609d --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 32, "eval/last/lr_best": 0.0011099999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.00024033591034822166, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.07630305737257004, "eval/last/validation/acc": 0.9811507936507936, "eval/last/validation/acc_std": 0.002150529316895912, "eval/last/validation/f1": 0.9772682189701773, "eval/last/validation/f1_std": 0.002873631591748449, "eval/last/test/loss": 0.07681722193956375, "eval/last/test/acc": 0.9807539682539682, "eval/last/test/acc_std": 0.00203266797658856, "eval/last/test/f1": 0.9782791448250526, "eval/last/test/f1_std": 0.0025320041470026336} diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..01225996bb5de5d0ddc17ef099ebe5dfacea7dc7 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,8,0.0011099999999999999,0.05,32,"[3.7, 1.0]",train,0.0005137641564942896,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,8,0.0011099999999999999,0.05,32,"[3.7, 1.0]",validation,0.0740802213549614,0.9818948412698413,0.002097045764527702,0.9781444390549351,0.002774058814578552 +flat_mae,patch,attn,hcpya_task21,best,8,0.0011099999999999999,0.05,32,"[3.7, 1.0]",test,0.0744621604681015,0.9805555555555555,0.001995141609403247,0.9784635221118831,0.0024018065907962607 diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..01225996bb5de5d0ddc17ef099ebe5dfacea7dc7 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,8,0.0011099999999999999,0.05,32,"[3.7, 1.0]",train,0.0005137641564942896,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,8,0.0011099999999999999,0.05,32,"[3.7, 1.0]",validation,0.0740802213549614,0.9818948412698413,0.002097045764527702,0.9781444390549351,0.002774058814578552 +flat_mae,patch,attn,hcpya_task21,best,8,0.0011099999999999999,0.05,32,"[3.7, 1.0]",test,0.0744621604681015,0.9805555555555555,0.001995141609403247,0.9784635221118831,0.0024018065907962607 diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..8060e0759412d9a2843d471e611e8480938e3fca --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0011099999999999999,0.05,32,"[3.7, 1.0]",train,0.00024033591034822166,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.0011099999999999999,0.05,32,"[3.7, 1.0]",validation,0.07630305737257004,0.9811507936507936,0.002150529316895912,0.9772682189701773,0.002873631591748449 +flat_mae,patch,attn,hcpya_task21,last,19,0.0011099999999999999,0.05,32,"[3.7, 1.0]",test,0.07681722193956375,0.9807539682539682,0.00203266797658856,0.9782791448250526,0.0025320041470026336 diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..259f6e631c9697c67c89c10c5feefc97c599d00b --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,885 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:38:29 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:20:57 lr: nan time: 3.1436 data: 2.6849 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:32 lr: 0.000003 loss: 3.0531 (3.0542) grad: 0.3047 (0.3217) time: 0.4308 data: 0.0034 max mem: 22446 +train: [0] [ 40/400] eta: 0:02:58 lr: 0.000006 loss: 3.0230 (3.0085) grad: 0.3167 (0.3213) time: 0.4294 data: 0.0039 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:42 lr: 0.000009 loss: 2.8746 (2.9508) grad: 0.3167 (0.3154) time: 0.4358 data: 0.0043 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:29 lr: 0.000012 loss: 2.7749 (2.8922) grad: 0.2926 (0.3064) time: 0.4344 data: 0.0044 max mem: 22446 +train: [0] [100/400] eta: 0:02:18 lr: 0.000015 loss: 2.6184 (2.8263) grad: 0.2714 (0.3010) time: 0.4396 data: 0.0042 max mem: 22446 +train: [0] [120/400] eta: 0:02:07 lr: 0.000018 loss: 2.5304 (2.7613) grad: 0.2760 (0.2954) time: 0.4374 data: 0.0042 max mem: 22446 +train: [0] [140/400] eta: 0:01:59 lr: 0.000021 loss: 2.3794 (2.6989) grad: 0.2716 (0.2932) time: 0.4887 data: 0.0046 max mem: 22446 +train: [0] [160/400] eta: 0:01:50 lr: 0.000024 loss: 2.2654 (2.6431) grad: 0.2602 (0.2874) time: 0.4487 data: 0.0045 max mem: 22446 +train: [0] [180/400] eta: 0:01:40 lr: 0.000027 loss: 2.1755 (2.5867) grad: 0.2373 (0.2824) time: 0.4353 data: 0.0043 max mem: 22446 +train: [0] [200/400] eta: 0:01:31 lr: 0.000030 loss: 2.1218 (2.5337) grad: 0.2471 (0.2787) time: 0.4514 data: 0.0044 max mem: 22446 +train: [0] [220/400] eta: 0:01:21 lr: 0.000033 loss: 2.0214 (2.4838) grad: 0.2284 (0.2744) time: 0.4390 data: 0.0043 max mem: 22446 +train: [0] [240/400] eta: 0:01:12 lr: 0.000036 loss: 1.9066 (2.4323) grad: 0.2362 (0.2718) time: 0.4459 data: 0.0041 max mem: 22446 +train: [0] [260/400] eta: 0:01:03 lr: 0.000039 loss: 1.8422 (2.3855) grad: 0.2378 (0.2693) time: 0.4764 data: 0.0044 max mem: 22446 +train: [0] [280/400] eta: 0:00:54 lr: 0.000042 loss: 1.8158 (2.3448) grad: 0.2212 (0.2655) time: 0.4587 data: 0.0044 max mem: 22446 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 1.7632 (2.3043) grad: 0.2057 (0.2617) time: 0.5915 data: 0.1594 max mem: 22446 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 1.7180 (2.2654) grad: 0.2026 (0.2585) time: 0.4490 data: 0.0031 max mem: 22446 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 1.6443 (2.2281) grad: 0.2148 (0.2563) time: 0.4376 data: 0.0041 max mem: 22446 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 1.6321 (2.1939) grad: 0.2140 (0.2536) time: 0.4456 data: 0.0041 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.5819 (2.1609) grad: 0.2014 (0.2509) time: 0.4398 data: 0.0042 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.5303 (2.1277) grad: 0.2038 (0.2488) time: 0.4358 data: 0.0041 max mem: 22446 +train: [0] Total time: 0:03:03 (0.4598 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.5303 (2.1277) grad: 0.2038 (0.2488) +eval (validation): [0] [ 0/63] eta: 0:03:12 time: 3.0570 data: 2.7695 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:20 time: 0.3361 data: 0.0037 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3327 data: 0.0033 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3138 data: 0.0033 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3125 data: 0.0033 max mem: 22446 +eval (validation): [0] Total time: 0:00:23 (0.3748 s / it) +cv: [0] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 0.099 acc: 0.967 f1: 0.960 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:20:47 lr: nan time: 3.1191 data: 2.7742 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:37 lr: 0.000063 loss: 1.4729 (1.4830) grad: 0.1973 (0.1991) time: 0.4437 data: 0.0038 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:01 lr: 0.000066 loss: 1.4703 (1.4600) grad: 0.1973 (0.1986) time: 0.4342 data: 0.0035 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:44 lr: 0.000069 loss: 1.4114 (1.4368) grad: 0.1922 (0.1955) time: 0.4375 data: 0.0042 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:30 lr: 0.000072 loss: 1.3820 (1.4217) grad: 0.1880 (0.1951) time: 0.4394 data: 0.0041 max mem: 22446 +train: [1] [100/400] eta: 0:02:19 lr: 0.000075 loss: 1.3580 (1.4107) grad: 0.1924 (0.1949) time: 0.4351 data: 0.0040 max mem: 22446 +train: [1] [120/400] eta: 0:02:08 lr: 0.000078 loss: 1.3267 (1.3920) grad: 0.1829 (0.1935) time: 0.4414 data: 0.0040 max mem: 22446 +train: [1] [140/400] eta: 0:01:59 lr: 0.000081 loss: 1.2830 (1.3784) grad: 0.1787 (0.1917) time: 0.4645 data: 0.0045 max mem: 22446 +train: [1] [160/400] eta: 0:01:49 lr: 0.000084 loss: 1.2620 (1.3614) grad: 0.1761 (0.1902) time: 0.4343 data: 0.0043 max mem: 22446 +train: [1] [180/400] eta: 0:01:40 lr: 0.000087 loss: 1.2333 (1.3479) grad: 0.1774 (0.1889) time: 0.4383 data: 0.0042 max mem: 22446 +train: [1] [200/400] eta: 0:01:30 lr: 0.000090 loss: 1.2006 (1.3330) grad: 0.1712 (0.1875) time: 0.4459 data: 0.0043 max mem: 22446 +train: [1] [220/400] eta: 0:01:21 lr: 0.000093 loss: 1.1805 (1.3182) grad: 0.1735 (0.1874) time: 0.4426 data: 0.0042 max mem: 22446 +train: [1] [240/400] eta: 0:01:12 lr: 0.000096 loss: 1.1629 (1.3043) grad: 0.1782 (0.1861) time: 0.4564 data: 0.0043 max mem: 22446 +train: [1] [260/400] eta: 0:01:03 lr: 0.000099 loss: 1.1477 (1.2924) grad: 0.1699 (0.1848) time: 0.4667 data: 0.0043 max mem: 22446 +train: [1] [280/400] eta: 0:00:54 lr: 0.000102 loss: 1.1322 (1.2791) grad: 0.1676 (0.1842) time: 0.4364 data: 0.0042 max mem: 22446 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 1.0939 (1.2661) grad: 0.1666 (0.1826) time: 0.6084 data: 0.1670 max mem: 22446 +train: [1] [320/400] eta: 0:00:36 lr: 0.000108 loss: 1.0816 (1.2543) grad: 0.1603 (0.1814) time: 0.4423 data: 0.0033 max mem: 22446 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 1.0647 (1.2426) grad: 0.1597 (0.1798) time: 0.4381 data: 0.0038 max mem: 22446 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 1.0543 (1.2326) grad: 0.1533 (0.1783) time: 0.4405 data: 0.0043 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.0288 (1.2223) grad: 0.1554 (0.1772) time: 0.4402 data: 0.0039 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.0181 (1.2125) grad: 0.1588 (0.1762) time: 0.4351 data: 0.0042 max mem: 22446 +train: [1] Total time: 0:03:03 (0.4583 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.0181 (1.2125) grad: 0.1588 (0.1762) +eval (validation): [1] [ 0/63] eta: 0:03:10 time: 3.0239 data: 2.7472 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:19 time: 0.3371 data: 0.0043 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3303 data: 0.0029 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3165 data: 0.0034 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3135 data: 0.0034 max mem: 22446 +eval (validation): [1] Total time: 0:00:23 (0.3745 s / it) +cv: [1] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.088 acc: 0.973 f1: 0.968 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:20:37 lr: nan time: 3.0935 data: 2.7522 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:35 lr: 0.000123 loss: 0.9465 (0.9676) grad: 0.1614 (0.1666) time: 0.4394 data: 0.0034 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:00 lr: 0.000126 loss: 0.9697 (0.9741) grad: 0.1614 (0.1702) time: 0.4359 data: 0.0045 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:43 lr: 0.000129 loss: 0.9697 (0.9703) grad: 0.1695 (0.1735) time: 0.4342 data: 0.0041 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:30 lr: 0.000132 loss: 0.9590 (0.9698) grad: 0.1760 (0.1754) time: 0.4386 data: 0.0044 max mem: 22446 +train: [2] [100/400] eta: 0:02:19 lr: 0.000135 loss: 0.9565 (0.9664) grad: 0.1815 (0.1782) time: 0.4490 data: 0.0042 max mem: 22446 +train: [2] [120/400] eta: 0:02:10 lr: 0.000138 loss: 0.9375 (0.9636) grad: 0.1847 (0.1821) time: 0.4630 data: 0.0041 max mem: 22446 +train: [2] [140/400] eta: 0:02:00 lr: 0.000141 loss: 0.8941 (0.9528) grad: 0.1843 (0.1815) time: 0.4586 data: 0.0043 max mem: 22446 +train: [2] [160/400] eta: 0:01:50 lr: 0.000144 loss: 0.9158 (0.9536) grad: 0.1961 (0.1843) time: 0.4435 data: 0.0041 max mem: 22446 +train: [2] [180/400] eta: 0:01:41 lr: 0.000147 loss: 0.9429 (0.9492) grad: 0.1982 (0.1854) time: 0.4622 data: 0.0045 max mem: 22446 +train: [2] [200/400] eta: 0:01:32 lr: 0.000150 loss: 0.8883 (0.9412) grad: 0.1821 (0.1850) time: 0.4466 data: 0.0042 max mem: 22446 +train: [2] [220/400] eta: 0:01:22 lr: 0.000153 loss: 0.9120 (0.9445) grad: 0.1821 (0.1869) time: 0.4433 data: 0.0043 max mem: 22446 +train: [2] [240/400] eta: 0:01:13 lr: 0.000156 loss: 0.9036 (0.9387) grad: 0.1870 (0.1873) time: 0.4735 data: 0.0044 max mem: 22446 +train: [2] [260/400] eta: 0:01:04 lr: 0.000159 loss: 0.8502 (0.9353) grad: 0.1989 (0.1882) time: 0.4637 data: 0.0045 max mem: 22446 +train: [2] [280/400] eta: 0:00:55 lr: 0.000162 loss: 0.8502 (0.9312) grad: 0.1991 (0.1893) time: 0.4351 data: 0.0042 max mem: 22446 +train: [2] [300/400] eta: 0:00:46 lr: 0.000165 loss: 0.8414 (0.9253) grad: 0.1986 (0.1907) time: 0.6139 data: 0.1762 max mem: 22446 +train: [2] [320/400] eta: 0:00:37 lr: 0.000168 loss: 0.8750 (0.9226) grad: 0.2121 (0.1924) time: 0.4396 data: 0.0032 max mem: 22446 +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 0.8643 (0.9180) grad: 0.2064 (0.1932) time: 0.4370 data: 0.0041 max mem: 22446 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 0.8691 (0.9173) grad: 0.2054 (0.1953) time: 0.4330 data: 0.0040 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.8440 (0.9115) grad: 0.2232 (0.1975) time: 0.4351 data: 0.0036 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.7762 (0.9034) grad: 0.2134 (0.1983) time: 0.4381 data: 0.0042 max mem: 22446 +train: [2] Total time: 0:03:04 (0.4614 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.7762 (0.9034) grad: 0.2134 (0.1983) +eval (validation): [2] [ 0/63] eta: 0:03:07 time: 2.9738 data: 2.7421 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:21 time: 0.3697 data: 0.0043 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3210 data: 0.0033 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3065 data: 0.0031 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3041 data: 0.0031 max mem: 22446 +eval (validation): [2] Total time: 0:00:23 (0.3778 s / it) +cv: [2] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 0.070 acc: 0.977 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:20:47 lr: nan time: 3.1186 data: 2.7865 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:30 lr: 0.000183 loss: 0.7132 (0.7295) grad: 0.2116 (0.2214) time: 0.4254 data: 0.0027 max mem: 22446 +train: [3] [ 40/400] eta: 0:02:58 lr: 0.000186 loss: 0.7585 (0.7649) grad: 0.2116 (0.2163) time: 0.4347 data: 0.0037 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:42 lr: 0.000189 loss: 0.7748 (0.7669) grad: 0.2046 (0.2177) time: 0.4398 data: 0.0041 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:29 lr: 0.000192 loss: 0.7406 (0.7747) grad: 0.2104 (0.2175) time: 0.4339 data: 0.0042 max mem: 22446 +train: [3] [100/400] eta: 0:02:18 lr: 0.000195 loss: 0.7518 (0.7761) grad: 0.2225 (0.2173) time: 0.4407 data: 0.0041 max mem: 22446 +train: [3] [120/400] eta: 0:02:09 lr: 0.000198 loss: 0.8042 (0.7873) grad: 0.2325 (0.2261) time: 0.4583 data: 0.0042 max mem: 22446 +train: [3] [140/400] eta: 0:01:59 lr: 0.000201 loss: 0.8078 (0.7905) grad: 0.2490 (0.2335) time: 0.4415 data: 0.0039 max mem: 22446 +train: [3] [160/400] eta: 0:01:49 lr: 0.000204 loss: 0.7841 (0.7919) grad: 0.2644 (0.2420) time: 0.4368 data: 0.0042 max mem: 22446 +train: [3] [180/400] eta: 0:01:40 lr: 0.000207 loss: 0.7948 (0.7961) grad: 0.2694 (0.2456) time: 0.4500 data: 0.0041 max mem: 22446 +train: [3] [200/400] eta: 0:01:30 lr: 0.000210 loss: 0.7948 (0.7992) grad: 0.2694 (0.2529) time: 0.4349 data: 0.0042 max mem: 22446 +train: [3] [220/400] eta: 0:01:21 lr: 0.000213 loss: 0.7764 (0.7969) grad: 0.2772 (0.2595) time: 0.4408 data: 0.0045 max mem: 22446 +train: [3] [240/400] eta: 0:01:12 lr: 0.000216 loss: 0.7941 (0.8082) grad: 0.2945 (0.2637) time: 0.4650 data: 0.0043 max mem: 22446 +train: [3] [260/400] eta: 0:01:03 lr: 0.000219 loss: 0.7279 (0.8066) grad: 0.3013 (0.2667) time: 0.4637 data: 0.0042 max mem: 22446 +train: [3] [280/400] eta: 0:00:54 lr: 0.000222 loss: 0.7771 (0.8089) grad: 0.3013 (0.2710) time: 0.4362 data: 0.0042 max mem: 22446 +train: [3] [300/400] eta: 0:00:46 lr: 0.000225 loss: 0.8356 (0.8148) grad: 0.3274 (0.2761) time: 0.6181 data: 0.1733 max mem: 22446 +train: [3] [320/400] eta: 0:00:37 lr: 0.000228 loss: 0.8388 (0.8171) grad: 0.3286 (0.2797) time: 0.4483 data: 0.0036 max mem: 22446 +train: [3] [340/400] eta: 0:00:27 lr: 0.000231 loss: 0.6930 (0.8102) grad: 0.3257 (0.2831) time: 0.4306 data: 0.0042 max mem: 22446 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 0.6930 (0.8048) grad: 0.3299 (0.2866) time: 0.4439 data: 0.0042 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.7603 (0.8078) grad: 0.3635 (0.2925) time: 0.4413 data: 0.0042 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.8656 (0.8122) grad: 0.3866 (0.2980) time: 0.4388 data: 0.0041 max mem: 22446 +train: [3] Total time: 0:03:03 (0.4583 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.8656 (0.8122) grad: 0.3866 (0.2980) +eval (validation): [3] [ 0/63] eta: 0:03:15 time: 3.1036 data: 2.8714 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:19 time: 0.3220 data: 0.0024 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3393 data: 0.0034 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3187 data: 0.0033 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3178 data: 0.0033 max mem: 22446 +eval (validation): [3] Total time: 0:00:23 (0.3751 s / it) +cv: [3] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.073 acc: 0.976 f1: 0.971 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:20:36 lr: nan time: 3.0907 data: 2.7527 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:35 lr: 0.000243 loss: 0.9643 (0.9557) grad: 0.3612 (0.4278) time: 0.4398 data: 0.0032 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:01 lr: 0.000246 loss: 0.9889 (0.9976) grad: 0.4106 (0.4343) time: 0.4394 data: 0.0038 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:44 lr: 0.000249 loss: 0.9760 (0.9690) grad: 0.4364 (0.4570) time: 0.4421 data: 0.0039 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:31 lr: 0.000252 loss: 0.9021 (0.9447) grad: 0.4579 (0.4487) time: 0.4397 data: 0.0042 max mem: 22446 +train: [4] [100/400] eta: 0:02:20 lr: 0.000255 loss: 0.9147 (0.9351) grad: 0.4264 (0.4465) time: 0.4457 data: 0.0041 max mem: 22446 +train: [4] [120/400] eta: 0:02:11 lr: 0.000258 loss: 0.9205 (0.9408) grad: 0.4632 (0.4744) time: 0.4744 data: 0.0043 max mem: 22446 +train: [4] [140/400] eta: 0:02:00 lr: 0.000261 loss: 0.8545 (0.9356) grad: 0.4770 (0.4776) time: 0.4327 data: 0.0042 max mem: 22446 +train: [4] [160/400] eta: 0:01:50 lr: 0.000264 loss: 0.9504 (0.9578) grad: 0.5102 (0.4861) time: 0.4500 data: 0.0040 max mem: 22446 +train: [4] [180/400] eta: 0:01:41 lr: 0.000267 loss: 0.9934 (0.9550) grad: 0.4518 (0.4781) time: 0.4382 data: 0.0041 max mem: 22446 +train: [4] [200/400] eta: 0:01:31 lr: 0.000270 loss: 0.9215 (0.9518) grad: 0.4407 (0.4849) time: 0.4323 data: 0.0041 max mem: 22446 +train: [4] [220/400] eta: 0:01:21 lr: 0.000273 loss: 0.9215 (0.9583) grad: 0.5042 (0.4902) time: 0.4331 data: 0.0042 max mem: 22446 +train: [4] [240/400] eta: 0:01:12 lr: 0.000276 loss: 0.9176 (0.9535) grad: 0.5135 (0.4932) time: 0.4741 data: 0.0040 max mem: 22446 +train: [4] [260/400] eta: 0:01:03 lr: 0.000279 loss: 1.0125 (0.9826) grad: 0.5466 (0.5008) time: 0.4421 data: 0.0040 max mem: 22446 +train: [4] [280/400] eta: 0:00:54 lr: 0.000282 loss: 1.2209 (0.9997) grad: 0.5545 (0.5068) time: 0.4292 data: 0.0038 max mem: 22446 +train: [4] [300/400] eta: 0:00:46 lr: 0.000285 loss: 1.0280 (1.0037) grad: 0.5581 (0.5111) time: 0.6330 data: 0.1704 max mem: 22446 +train: [4] [320/400] eta: 0:00:37 lr: 0.000288 loss: 0.8179 (0.9939) grad: 0.5613 (0.5185) time: 0.4400 data: 0.0033 max mem: 22446 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 0.7244 (0.9830) grad: 0.5613 (0.5179) time: 0.4454 data: 0.0042 max mem: 22446 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 0.7919 (0.9891) grad: 0.5021 (0.5196) time: 0.4447 data: 0.0042 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.0565 (1.0032) grad: 0.5789 (0.5284) time: 0.4385 data: 0.0042 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.0951 (1.0070) grad: 0.5575 (0.5294) time: 0.4397 data: 0.0042 max mem: 22446 +train: [4] Total time: 0:03:03 (0.4599 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.0951 (1.0070) grad: 0.5575 (0.5294) +eval (validation): [4] [ 0/63] eta: 0:03:11 time: 3.0468 data: 2.8075 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:20 time: 0.3444 data: 0.0037 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3175 data: 0.0028 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3092 data: 0.0032 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3077 data: 0.0032 max mem: 22446 +eval (validation): [4] Total time: 0:00:23 (0.3707 s / it) +cv: [4] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.077 acc: 0.977 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:21:37 lr: nan time: 3.2436 data: 2.8674 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:37 lr: 0.000300 loss: 0.8394 (0.9301) grad: 0.4893 (0.5218) time: 0.4382 data: 0.0027 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:03 lr: 0.000300 loss: 0.8472 (0.9751) grad: 0.4893 (0.5056) time: 0.4429 data: 0.0041 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:45 lr: 0.000300 loss: 0.9470 (0.9662) grad: 0.5366 (0.5536) time: 0.4432 data: 0.0040 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:32 lr: 0.000300 loss: 0.9775 (0.9922) grad: 0.6238 (0.5771) time: 0.4478 data: 0.0044 max mem: 22446 +train: [5] [100/400] eta: 0:02:21 lr: 0.000300 loss: 1.0824 (1.0281) grad: 0.5746 (0.5785) time: 0.4453 data: 0.0042 max mem: 22446 +train: [5] [120/400] eta: 0:02:11 lr: 0.000300 loss: 1.1668 (1.0659) grad: 0.5726 (0.5778) time: 0.4694 data: 0.0043 max mem: 22446 +train: [5] [140/400] eta: 0:02:01 lr: 0.000300 loss: 1.2481 (1.0868) grad: 0.6159 (0.5948) time: 0.4425 data: 0.0040 max mem: 22446 +train: [5] [160/400] eta: 0:01:51 lr: 0.000299 loss: 1.0626 (1.1083) grad: 0.6435 (0.5969) time: 0.4599 data: 0.0041 max mem: 22446 +train: [5] [180/400] eta: 0:01:42 lr: 0.000299 loss: 0.9686 (1.1057) grad: 0.6140 (0.5999) time: 0.4467 data: 0.0042 max mem: 22446 +train: [5] [200/400] eta: 0:01:32 lr: 0.000299 loss: 1.1373 (1.1241) grad: 0.6351 (0.6059) time: 0.4433 data: 0.0042 max mem: 22446 +train: [5] [220/400] eta: 0:01:22 lr: 0.000299 loss: 1.1373 (1.1392) grad: 0.6351 (0.6059) time: 0.4296 data: 0.0043 max mem: 22446 +train: [5] [240/400] eta: 0:01:13 lr: 0.000299 loss: 1.0353 (1.1387) grad: 0.6323 (0.6106) time: 0.4678 data: 0.0044 max mem: 22446 +train: [5] [260/400] eta: 0:01:04 lr: 0.000299 loss: 1.2034 (1.1690) grad: 0.7213 (0.6246) time: 0.4518 data: 0.0043 max mem: 22446 +train: [5] [280/400] eta: 0:00:54 lr: 0.000298 loss: 1.3694 (1.1915) grad: 0.7522 (0.6305) time: 0.4309 data: 0.0040 max mem: 22446 +train: [5] [300/400] eta: 0:00:46 lr: 0.000298 loss: 1.2287 (1.1863) grad: 0.6535 (0.6330) time: 0.6135 data: 0.1726 max mem: 22446 +train: [5] [320/400] eta: 0:00:37 lr: 0.000298 loss: 0.8613 (1.1634) grad: 0.5854 (0.6275) time: 0.4461 data: 0.0033 max mem: 22446 +train: [5] [340/400] eta: 0:00:27 lr: 0.000298 loss: 0.8847 (1.1620) grad: 0.5679 (0.6270) time: 0.4312 data: 0.0043 max mem: 22446 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 0.9092 (1.1522) grad: 0.5630 (0.6229) time: 0.4370 data: 0.0043 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.9830 (1.1528) grad: 0.5630 (0.6241) time: 0.4346 data: 0.0039 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 1.0659 (1.1418) grad: 0.5790 (0.6204) time: 0.4393 data: 0.0036 max mem: 22446 +train: [5] Total time: 0:03:04 (0.4606 s / it) +train: [5] Summary: lr: 0.000297 loss: 1.0659 (1.1418) grad: 0.5790 (0.6204) +eval (validation): [5] [ 0/63] eta: 0:03:17 time: 3.1284 data: 2.9031 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:20 time: 0.3338 data: 0.0031 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3261 data: 0.0028 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3220 data: 0.0033 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3193 data: 0.0033 max mem: 22446 +eval (validation): [5] Total time: 0:00:23 (0.3759 s / it) +cv: [5] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.070 acc: 0.978 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:20:33 lr: nan time: 3.0846 data: 2.7545 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:31 lr: 0.000296 loss: 0.8134 (0.8569) grad: 0.5114 (0.5680) time: 0.4301 data: 0.0039 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:00 lr: 0.000296 loss: 0.8033 (0.8525) grad: 0.5087 (0.5540) time: 0.4430 data: 0.0037 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:43 lr: 0.000296 loss: 0.7791 (0.8867) grad: 0.5128 (0.5885) time: 0.4432 data: 0.0041 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:32 lr: 0.000295 loss: 0.8987 (0.8965) grad: 0.6187 (0.5868) time: 0.4584 data: 0.0044 max mem: 22446 +train: [6] [100/400] eta: 0:02:21 lr: 0.000295 loss: 0.8192 (0.8703) grad: 0.5400 (0.5738) time: 0.4447 data: 0.0042 max mem: 22446 +train: [6] [120/400] eta: 0:02:11 lr: 0.000295 loss: 0.6692 (0.8490) grad: 0.5304 (0.5647) time: 0.4636 data: 0.0042 max mem: 22446 +train: [6] [140/400] eta: 0:02:00 lr: 0.000294 loss: 0.6475 (0.8390) grad: 0.5447 (0.5648) time: 0.4431 data: 0.0040 max mem: 22446 +train: [6] [160/400] eta: 0:01:51 lr: 0.000294 loss: 0.6030 (0.8283) grad: 0.4871 (0.5559) time: 0.4491 data: 0.0046 max mem: 22446 +train: [6] [180/400] eta: 0:01:41 lr: 0.000293 loss: 0.6583 (0.8380) grad: 0.4832 (0.5518) time: 0.4359 data: 0.0034 max mem: 22446 +train: [6] [200/400] eta: 0:01:31 lr: 0.000293 loss: 0.8725 (0.8674) grad: 0.5034 (0.5508) time: 0.4470 data: 0.0039 max mem: 22446 +train: [6] [220/400] eta: 0:01:22 lr: 0.000292 loss: 0.8235 (0.8633) grad: 0.5522 (0.5474) time: 0.4482 data: 0.0041 max mem: 22446 +train: [6] [240/400] eta: 0:01:13 lr: 0.000292 loss: 0.7251 (0.8497) grad: 0.5192 (0.5437) time: 0.4719 data: 0.0045 max mem: 22446 +train: [6] [260/400] eta: 0:01:04 lr: 0.000291 loss: 0.6410 (0.8366) grad: 0.5154 (0.5443) time: 0.4520 data: 0.0042 max mem: 22446 +train: [6] [280/400] eta: 0:00:54 lr: 0.000291 loss: 0.7245 (0.8414) grad: 0.5154 (0.5411) time: 0.4464 data: 0.0041 max mem: 22446 +train: [6] [300/400] eta: 0:00:46 lr: 0.000290 loss: 0.7194 (0.8323) grad: 0.4744 (0.5382) time: 0.6071 data: 0.1746 max mem: 22446 +train: [6] [320/400] eta: 0:00:37 lr: 0.000290 loss: 0.6389 (0.8201) grad: 0.4121 (0.5319) time: 0.4353 data: 0.0034 max mem: 22446 +train: [6] [340/400] eta: 0:00:27 lr: 0.000289 loss: 0.5418 (0.8109) grad: 0.4121 (0.5262) time: 0.4354 data: 0.0042 max mem: 22446 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 0.4957 (0.7976) grad: 0.4108 (0.5195) time: 0.4423 data: 0.0044 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.4772 (0.7841) grad: 0.4160 (0.5147) time: 0.4450 data: 0.0044 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.4356 (0.7659) grad: 0.3698 (0.5070) time: 0.4485 data: 0.0043 max mem: 22446 +train: [6] Total time: 0:03:04 (0.4615 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.4356 (0.7659) grad: 0.3698 (0.5070) +eval (validation): [6] [ 0/63] eta: 0:03:18 time: 3.1517 data: 2.8718 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3468 data: 0.0024 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3243 data: 0.0034 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3166 data: 0.0035 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3152 data: 0.0035 max mem: 22446 +eval (validation): [6] Total time: 0:00:23 (0.3780 s / it) +cv: [6] best hparam: (2.3, 1.0) (029) ('029_lr2.3e+00_wd1.0e+00') loss: 0.065 acc: 0.980 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:21:01 lr: nan time: 3.1537 data: 2.7809 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:39 lr: 0.000286 loss: 0.4046 (0.5245) grad: 0.3406 (0.3528) time: 0.4476 data: 0.0030 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:03 lr: 0.000286 loss: 0.4046 (0.4939) grad: 0.3406 (0.3568) time: 0.4411 data: 0.0039 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:46 lr: 0.000285 loss: 0.4689 (0.5311) grad: 0.3761 (0.3762) time: 0.4514 data: 0.0042 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:32 lr: 0.000284 loss: 0.4730 (0.5142) grad: 0.3775 (0.3720) time: 0.4334 data: 0.0041 max mem: 22446 +train: [7] [100/400] eta: 0:02:22 lr: 0.000284 loss: 0.4220 (0.5158) grad: 0.3775 (0.3783) time: 0.4636 data: 0.0044 max mem: 22446 +train: [7] [120/400] eta: 0:02:11 lr: 0.000283 loss: 0.3948 (0.5082) grad: 0.3989 (0.3757) time: 0.4430 data: 0.0042 max mem: 22446 +train: [7] [140/400] eta: 0:02:01 lr: 0.000282 loss: 0.4676 (0.5142) grad: 0.3946 (0.3794) time: 0.4526 data: 0.0043 max mem: 22446 +train: [7] [160/400] eta: 0:01:51 lr: 0.000282 loss: 0.4756 (0.5068) grad: 0.3946 (0.3787) time: 0.4405 data: 0.0044 max mem: 22446 +train: [7] [180/400] eta: 0:01:41 lr: 0.000281 loss: 0.4595 (0.5145) grad: 0.3751 (0.3801) time: 0.4367 data: 0.0041 max mem: 22446 +train: [7] [200/400] eta: 0:01:31 lr: 0.000280 loss: 0.3448 (0.5139) grad: 0.3843 (0.3818) time: 0.4356 data: 0.0041 max mem: 22446 +train: [7] [220/400] eta: 0:01:22 lr: 0.000279 loss: 0.4909 (0.5142) grad: 0.3783 (0.3796) time: 0.4629 data: 0.0041 max mem: 22446 +train: [7] [240/400] eta: 0:01:13 lr: 0.000278 loss: 0.4909 (0.5150) grad: 0.3696 (0.3799) time: 0.4554 data: 0.0043 max mem: 22446 +train: [7] [260/400] eta: 0:01:03 lr: 0.000278 loss: 0.4530 (0.5078) grad: 0.3834 (0.3793) time: 0.4342 data: 0.0039 max mem: 22446 +train: [7] [280/400] eta: 0:00:54 lr: 0.000277 loss: 0.4537 (0.5214) grad: 0.3834 (0.3793) time: 0.4645 data: 0.0043 max mem: 22446 +train: [7] [300/400] eta: 0:00:46 lr: 0.000276 loss: 0.5993 (0.5278) grad: 0.4108 (0.3802) time: 0.5942 data: 0.1643 max mem: 22446 +train: [7] [320/400] eta: 0:00:37 lr: 0.000275 loss: 0.4802 (0.5301) grad: 0.3752 (0.3791) time: 0.4357 data: 0.0032 max mem: 22446 +train: [7] [340/400] eta: 0:00:27 lr: 0.000274 loss: 0.4218 (0.5237) grad: 0.3567 (0.3797) time: 0.4389 data: 0.0040 max mem: 22446 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 0.3742 (0.5192) grad: 0.3287 (0.3779) time: 0.4369 data: 0.0043 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.3505 (0.5087) grad: 0.3232 (0.3764) time: 0.4352 data: 0.0040 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.3413 (0.5029) grad: 0.3221 (0.3747) time: 0.4374 data: 0.0040 max mem: 22446 +train: [7] Total time: 0:03:03 (0.4594 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.3413 (0.5029) grad: 0.3221 (0.3747) +eval (validation): [7] [ 0/63] eta: 0:03:10 time: 3.0294 data: 2.7579 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:19 time: 0.3187 data: 0.0029 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:08 time: 0.3296 data: 0.0031 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3140 data: 0.0035 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3093 data: 0.0034 max mem: 22446 +eval (validation): [7] Total time: 0:00:23 (0.3676 s / it) +cv: [7] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.067 acc: 0.980 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:20:53 lr: nan time: 3.1349 data: 2.7415 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:36 lr: 0.000270 loss: 0.2602 (0.3000) grad: 0.2681 (0.2795) time: 0.4402 data: 0.0027 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:01 lr: 0.000270 loss: 0.2972 (0.3357) grad: 0.2707 (0.2790) time: 0.4381 data: 0.0040 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:46 lr: 0.000269 loss: 0.3205 (0.3371) grad: 0.2707 (0.2852) time: 0.4552 data: 0.0043 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:32 lr: 0.000268 loss: 0.3052 (0.3481) grad: 0.2970 (0.2892) time: 0.4429 data: 0.0038 max mem: 22446 +train: [8] [100/400] eta: 0:02:21 lr: 0.000267 loss: 0.3052 (0.3427) grad: 0.2970 (0.2904) time: 0.4557 data: 0.0041 max mem: 22446 +train: [8] [120/400] eta: 0:02:10 lr: 0.000266 loss: 0.3250 (0.3673) grad: 0.3130 (0.2985) time: 0.4346 data: 0.0041 max mem: 22446 +train: [8] [140/400] eta: 0:02:00 lr: 0.000265 loss: 0.3672 (0.3732) grad: 0.3130 (0.3022) time: 0.4512 data: 0.0043 max mem: 22446 +train: [8] [160/400] eta: 0:01:50 lr: 0.000264 loss: 0.3518 (0.3763) grad: 0.2954 (0.3017) time: 0.4370 data: 0.0042 max mem: 22446 +train: [8] [180/400] eta: 0:01:40 lr: 0.000263 loss: 0.3080 (0.3718) grad: 0.2715 (0.2995) time: 0.4380 data: 0.0042 max mem: 22446 +train: [8] [200/400] eta: 0:01:31 lr: 0.000262 loss: 0.2857 (0.3648) grad: 0.2660 (0.2964) time: 0.4368 data: 0.0040 max mem: 22446 +train: [8] [220/400] eta: 0:01:22 lr: 0.000260 loss: 0.2857 (0.3650) grad: 0.2791 (0.2964) time: 0.4557 data: 0.0041 max mem: 22446 +train: [8] [240/400] eta: 0:01:13 lr: 0.000259 loss: 0.3381 (0.3648) grad: 0.2951 (0.2956) time: 0.4719 data: 0.0046 max mem: 22446 +train: [8] [260/400] eta: 0:01:03 lr: 0.000258 loss: 0.3237 (0.3667) grad: 0.2804 (0.2955) time: 0.4406 data: 0.0039 max mem: 22446 +train: [8] [280/400] eta: 0:00:54 lr: 0.000257 loss: 0.3280 (0.3677) grad: 0.2804 (0.2956) time: 0.4756 data: 0.0043 max mem: 22446 +train: [8] [300/400] eta: 0:00:46 lr: 0.000256 loss: 0.3314 (0.3676) grad: 0.3038 (0.2963) time: 0.6202 data: 0.1696 max mem: 22446 +train: [8] [320/400] eta: 0:00:37 lr: 0.000255 loss: 0.2994 (0.3603) grad: 0.2538 (0.2911) time: 0.4478 data: 0.0035 max mem: 22446 +train: [8] [340/400] eta: 0:00:27 lr: 0.000254 loss: 0.2498 (0.3559) grad: 0.2556 (0.2900) time: 0.4461 data: 0.0042 max mem: 22446 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 0.2602 (0.3501) grad: 0.2556 (0.2869) time: 0.4564 data: 0.0043 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.2426 (0.3458) grad: 0.2095 (0.2840) time: 0.4475 data: 0.0041 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.2426 (0.3427) grad: 0.1924 (0.2810) time: 0.4497 data: 0.0043 max mem: 22446 +train: [8] Total time: 0:03:05 (0.4643 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.2426 (0.3427) grad: 0.1924 (0.2810) +eval (validation): [8] [ 0/63] eta: 0:03:12 time: 3.0485 data: 2.7769 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3620 data: 0.0041 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3225 data: 0.0029 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3189 data: 0.0033 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3173 data: 0.0032 max mem: 22446 +eval (validation): [8] Total time: 0:00:24 (0.3814 s / it) +cv: [8] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.074 acc: 0.982 f1: 0.978 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [9] [ 0/400] eta: 0:20:35 lr: nan time: 3.0894 data: 2.7079 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:41 lr: 0.000249 loss: 0.2693 (0.3088) grad: 0.2029 (0.2103) time: 0.4585 data: 0.0025 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:08 lr: 0.000248 loss: 0.2693 (0.2900) grad: 0.2113 (0.2205) time: 0.4623 data: 0.0041 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:50 lr: 0.000247 loss: 0.2441 (0.2768) grad: 0.2370 (0.2298) time: 0.4502 data: 0.0040 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:36 lr: 0.000246 loss: 0.2449 (0.2711) grad: 0.2078 (0.2242) time: 0.4535 data: 0.0042 max mem: 22446 +train: [9] [100/400] eta: 0:02:24 lr: 0.000244 loss: 0.2474 (0.2697) grad: 0.2099 (0.2218) time: 0.4582 data: 0.0042 max mem: 22446 +train: [9] [120/400] eta: 0:02:13 lr: 0.000243 loss: 0.2633 (0.2736) grad: 0.2142 (0.2251) time: 0.4478 data: 0.0044 max mem: 22446 +train: [9] [140/400] eta: 0:02:03 lr: 0.000242 loss: 0.2185 (0.2688) grad: 0.2196 (0.2225) time: 0.4537 data: 0.0042 max mem: 22446 +train: [9] [160/400] eta: 0:01:53 lr: 0.000241 loss: 0.1907 (0.2630) grad: 0.2068 (0.2227) time: 0.4641 data: 0.0046 max mem: 22446 +train: [9] [180/400] eta: 0:01:43 lr: 0.000240 loss: 0.2112 (0.2578) grad: 0.2239 (0.2207) time: 0.4523 data: 0.0043 max mem: 22446 +train: [9] [200/400] eta: 0:01:33 lr: 0.000238 loss: 0.2162 (0.2572) grad: 0.2165 (0.2196) time: 0.4456 data: 0.0042 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.2335 (0.2572) grad: 0.2066 (0.2174) time: 0.4658 data: 0.0041 max mem: 22446 +train: [9] [240/400] eta: 0:01:14 lr: 0.000236 loss: 0.2105 (0.2538) grad: 0.1801 (0.2159) time: 0.4653 data: 0.0044 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.1910 (0.2524) grad: 0.2211 (0.2160) time: 0.4406 data: 0.0042 max mem: 22446 +train: [9] [280/400] eta: 0:00:55 lr: 0.000233 loss: 0.2292 (0.2538) grad: 0.2297 (0.2168) time: 0.4718 data: 0.0045 max mem: 22446 +train: [9] [300/400] eta: 0:00:47 lr: 0.000232 loss: 0.2349 (0.2528) grad: 0.2168 (0.2173) time: 0.6276 data: 0.1821 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.1755 (0.2474) grad: 0.1653 (0.2126) time: 0.4540 data: 0.0035 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.1865 (0.2463) grad: 0.1646 (0.2113) time: 0.4630 data: 0.0044 max mem: 22446 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 0.2090 (0.2436) grad: 0.1730 (0.2090) time: 0.4575 data: 0.0042 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.1696 (0.2406) grad: 0.1605 (0.2063) time: 0.4676 data: 0.0041 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.1852 (0.2381) grad: 0.1523 (0.2041) time: 0.4777 data: 0.0044 max mem: 22446 +train: [9] Total time: 0:03:09 (0.4739 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.1852 (0.2381) grad: 0.1523 (0.2041) +eval (validation): [9] [ 0/63] eta: 0:03:18 time: 3.1479 data: 2.9038 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:20 time: 0.3349 data: 0.0036 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3477 data: 0.0028 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3397 data: 0.0037 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3391 data: 0.0037 max mem: 22446 +eval (validation): [9] Total time: 0:00:24 (0.3904 s / it) +cv: [9] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.074 acc: 0.981 f1: 0.978 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:24:56 lr: nan time: 3.7400 data: 3.3894 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:44 lr: 0.000224 loss: 0.2301 (0.2314) grad: 0.1539 (0.1563) time: 0.4344 data: 0.0018 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:08 lr: 0.000222 loss: 0.2035 (0.2162) grad: 0.1539 (0.1559) time: 0.4525 data: 0.0039 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:50 lr: 0.000221 loss: 0.2024 (0.2105) grad: 0.1444 (0.1525) time: 0.4524 data: 0.0042 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:35 lr: 0.000220 loss: 0.1809 (0.2028) grad: 0.1421 (0.1523) time: 0.4476 data: 0.0040 max mem: 22446 +train: [10] [100/400] eta: 0:02:24 lr: 0.000218 loss: 0.1754 (0.1999) grad: 0.1448 (0.1545) time: 0.4562 data: 0.0042 max mem: 22446 +train: [10] [120/400] eta: 0:02:13 lr: 0.000217 loss: 0.1823 (0.1957) grad: 0.1525 (0.1549) time: 0.4447 data: 0.0041 max mem: 22446 +train: [10] [140/400] eta: 0:02:02 lr: 0.000215 loss: 0.1873 (0.1963) grad: 0.1525 (0.1540) time: 0.4416 data: 0.0042 max mem: 22446 +train: [10] [160/400] eta: 0:01:52 lr: 0.000214 loss: 0.1714 (0.1923) grad: 0.1518 (0.1531) time: 0.4589 data: 0.0044 max mem: 22446 +train: [10] [180/400] eta: 0:01:42 lr: 0.000213 loss: 0.1693 (0.1906) grad: 0.1518 (0.1529) time: 0.4362 data: 0.0042 max mem: 22446 +train: [10] [200/400] eta: 0:01:32 lr: 0.000211 loss: 0.1963 (0.1898) grad: 0.1543 (0.1550) time: 0.4499 data: 0.0043 max mem: 22446 +train: [10] [220/400] eta: 0:01:23 lr: 0.000210 loss: 0.1783 (0.1882) grad: 0.1179 (0.1505) time: 0.4499 data: 0.0044 max mem: 22446 +train: [10] [240/400] eta: 0:01:13 lr: 0.000208 loss: 0.1624 (0.1882) grad: 0.1096 (0.1492) time: 0.4487 data: 0.0043 max mem: 22446 +train: [10] [260/400] eta: 0:01:04 lr: 0.000207 loss: 0.1672 (0.1880) grad: 0.1315 (0.1484) time: 0.4580 data: 0.0042 max mem: 22446 +train: [10] [280/400] eta: 0:00:55 lr: 0.000205 loss: 0.1537 (0.1860) grad: 0.1194 (0.1455) time: 0.4478 data: 0.0044 max mem: 22446 +train: [10] [300/400] eta: 0:00:47 lr: 0.000204 loss: 0.1547 (0.1854) grad: 0.1061 (0.1443) time: 0.6186 data: 0.1789 max mem: 22446 +train: [10] [320/400] eta: 0:00:37 lr: 0.000202 loss: 0.1584 (0.1840) grad: 0.1250 (0.1444) time: 0.4580 data: 0.0029 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.1563 (0.1823) grad: 0.1235 (0.1432) time: 0.4610 data: 0.0040 max mem: 22446 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 0.1403 (0.1808) grad: 0.1099 (0.1407) time: 0.4464 data: 0.0039 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.1393 (0.1800) grad: 0.1005 (0.1390) time: 0.4538 data: 0.0041 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.1561 (0.1790) grad: 0.1006 (0.1375) time: 0.4499 data: 0.0044 max mem: 22446 +train: [10] Total time: 0:03:06 (0.4671 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.1561 (0.1790) grad: 0.1006 (0.1375) +eval (validation): [10] [ 0/63] eta: 0:03:20 time: 3.1841 data: 2.9498 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:19 time: 0.3291 data: 0.0073 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3465 data: 0.0027 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3271 data: 0.0035 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3260 data: 0.0036 max mem: 22446 +eval (validation): [10] Total time: 0:00:24 (0.3846 s / it) +cv: [10] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.074 acc: 0.981 f1: 0.978 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:21:43 lr: nan time: 3.2596 data: 2.9195 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:40 lr: 0.000195 loss: 0.1531 (0.1593) grad: 0.0647 (0.0809) time: 0.4453 data: 0.0081 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:03 lr: 0.000193 loss: 0.1531 (0.1555) grad: 0.0858 (0.0865) time: 0.4380 data: 0.0036 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:46 lr: 0.000192 loss: 0.1466 (0.1589) grad: 0.0902 (0.0902) time: 0.4455 data: 0.0042 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:32 lr: 0.000190 loss: 0.1554 (0.1593) grad: 0.1013 (0.0951) time: 0.4442 data: 0.0042 max mem: 22446 +train: [11] [100/400] eta: 0:02:21 lr: 0.000189 loss: 0.1502 (0.1554) grad: 0.1039 (0.0978) time: 0.4399 data: 0.0043 max mem: 22446 +train: [11] [120/400] eta: 0:02:10 lr: 0.000187 loss: 0.1437 (0.1543) grad: 0.0987 (0.0989) time: 0.4401 data: 0.0042 max mem: 22446 +train: [11] [140/400] eta: 0:02:00 lr: 0.000186 loss: 0.1440 (0.1547) grad: 0.0969 (0.0989) time: 0.4392 data: 0.0041 max mem: 22446 +train: [11] [160/400] eta: 0:01:50 lr: 0.000184 loss: 0.1566 (0.1561) grad: 0.0867 (0.0988) time: 0.4377 data: 0.0040 max mem: 22446 +train: [11] [180/400] eta: 0:01:40 lr: 0.000183 loss: 0.1549 (0.1553) grad: 0.1042 (0.1005) time: 0.4401 data: 0.0041 max mem: 22446 +train: [11] [200/400] eta: 0:01:31 lr: 0.000181 loss: 0.1378 (0.1531) grad: 0.1042 (0.1007) time: 0.4509 data: 0.0043 max mem: 22446 +train: [11] [220/400] eta: 0:01:21 lr: 0.000180 loss: 0.1323 (0.1520) grad: 0.0979 (0.1007) time: 0.4451 data: 0.0042 max mem: 22446 +train: [11] [240/400] eta: 0:01:12 lr: 0.000178 loss: 0.1472 (0.1511) grad: 0.1023 (0.1006) time: 0.4532 data: 0.0042 max mem: 22446 +train: [11] [260/400] eta: 0:01:03 lr: 0.000177 loss: 0.1437 (0.1518) grad: 0.1042 (0.1008) time: 0.4515 data: 0.0041 max mem: 22446 +train: [11] [280/400] eta: 0:00:54 lr: 0.000175 loss: 0.1431 (0.1522) grad: 0.1156 (0.1019) time: 0.4529 data: 0.0044 max mem: 22446 +train: [11] [300/400] eta: 0:00:46 lr: 0.000174 loss: 0.1526 (0.1526) grad: 0.1102 (0.1027) time: 0.5940 data: 0.1672 max mem: 22446 +train: [11] [320/400] eta: 0:00:36 lr: 0.000172 loss: 0.1542 (0.1524) grad: 0.0998 (0.1034) time: 0.4370 data: 0.0032 max mem: 22446 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 0.1449 (0.1521) grad: 0.0949 (0.1026) time: 0.4512 data: 0.0044 max mem: 22446 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 0.1449 (0.1515) grad: 0.0879 (0.1025) time: 0.4426 data: 0.0045 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.1364 (0.1507) grad: 0.0868 (0.1016) time: 0.4422 data: 0.0043 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.1387 (0.1507) grad: 0.0867 (0.1008) time: 0.4266 data: 0.0039 max mem: 22446 +train: [11] Total time: 0:03:03 (0.4584 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.1387 (0.1507) grad: 0.0867 (0.1008) +eval (validation): [11] [ 0/63] eta: 0:03:05 time: 2.9469 data: 2.6801 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:20 time: 0.3430 data: 0.0040 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3238 data: 0.0033 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3076 data: 0.0034 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3082 data: 0.0034 max mem: 22446 +eval (validation): [11] Total time: 0:00:23 (0.3705 s / it) +cv: [11] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.075 acc: 0.981 f1: 0.978 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:20:23 lr: nan time: 3.0582 data: 2.6969 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:31 lr: 0.000164 loss: 0.1337 (0.1331) grad: 0.0670 (0.0790) time: 0.4322 data: 0.0029 max mem: 22446 +train: [12] [ 40/400] eta: 0:02:59 lr: 0.000163 loss: 0.1337 (0.1335) grad: 0.0712 (0.0775) time: 0.4382 data: 0.0041 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:42 lr: 0.000161 loss: 0.1323 (0.1319) grad: 0.0750 (0.0789) time: 0.4312 data: 0.0044 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:29 lr: 0.000160 loss: 0.1350 (0.1328) grad: 0.0811 (0.0799) time: 0.4326 data: 0.0042 max mem: 22446 +train: [12] [100/400] eta: 0:02:17 lr: 0.000158 loss: 0.1394 (0.1360) grad: 0.0757 (0.0800) time: 0.4351 data: 0.0042 max mem: 22446 +train: [12] [120/400] eta: 0:02:07 lr: 0.000156 loss: 0.1346 (0.1364) grad: 0.0812 (0.0830) time: 0.4319 data: 0.0041 max mem: 22446 +train: [12] [140/400] eta: 0:01:57 lr: 0.000155 loss: 0.1271 (0.1356) grad: 0.0928 (0.0838) time: 0.4348 data: 0.0045 max mem: 22446 +train: [12] [160/400] eta: 0:01:48 lr: 0.000153 loss: 0.1343 (0.1361) grad: 0.0849 (0.0833) time: 0.4516 data: 0.0044 max mem: 22446 +train: [12] [180/400] eta: 0:01:39 lr: 0.000152 loss: 0.1348 (0.1367) grad: 0.0760 (0.0837) time: 0.4475 data: 0.0045 max mem: 22446 +train: [12] [200/400] eta: 0:01:29 lr: 0.000150 loss: 0.1314 (0.1364) grad: 0.0764 (0.0837) time: 0.4339 data: 0.0042 max mem: 22446 +train: [12] [220/400] eta: 0:01:20 lr: 0.000149 loss: 0.1312 (0.1372) grad: 0.0791 (0.0839) time: 0.4368 data: 0.0043 max mem: 22446 +train: [12] [240/400] eta: 0:01:11 lr: 0.000147 loss: 0.1399 (0.1378) grad: 0.0843 (0.0842) time: 0.4458 data: 0.0044 max mem: 22446 +train: [12] [260/400] eta: 0:01:02 lr: 0.000145 loss: 0.1388 (0.1370) grad: 0.0730 (0.0832) time: 0.4598 data: 0.0046 max mem: 22446 +train: [12] [280/400] eta: 0:00:53 lr: 0.000144 loss: 0.1222 (0.1365) grad: 0.0654 (0.0821) time: 0.4526 data: 0.0044 max mem: 22446 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 0.1301 (0.1368) grad: 0.0690 (0.0815) time: 0.5839 data: 0.1629 max mem: 22446 +train: [12] [320/400] eta: 0:00:36 lr: 0.000141 loss: 0.1301 (0.1361) grad: 0.0656 (0.0803) time: 0.4422 data: 0.0044 max mem: 22446 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 0.1241 (0.1360) grad: 0.0609 (0.0799) time: 0.4587 data: 0.0043 max mem: 22446 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 0.1300 (0.1356) grad: 0.0711 (0.0792) time: 0.4377 data: 0.0042 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.1297 (0.1357) grad: 0.0700 (0.0789) time: 0.4420 data: 0.0041 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.1258 (0.1352) grad: 0.0582 (0.0781) time: 0.4455 data: 0.0042 max mem: 22446 +train: [12] Total time: 0:03:02 (0.4558 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.1258 (0.1352) grad: 0.0582 (0.0781) +eval (validation): [12] [ 0/63] eta: 0:03:15 time: 3.1056 data: 2.8294 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:20 time: 0.3512 data: 0.0030 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3214 data: 0.0035 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3154 data: 0.0035 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3164 data: 0.0036 max mem: 22446 +eval (validation): [12] Total time: 0:00:23 (0.3780 s / it) +cv: [12] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.075 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:21:21 lr: nan time: 3.2037 data: 2.7996 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:36 lr: 0.000133 loss: 0.1323 (0.1311) grad: 0.0746 (0.0749) time: 0.4374 data: 0.0024 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:00 lr: 0.000131 loss: 0.1220 (0.1236) grad: 0.0689 (0.0694) time: 0.4301 data: 0.0042 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:43 lr: 0.000130 loss: 0.1144 (0.1216) grad: 0.0605 (0.0694) time: 0.4364 data: 0.0042 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:30 lr: 0.000128 loss: 0.1174 (0.1235) grad: 0.0615 (0.0678) time: 0.4373 data: 0.0039 max mem: 22446 +train: [13] [100/400] eta: 0:02:19 lr: 0.000127 loss: 0.1232 (0.1235) grad: 0.0620 (0.0671) time: 0.4454 data: 0.0043 max mem: 22446 +train: [13] [120/400] eta: 0:02:09 lr: 0.000125 loss: 0.1232 (0.1234) grad: 0.0607 (0.0669) time: 0.4523 data: 0.0045 max mem: 22446 +train: [13] [140/400] eta: 0:01:59 lr: 0.000124 loss: 0.1137 (0.1247) grad: 0.0617 (0.0668) time: 0.4431 data: 0.0042 max mem: 22446 +train: [13] [160/400] eta: 0:01:49 lr: 0.000122 loss: 0.1162 (0.1247) grad: 0.0586 (0.0655) time: 0.4436 data: 0.0042 max mem: 22446 +train: [13] [180/400] eta: 0:01:40 lr: 0.000120 loss: 0.1240 (0.1258) grad: 0.0532 (0.0651) time: 0.4479 data: 0.0043 max mem: 22446 +train: [13] [200/400] eta: 0:01:30 lr: 0.000119 loss: 0.1240 (0.1264) grad: 0.0571 (0.0651) time: 0.4367 data: 0.0042 max mem: 22446 +train: [13] [220/400] eta: 0:01:21 lr: 0.000117 loss: 0.1179 (0.1256) grad: 0.0628 (0.0658) time: 0.4331 data: 0.0042 max mem: 22446 +train: [13] [240/400] eta: 0:01:12 lr: 0.000116 loss: 0.1122 (0.1253) grad: 0.0538 (0.0653) time: 0.4464 data: 0.0043 max mem: 22446 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 0.1230 (0.1258) grad: 0.0585 (0.0655) time: 0.4445 data: 0.0043 max mem: 22446 +train: [13] [280/400] eta: 0:00:54 lr: 0.000113 loss: 0.1183 (0.1251) grad: 0.0618 (0.0653) time: 0.4606 data: 0.0047 max mem: 22446 +train: [13] [300/400] eta: 0:00:46 lr: 0.000111 loss: 0.1047 (0.1251) grad: 0.0606 (0.0653) time: 0.6179 data: 0.1708 max mem: 22446 +train: [13] [320/400] eta: 0:00:36 lr: 0.000110 loss: 0.1215 (0.1251) grad: 0.0573 (0.0648) time: 0.4241 data: 0.0032 max mem: 22446 +train: [13] [340/400] eta: 0:00:27 lr: 0.000108 loss: 0.1215 (0.1244) grad: 0.0544 (0.0643) time: 0.4468 data: 0.0040 max mem: 22446 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 0.1013 (0.1234) grad: 0.0516 (0.0637) time: 0.4538 data: 0.0044 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.1123 (0.1236) grad: 0.0516 (0.0634) time: 0.4408 data: 0.0041 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.1202 (0.1237) grad: 0.0530 (0.0632) time: 0.4378 data: 0.0040 max mem: 22446 +train: [13] Total time: 0:03:03 (0.4582 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.1202 (0.1237) grad: 0.0530 (0.0632) +eval (validation): [13] [ 0/63] eta: 0:03:09 time: 3.0102 data: 2.7695 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:19 time: 0.3332 data: 0.0076 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3324 data: 0.0032 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3066 data: 0.0031 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3056 data: 0.0031 max mem: 22446 +eval (validation): [13] Total time: 0:00:23 (0.3702 s / it) +cv: [13] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.075 acc: 0.981 f1: 0.978 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:21:00 lr: nan time: 3.1514 data: 2.7627 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:39 lr: 0.000102 loss: 0.1287 (0.1277) grad: 0.0548 (0.0578) time: 0.4491 data: 0.0036 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:02 lr: 0.000101 loss: 0.1145 (0.1199) grad: 0.0555 (0.0583) time: 0.4304 data: 0.0041 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:43 lr: 0.000099 loss: 0.1115 (0.1177) grad: 0.0559 (0.0578) time: 0.4319 data: 0.0044 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:30 lr: 0.000098 loss: 0.1132 (0.1163) grad: 0.0572 (0.0583) time: 0.4379 data: 0.0042 max mem: 22446 +train: [14] [100/400] eta: 0:02:19 lr: 0.000096 loss: 0.1044 (0.1153) grad: 0.0566 (0.0572) time: 0.4414 data: 0.0042 max mem: 22446 +train: [14] [120/400] eta: 0:02:09 lr: 0.000095 loss: 0.1227 (0.1182) grad: 0.0514 (0.0574) time: 0.4554 data: 0.0042 max mem: 22446 +train: [14] [140/400] eta: 0:01:59 lr: 0.000093 loss: 0.1321 (0.1214) grad: 0.0589 (0.0581) time: 0.4475 data: 0.0041 max mem: 22446 +train: [14] [160/400] eta: 0:01:49 lr: 0.000092 loss: 0.1111 (0.1200) grad: 0.0553 (0.0575) time: 0.4300 data: 0.0042 max mem: 22446 +train: [14] [180/400] eta: 0:01:40 lr: 0.000090 loss: 0.1077 (0.1195) grad: 0.0536 (0.0574) time: 0.4447 data: 0.0043 max mem: 22446 +train: [14] [200/400] eta: 0:01:30 lr: 0.000089 loss: 0.1101 (0.1194) grad: 0.0531 (0.0569) time: 0.4414 data: 0.0042 max mem: 22446 +train: [14] [220/400] eta: 0:01:21 lr: 0.000088 loss: 0.1101 (0.1189) grad: 0.0531 (0.0574) time: 0.4439 data: 0.0044 max mem: 22446 +train: [14] [240/400] eta: 0:01:12 lr: 0.000086 loss: 0.1087 (0.1178) grad: 0.0593 (0.0575) time: 0.4392 data: 0.0042 max mem: 22446 +train: [14] [260/400] eta: 0:01:03 lr: 0.000085 loss: 0.1087 (0.1172) grad: 0.0526 (0.0572) time: 0.4427 data: 0.0043 max mem: 22446 +train: [14] [280/400] eta: 0:00:54 lr: 0.000083 loss: 0.1164 (0.1184) grad: 0.0526 (0.0571) time: 0.4407 data: 0.0044 max mem: 22446 +train: [14] [300/400] eta: 0:00:46 lr: 0.000082 loss: 0.1222 (0.1183) grad: 0.0551 (0.0571) time: 0.6230 data: 0.1741 max mem: 22446 +train: [14] [320/400] eta: 0:00:36 lr: 0.000081 loss: 0.1115 (0.1180) grad: 0.0523 (0.0568) time: 0.4604 data: 0.0038 max mem: 22446 +train: [14] [340/400] eta: 0:00:27 lr: 0.000079 loss: 0.1159 (0.1185) grad: 0.0515 (0.0564) time: 0.4342 data: 0.0039 max mem: 22446 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 0.1214 (0.1188) grad: 0.0541 (0.0566) time: 0.4559 data: 0.0043 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.1219 (0.1190) grad: 0.0586 (0.0566) time: 0.4566 data: 0.0043 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.1111 (0.1190) grad: 0.0530 (0.0565) time: 0.4393 data: 0.0039 max mem: 22446 +train: [14] Total time: 0:03:03 (0.4596 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.1111 (0.1190) grad: 0.0530 (0.0565) +eval (validation): [14] [ 0/63] eta: 0:03:10 time: 3.0224 data: 2.7785 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:19 time: 0.3294 data: 0.0036 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3358 data: 0.0032 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3115 data: 0.0034 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3105 data: 0.0033 max mem: 22446 +eval (validation): [14] Total time: 0:00:23 (0.3723 s / it) +cv: [14] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.075 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:21:52 lr: nan time: 3.2822 data: 2.8906 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:44 lr: 0.000074 loss: 0.1200 (0.1236) grad: 0.0471 (0.0511) time: 0.4573 data: 0.0035 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:06 lr: 0.000072 loss: 0.1200 (0.1216) grad: 0.0505 (0.0538) time: 0.4419 data: 0.0043 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:47 lr: 0.000071 loss: 0.1147 (0.1171) grad: 0.0520 (0.0536) time: 0.4382 data: 0.0043 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:34 lr: 0.000070 loss: 0.1080 (0.1154) grad: 0.0498 (0.0534) time: 0.4480 data: 0.0043 max mem: 22446 +train: [15] [100/400] eta: 0:02:21 lr: 0.000068 loss: 0.1082 (0.1139) grad: 0.0503 (0.0533) time: 0.4310 data: 0.0042 max mem: 22446 +train: [15] [120/400] eta: 0:02:11 lr: 0.000067 loss: 0.0985 (0.1123) grad: 0.0491 (0.0533) time: 0.4526 data: 0.0041 max mem: 22446 +train: [15] [140/400] eta: 0:02:01 lr: 0.000066 loss: 0.1047 (0.1124) grad: 0.0508 (0.0530) time: 0.4485 data: 0.0042 max mem: 22446 +train: [15] [160/400] eta: 0:01:50 lr: 0.000064 loss: 0.1111 (0.1119) grad: 0.0546 (0.0535) time: 0.4349 data: 0.0042 max mem: 22446 +train: [15] [180/400] eta: 0:01:41 lr: 0.000063 loss: 0.1102 (0.1123) grad: 0.0514 (0.0532) time: 0.4457 data: 0.0041 max mem: 22446 +train: [15] [200/400] eta: 0:01:31 lr: 0.000062 loss: 0.1102 (0.1127) grad: 0.0461 (0.0530) time: 0.4561 data: 0.0042 max mem: 22446 +train: [15] [220/400] eta: 0:01:22 lr: 0.000061 loss: 0.1078 (0.1123) grad: 0.0502 (0.0530) time: 0.4520 data: 0.0041 max mem: 22446 +train: [15] [240/400] eta: 0:01:13 lr: 0.000059 loss: 0.1003 (0.1118) grad: 0.0541 (0.0532) time: 0.4450 data: 0.0042 max mem: 22446 +train: [15] [260/400] eta: 0:01:03 lr: 0.000058 loss: 0.1150 (0.1122) grad: 0.0504 (0.0528) time: 0.4426 data: 0.0042 max mem: 22446 +train: [15] [280/400] eta: 0:00:54 lr: 0.000057 loss: 0.1159 (0.1124) grad: 0.0499 (0.0528) time: 0.4414 data: 0.0043 max mem: 22446 +train: [15] [300/400] eta: 0:00:46 lr: 0.000056 loss: 0.1031 (0.1120) grad: 0.0499 (0.0527) time: 0.6411 data: 0.1819 max mem: 22446 +train: [15] [320/400] eta: 0:00:37 lr: 0.000054 loss: 0.1030 (0.1120) grad: 0.0536 (0.0528) time: 0.4436 data: 0.0047 max mem: 22446 +train: [15] [340/400] eta: 0:00:27 lr: 0.000053 loss: 0.1075 (0.1123) grad: 0.0552 (0.0529) time: 0.4404 data: 0.0043 max mem: 22446 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 0.1083 (0.1123) grad: 0.0552 (0.0530) time: 0.4406 data: 0.0042 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.0992 (0.1115) grad: 0.0489 (0.0528) time: 0.4510 data: 0.0042 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.1103 (0.1124) grad: 0.0486 (0.0527) time: 0.4332 data: 0.0041 max mem: 22446 +train: [15] Total time: 0:03:04 (0.4619 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.1103 (0.1124) grad: 0.0486 (0.0527) +eval (validation): [15] [ 0/63] eta: 0:03:17 time: 3.1330 data: 2.8552 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:19 time: 0.3209 data: 0.0033 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:08 time: 0.3178 data: 0.0030 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3072 data: 0.0034 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3061 data: 0.0034 max mem: 22446 +eval (validation): [15] Total time: 0:00:22 (0.3643 s / it) +cv: [15] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.076 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:20:40 lr: nan time: 3.1007 data: 2.7595 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:35 lr: 0.000048 loss: 0.1067 (0.1106) grad: 0.0513 (0.0527) time: 0.4411 data: 0.0029 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:02 lr: 0.000047 loss: 0.1109 (0.1128) grad: 0.0513 (0.0527) time: 0.4439 data: 0.0041 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:45 lr: 0.000046 loss: 0.1127 (0.1128) grad: 0.0479 (0.0514) time: 0.4440 data: 0.0041 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:31 lr: 0.000045 loss: 0.1139 (0.1139) grad: 0.0479 (0.0515) time: 0.4382 data: 0.0041 max mem: 22446 +train: [16] [100/400] eta: 0:02:20 lr: 0.000044 loss: 0.1139 (0.1140) grad: 0.0498 (0.0515) time: 0.4399 data: 0.0042 max mem: 22446 +train: [16] [120/400] eta: 0:02:10 lr: 0.000043 loss: 0.1017 (0.1129) grad: 0.0490 (0.0517) time: 0.4481 data: 0.0041 max mem: 22446 +train: [16] [140/400] eta: 0:02:00 lr: 0.000042 loss: 0.1045 (0.1125) grad: 0.0513 (0.0521) time: 0.4486 data: 0.0043 max mem: 22446 +train: [16] [160/400] eta: 0:01:50 lr: 0.000041 loss: 0.1073 (0.1121) grad: 0.0539 (0.0522) time: 0.4324 data: 0.0042 max mem: 22446 +train: [16] [180/400] eta: 0:01:40 lr: 0.000040 loss: 0.1090 (0.1120) grad: 0.0523 (0.0520) time: 0.4404 data: 0.0042 max mem: 22446 +train: [16] [200/400] eta: 0:01:31 lr: 0.000039 loss: 0.1101 (0.1122) grad: 0.0512 (0.0523) time: 0.4474 data: 0.0043 max mem: 22446 +train: [16] [220/400] eta: 0:01:21 lr: 0.000038 loss: 0.1046 (0.1118) grad: 0.0495 (0.0517) time: 0.4393 data: 0.0042 max mem: 22446 +train: [16] [240/400] eta: 0:01:12 lr: 0.000036 loss: 0.1046 (0.1117) grad: 0.0495 (0.0517) time: 0.4428 data: 0.0041 max mem: 22446 +train: [16] [260/400] eta: 0:01:03 lr: 0.000035 loss: 0.1070 (0.1125) grad: 0.0506 (0.0517) time: 0.4466 data: 0.0041 max mem: 22446 +train: [16] [280/400] eta: 0:00:54 lr: 0.000034 loss: 0.1147 (0.1126) grad: 0.0538 (0.0521) time: 0.4411 data: 0.0041 max mem: 22446 +train: [16] [300/400] eta: 0:00:46 lr: 0.000033 loss: 0.1118 (0.1128) grad: 0.0559 (0.0522) time: 0.6156 data: 0.1740 max mem: 22446 +train: [16] [320/400] eta: 0:00:37 lr: 0.000032 loss: 0.1088 (0.1128) grad: 0.0513 (0.0523) time: 0.4612 data: 0.0036 max mem: 22446 +train: [16] [340/400] eta: 0:00:27 lr: 0.000031 loss: 0.1185 (0.1137) grad: 0.0523 (0.0526) time: 0.4501 data: 0.0042 max mem: 22446 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 0.1202 (0.1135) grad: 0.0552 (0.0526) time: 0.4381 data: 0.0042 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.1046 (0.1128) grad: 0.0487 (0.0524) time: 0.4465 data: 0.0042 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.1046 (0.1127) grad: 0.0508 (0.0526) time: 0.4543 data: 0.0045 max mem: 22446 +train: [16] Total time: 0:03:04 (0.4601 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.1046 (0.1127) grad: 0.0508 (0.0526) +eval (validation): [16] [ 0/63] eta: 0:03:07 time: 2.9746 data: 2.7499 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:20 time: 0.3418 data: 0.0107 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3244 data: 0.0032 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3160 data: 0.0034 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3132 data: 0.0035 max mem: 22446 +eval (validation): [16] Total time: 0:00:23 (0.3729 s / it) +cv: [16] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.076 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:21:06 lr: nan time: 3.1659 data: 2.7672 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:41 lr: 0.000028 loss: 0.0944 (0.1025) grad: 0.0497 (0.0507) time: 0.4547 data: 0.0037 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:04 lr: 0.000027 loss: 0.1017 (0.1075) grad: 0.0508 (0.0518) time: 0.4377 data: 0.0039 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:45 lr: 0.000026 loss: 0.1126 (0.1121) grad: 0.0494 (0.0514) time: 0.4365 data: 0.0044 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:31 lr: 0.000025 loss: 0.1218 (0.1141) grad: 0.0494 (0.0515) time: 0.4353 data: 0.0042 max mem: 22446 +train: [17] [100/400] eta: 0:02:20 lr: 0.000024 loss: 0.1168 (0.1136) grad: 0.0496 (0.0515) time: 0.4373 data: 0.0042 max mem: 22446 +train: [17] [120/400] eta: 0:02:10 lr: 0.000023 loss: 0.1079 (0.1125) grad: 0.0473 (0.0510) time: 0.4514 data: 0.0044 max mem: 22446 +train: [17] [140/400] eta: 0:02:00 lr: 0.000023 loss: 0.1070 (0.1113) grad: 0.0463 (0.0509) time: 0.4617 data: 0.0042 max mem: 22446 +train: [17] [160/400] eta: 0:01:50 lr: 0.000022 loss: 0.1182 (0.1115) grad: 0.0520 (0.0513) time: 0.4425 data: 0.0043 max mem: 22446 +train: [17] [180/400] eta: 0:01:40 lr: 0.000021 loss: 0.1049 (0.1112) grad: 0.0520 (0.0511) time: 0.4327 data: 0.0040 max mem: 22446 +train: [17] [200/400] eta: 0:01:31 lr: 0.000020 loss: 0.1024 (0.1109) grad: 0.0486 (0.0513) time: 0.4542 data: 0.0039 max mem: 22446 +train: [17] [220/400] eta: 0:01:22 lr: 0.000019 loss: 0.1027 (0.1108) grad: 0.0486 (0.0512) time: 0.4419 data: 0.0041 max mem: 22446 +train: [17] [240/400] eta: 0:01:12 lr: 0.000019 loss: 0.0999 (0.1107) grad: 0.0486 (0.0511) time: 0.4371 data: 0.0042 max mem: 22446 +train: [17] [260/400] eta: 0:01:03 lr: 0.000018 loss: 0.1095 (0.1110) grad: 0.0507 (0.0513) time: 0.4377 data: 0.0043 max mem: 22446 +train: [17] [280/400] eta: 0:00:54 lr: 0.000017 loss: 0.1039 (0.1106) grad: 0.0496 (0.0511) time: 0.4422 data: 0.0044 max mem: 22446 +train: [17] [300/400] eta: 0:00:46 lr: 0.000016 loss: 0.1039 (0.1109) grad: 0.0499 (0.0515) time: 0.6405 data: 0.1950 max mem: 22446 +train: [17] [320/400] eta: 0:00:37 lr: 0.000016 loss: 0.1134 (0.1111) grad: 0.0509 (0.0516) time: 0.4630 data: 0.0032 max mem: 22446 +train: [17] [340/400] eta: 0:00:27 lr: 0.000015 loss: 0.1067 (0.1109) grad: 0.0479 (0.0514) time: 0.4482 data: 0.0045 max mem: 22446 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 0.0955 (0.1104) grad: 0.0461 (0.0511) time: 0.4454 data: 0.0039 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.0992 (0.1106) grad: 0.0475 (0.0511) time: 0.4511 data: 0.0042 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.1074 (0.1104) grad: 0.0487 (0.0510) time: 0.4548 data: 0.0042 max mem: 22446 +train: [17] Total time: 0:03:05 (0.4627 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.1074 (0.1104) grad: 0.0487 (0.0510) +eval (validation): [17] [ 0/63] eta: 0:03:15 time: 3.0968 data: 2.8661 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3592 data: 0.0036 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3218 data: 0.0034 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3155 data: 0.0031 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3117 data: 0.0031 max mem: 22446 +eval (validation): [17] Total time: 0:00:23 (0.3800 s / it) +cv: [17] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.076 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:19:52 lr: nan time: 2.9813 data: 2.5983 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:39 lr: 0.000012 loss: 0.1039 (0.1126) grad: 0.0489 (0.0513) time: 0.4565 data: 0.0042 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:03 lr: 0.000012 loss: 0.1039 (0.1113) grad: 0.0509 (0.0507) time: 0.4389 data: 0.0037 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:45 lr: 0.000011 loss: 0.1004 (0.1098) grad: 0.0509 (0.0508) time: 0.4366 data: 0.0041 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:32 lr: 0.000011 loss: 0.1064 (0.1096) grad: 0.0488 (0.0503) time: 0.4427 data: 0.0042 max mem: 22446 +train: [18] [100/400] eta: 0:02:20 lr: 0.000010 loss: 0.1064 (0.1109) grad: 0.0505 (0.0510) time: 0.4479 data: 0.0042 max mem: 22446 +train: [18] [120/400] eta: 0:02:10 lr: 0.000009 loss: 0.1039 (0.1086) grad: 0.0506 (0.0511) time: 0.4437 data: 0.0041 max mem: 22446 +train: [18] [140/400] eta: 0:02:00 lr: 0.000009 loss: 0.1069 (0.1089) grad: 0.0492 (0.0511) time: 0.4533 data: 0.0043 max mem: 22446 +train: [18] [160/400] eta: 0:01:50 lr: 0.000008 loss: 0.1069 (0.1082) grad: 0.0492 (0.0509) time: 0.4526 data: 0.0042 max mem: 22446 +train: [18] [180/400] eta: 0:01:40 lr: 0.000008 loss: 0.1059 (0.1080) grad: 0.0520 (0.0513) time: 0.4328 data: 0.0039 max mem: 22446 +train: [18] [200/400] eta: 0:01:31 lr: 0.000007 loss: 0.1063 (0.1078) grad: 0.0516 (0.0514) time: 0.4551 data: 0.0043 max mem: 22446 +train: [18] [220/400] eta: 0:01:22 lr: 0.000007 loss: 0.1021 (0.1074) grad: 0.0492 (0.0512) time: 0.4501 data: 0.0044 max mem: 22446 +train: [18] [240/400] eta: 0:01:13 lr: 0.000006 loss: 0.1032 (0.1073) grad: 0.0491 (0.0513) time: 0.4441 data: 0.0043 max mem: 22446 +train: [18] [260/400] eta: 0:01:03 lr: 0.000006 loss: 0.1070 (0.1075) grad: 0.0481 (0.0510) time: 0.4427 data: 0.0042 max mem: 22446 +train: [18] [280/400] eta: 0:00:54 lr: 0.000006 loss: 0.1070 (0.1078) grad: 0.0513 (0.0514) time: 0.4370 data: 0.0043 max mem: 22446 +train: [18] [300/400] eta: 0:00:46 lr: 0.000005 loss: 0.1032 (0.1077) grad: 0.0532 (0.0515) time: 0.6069 data: 0.1719 max mem: 22446 +train: [18] [320/400] eta: 0:00:37 lr: 0.000005 loss: 0.1025 (0.1079) grad: 0.0498 (0.0515) time: 0.4573 data: 0.0033 max mem: 22446 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 0.1085 (0.1085) grad: 0.0478 (0.0514) time: 0.4654 data: 0.0043 max mem: 22446 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 0.1009 (0.1080) grad: 0.0502 (0.0515) time: 0.4424 data: 0.0039 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.1009 (0.1074) grad: 0.0480 (0.0513) time: 0.4480 data: 0.0039 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.0945 (0.1070) grad: 0.0487 (0.0514) time: 0.4514 data: 0.0041 max mem: 22446 +train: [18] Total time: 0:03:04 (0.4621 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.0945 (0.1070) grad: 0.0487 (0.0514) +eval (validation): [18] [ 0/63] eta: 0:03:11 time: 3.0320 data: 2.7611 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:20 time: 0.3384 data: 0.0042 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3172 data: 0.0030 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3131 data: 0.0034 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3104 data: 0.0033 max mem: 22446 +eval (validation): [18] Total time: 0:00:23 (0.3709 s / it) +cv: [18] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.076 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:20:10 lr: nan time: 3.0266 data: 2.6546 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:36 lr: 0.000003 loss: 0.1070 (0.1057) grad: 0.0475 (0.0478) time: 0.4457 data: 0.0035 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:02 lr: 0.000003 loss: 0.1070 (0.1072) grad: 0.0494 (0.0515) time: 0.4432 data: 0.0041 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:44 lr: 0.000002 loss: 0.1071 (0.1071) grad: 0.0504 (0.0507) time: 0.4381 data: 0.0041 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:31 lr: 0.000002 loss: 0.1062 (0.1076) grad: 0.0495 (0.0501) time: 0.4406 data: 0.0043 max mem: 22446 +train: [19] [100/400] eta: 0:02:19 lr: 0.000002 loss: 0.1144 (0.1094) grad: 0.0467 (0.0495) time: 0.4365 data: 0.0043 max mem: 22446 +train: [19] [120/400] eta: 0:02:10 lr: 0.000002 loss: 0.1121 (0.1086) grad: 0.0468 (0.0495) time: 0.4543 data: 0.0043 max mem: 22446 +train: [19] [140/400] eta: 0:02:00 lr: 0.000001 loss: 0.0975 (0.1075) grad: 0.0490 (0.0494) time: 0.4508 data: 0.0042 max mem: 22446 +train: [19] [160/400] eta: 0:01:50 lr: 0.000001 loss: 0.1056 (0.1075) grad: 0.0490 (0.0493) time: 0.4465 data: 0.0042 max mem: 22446 +train: [19] [180/400] eta: 0:01:40 lr: 0.000001 loss: 0.1082 (0.1073) grad: 0.0510 (0.0496) time: 0.4398 data: 0.0041 max mem: 22446 +train: [19] [200/400] eta: 0:01:31 lr: 0.000001 loss: 0.1028 (0.1072) grad: 0.0513 (0.0497) time: 0.4423 data: 0.0043 max mem: 22446 +train: [19] [220/400] eta: 0:01:22 lr: 0.000001 loss: 0.1039 (0.1080) grad: 0.0534 (0.0501) time: 0.4532 data: 0.0042 max mem: 22446 +train: [19] [240/400] eta: 0:01:12 lr: 0.000001 loss: 0.1048 (0.1077) grad: 0.0540 (0.0501) time: 0.4422 data: 0.0043 max mem: 22446 +train: [19] [260/400] eta: 0:01:03 lr: 0.000000 loss: 0.1014 (0.1075) grad: 0.0487 (0.0502) time: 0.4350 data: 0.0041 max mem: 22446 +train: [19] [280/400] eta: 0:00:54 lr: 0.000000 loss: 0.1114 (0.1083) grad: 0.0490 (0.0503) time: 0.4337 data: 0.0044 max mem: 22446 +train: [19] [300/400] eta: 0:00:46 lr: 0.000000 loss: 0.1175 (0.1086) grad: 0.0496 (0.0504) time: 0.6138 data: 0.1636 max mem: 22446 +train: [19] [320/400] eta: 0:00:36 lr: 0.000000 loss: 0.1123 (0.1087) grad: 0.0533 (0.0507) time: 0.4509 data: 0.0031 max mem: 22446 +train: [19] [340/400] eta: 0:00:27 lr: 0.000000 loss: 0.1034 (0.1085) grad: 0.0529 (0.0506) time: 0.4672 data: 0.0045 max mem: 22446 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 0.1103 (0.1086) grad: 0.0513 (0.0508) time: 0.4527 data: 0.0043 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.1103 (0.1086) grad: 0.0519 (0.0509) time: 0.4395 data: 0.0039 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.1046 (0.1083) grad: 0.0478 (0.0508) time: 0.4772 data: 0.0043 max mem: 22446 +train: [19] Total time: 0:03:04 (0.4621 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.1046 (0.1083) grad: 0.0478 (0.0508) +eval (validation): [19] [ 0/63] eta: 0:03:10 time: 3.0314 data: 2.7559 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:20 time: 0.3603 data: 0.0043 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3323 data: 0.0036 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3240 data: 0.0037 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3246 data: 0.0037 max mem: 22446 +eval (validation): [19] Total time: 0:00:24 (0.3858 s / it) +cv: [19] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.076 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9811507936507936, "hparam": [3.7, 1.0], "hparam_id": 32, "epoch": 19, "is_best": false, "best_score": 0.9818948412698413} +eval (train): [20] [ 0/297] eta: 0:14:03 time: 2.8402 data: 2.5720 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:08 time: 0.3459 data: 0.0030 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:47 time: 0.3677 data: 0.0036 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:32 time: 0.3317 data: 0.0036 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:21 time: 0.3426 data: 0.0039 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:13 time: 0.3420 data: 0.0038 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:04 time: 0.3289 data: 0.0035 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:56 time: 0.3292 data: 0.0035 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:48 time: 0.3368 data: 0.0037 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:41 time: 0.3529 data: 0.0035 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:34 time: 0.3621 data: 0.0034 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3351 data: 0.0035 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3337 data: 0.0035 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3352 data: 0.0032 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:05 time: 0.3446 data: 0.0033 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3243 data: 0.0034 max mem: 22446 +eval (train): [20] Total time: 0:01:44 (0.3511 s / it) +eval (validation): [20] [ 0/63] eta: 0:02:56 time: 2.8016 data: 2.5117 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:19 time: 0.3411 data: 0.0036 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3377 data: 0.0033 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3059 data: 0.0032 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3048 data: 0.0032 max mem: 22446 +eval (validation): [20] Total time: 0:00:23 (0.3708 s / it) +eval (test): [20] [ 0/79] eta: 0:03:41 time: 2.8036 data: 2.5212 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:28 time: 0.3695 data: 0.0035 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3750 data: 0.0034 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3590 data: 0.0038 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3285 data: 0.0035 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.3931 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9818948412698413, "hparam": [3.7, 1.0], "hparam_id": 32, "epoch": 8, "is_best": true, "best_score": 0.9818948412698413} +eval (train): [20] [ 0/297] eta: 0:13:35 time: 2.7457 data: 2.4825 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:01 time: 0.3248 data: 0.0033 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:39 time: 0.3354 data: 0.0028 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:30 time: 0.3700 data: 0.0040 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:20 time: 0.3426 data: 0.0034 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:11 time: 0.3338 data: 0.0035 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:04 time: 0.3438 data: 0.0033 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:56 time: 0.3522 data: 0.0037 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:48 time: 0.3285 data: 0.0031 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:41 time: 0.3158 data: 0.0031 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:34 time: 0.3725 data: 0.0038 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3606 data: 0.0031 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3311 data: 0.0033 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3466 data: 0.0035 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:05 time: 0.3371 data: 0.0035 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3211 data: 0.0036 max mem: 22446 +eval (train): [20] Total time: 0:01:44 (0.3513 s / it) +eval (validation): [20] [ 0/63] eta: 0:02:54 time: 2.7638 data: 2.5012 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3714 data: 0.0042 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3503 data: 0.0035 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3227 data: 0.0034 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3199 data: 0.0034 max mem: 22446 +eval (validation): [20] Total time: 0:00:24 (0.3896 s / it) +eval (test): [20] [ 0/79] eta: 0:03:37 time: 2.7593 data: 2.4966 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:27 time: 0.3426 data: 0.0042 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:15 time: 0.3479 data: 0.0030 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3435 data: 0.0036 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3242 data: 0.0033 max mem: 22446 +eval (test): [20] Total time: 0:00:29 (0.3732 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|-----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 8 | 0.00111 | 0.05 | 32 | [3.7, 1.0] | train | 0.00051376 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 8 | 0.00111 | 0.05 | 32 | [3.7, 1.0] | validation | 0.07408 | 0.98189 | 0.002097 | 0.97814 | 0.0027741 | +| flat_mae | patch | attn | hcpya_task21 | best | 8 | 0.00111 | 0.05 | 32 | [3.7, 1.0] | test | 0.074462 | 0.98056 | 0.0019951 | 0.97846 | 0.0024018 | + + +done! total time: 1:17:30 diff --git a/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..b884d3a11ab849917713fe09abd297262b75e664 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.127709540724754, "train/grad": 0.24880805909633635, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.042659912109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.03960693359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.03454833984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.0294921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.024437255859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0175048828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.009542236328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.000714111328125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.988955078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.97648681640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.96413818359375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.945430908203125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.927169189453125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9004327392578126, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.8745953369140627, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.849549560546875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.817771301269531, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.780042724609375, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.7375306701660156, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.6977250671386717, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6483198547363282, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.5967369842529298, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5381915283203127, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.4745284271240235, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.406996841430664, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3252729606628417, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2512362480163572, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.1839089965820313, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0933858585357665, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.988145260810852, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8967936730384827, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8163371801376342, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7119159144163132, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6231037274003028, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5230799646675586, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.429103167578578, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.335708532296121, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2539560018107294, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.1722494019195437, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0797843950241803, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0135848303791135, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9606991474889219, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8973145052138716, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.8459402531478554, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.7930793924629689, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.7421865657065063, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.7048044074140489, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.6618275571428239, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.6312446358520538, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04689119504764676, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04682890525087714, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04672364523634315, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04661894557997584, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04651210924610496, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04636718422174454, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0461999199911952, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.04601604426279664, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04576880997046828, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04549861639738083, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04522836559452117, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04481308691203594, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.044396331245079634, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.04376769376918674, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04313595226034522, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.042509717773646115, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0416947203874588, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04073354793712497, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03969319399446249, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03878281136974692, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03774378828704357, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03676436328329146, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03576303158886731, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03477767408825457, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03382515381090343, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03276583265513182, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03187679483555257, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.031110678082332015, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.030129804462194442, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.029052502340637148, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02815974676515907, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02739914969075471, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.026437753257341684, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0256458257464692, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02478848023340106, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.024006756581366064, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.023234095931984486, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.022440264413598924, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021736081298440693, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02118218633811921, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02047232139389962, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02026727322023362, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0197832066565752, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.01920614451635629, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.019075689375167713, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.018995663982350378, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.018924384327838197, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01866193087771535, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.018742895876057444, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0013480186462402, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.992279291152954, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.977292537689209, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.9624009132385254, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9474856853485107, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.9269511699676514, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.903935194015503, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.8781533241271973, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.8446433544158936, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8088791370391846, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.774103879928589, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7221059799194336, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.672089099884033, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.600959062576294, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.534449577331543, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.472547769546509, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.396127462387085, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3096399307250977, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.215951919555664, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.130403757095337, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.025554895401001, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.9179301261901855, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.7981231212615967, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.6708710193634033, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.5402631759643555, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.3893113136291504, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.2604039907455444, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.1489969491958618, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.0073248147964478, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.8524525761604309, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.725272536277771, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.6199026703834534, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.4947930574417114, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.3974107801914215, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.2942882478237152, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.22879371047019958, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1956067532300949, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16983917355537415, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15133315324783325, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13503728806972504, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1253798007965088, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.11880676448345184, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.11515843868255615, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.11512056738138199, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.11041836440563202, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.10165654122829437, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.0991399735212326, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.10912884771823883, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.13012586534023285, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06125992063492063, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0625, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07663690476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.10987103174603174, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.16046626984126985, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.21577380952380953, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.24330357142857142, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2554563492063492, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2693452380952381, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.28621031746031744, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.30481150793650796, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.33035714285714285, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.34672619047619047, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.36061507936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.3665674603174603, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.37276785714285715, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.3933531746031746, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.4201388888888889, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.4585813492063492, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.5017361111111112, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5374503968253969, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5642361111111112, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.5823412698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.6029265873015873, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.6349206349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.6743551587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.7095734126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.7351190476190477, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.7626488095238095, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.7854662698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8127480158730159, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8375496031746031, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.863343253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.8975694444444444, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9201388888888888, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9357638888888888, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9402281746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9469246031746031, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.955109126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9642857142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.966765873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9585813492063492, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01567514404899813, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.015897693952849175, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.022018805801897863, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.03504984342345561, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.049999468847494746, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.06286054801446153, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.07047739840732832, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.07579763116716584, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.08673765824684315, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.1007034052197546, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11809664340446487, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.14188836461180937, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15521573184823204, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1675052178051822, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17095032896272938, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17467365771355578, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1899911831047029, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20886420712630357, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.23741059279407833, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.27103316256355103, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.29807444475693795, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.3232862665156937, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.34206291317444804, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.3783730325192945, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.4289712992417493, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.5066156835962099, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.5686776102352769, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.606727896888386, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.6549558114489754, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.695893090799215, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.7514730433154492, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.7993584297650648, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.836680014528479, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8775903706567696, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9040374260405047, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9226244884605966, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9248612372820889, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9341653393612546, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9418081588895989, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9446691752906499, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9506458147622606, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9548422970155498, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9547535712804799, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9496926541465511, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9566618404149692, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9596445204153793, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9604944757964572, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9560873150361952, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9504489343871937, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 0.7048044074140489, "validation/loss_best": 0.0991399735212326, "validation/acc_best": 0.966765873015873, "validation/f1_best": 0.9604944757964572} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.2124689882993698, "train/grad": 0.17623206458985805, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.923759765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.903887939453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.8711993408203127, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.839345703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.8082440185546873, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.765904541015625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.719378662109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.669270324707031, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.6062356567382814, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.5422735595703125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.482480773925781, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.397321319580078, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.3191507720947264, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.2123773956298827, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.1155943298339843, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.02693904876709, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.9190681457519532, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.7979282569885253, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.6689811801910401, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.5547526836395265, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.420858669281006, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.2904065036773682, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.1534002089500428, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.016900589466095, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.8862318649888039, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.7453068123757839, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.633556199669838, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5454822115972638, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.44623165030032397, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.3546821797266603, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.29217751167714595, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.2486146122030914, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.20398210749030113, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.175143176112324, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.15188647909089922, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.13756332537159324, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1263792623206973, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11837329818867147, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1126776285097003, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.10778113391250371, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.10377338983118534, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1007282717525959, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1003967865742743, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.10146817496977746, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.10378090634010732, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1107847128342837, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.1326345144957304, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.14443282882682978, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.201253119148314, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04392932210117578, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04350677434355021, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.042801081342622636, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.04209834549576044, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04139605808071792, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.040416510822251436, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03930577928200364, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.038086289893835786, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03654881665483117, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.035032285768538715, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03368830393999815, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03193940385244787, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030512354765087367, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02882436038926244, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02753058754839003, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.026512858355417848, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.025445771990343927, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02441250005736947, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.023427603011950852, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022614513379521667, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021697725928388537, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020823955731466413, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.019912031218409537, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.018998496728017927, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.018113837693817914, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.017093669176101684, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.016178553253412248, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01548841375624761, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014691893907729537, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013921389000024646, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.013352610173169523, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01300689856056124, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012571710089687257, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.012237553896848113, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.011941127956379204, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011684550528880209, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01130761283216998, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011084112925454975, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.011070544937392696, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011073808355722576, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011095351176336407, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011015076355542988, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011257239838596434, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.011728774458169937, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.011891784407198428, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.012680708282859996, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.014909395879367366, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01585066128638573, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.020560754126054234, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8287758827209473, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.7962963581085205, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.743769645690918, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.693190097808838, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.6444997787475586, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.579805612564087, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.5106685161590576, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.4386773109436035, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.350949287414551, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.265002489089966, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.186164617538452, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.07541561126709, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.9744720458984375, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.8373528718948364, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.714133858680725, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.6031060218811035, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.4709467887878418, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.3279905319213867, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.182377576828003, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.0583614110946655, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.9169681072235107, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7820153832435608, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6449484825134277, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.5185966491699219, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.40592700242996216, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.29034948348999023, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.23207397758960724, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.20241402089595795, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.17435964941978455, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.15051446855068207, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13580411672592163, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12599295377731323, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11611931771039963, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10922765731811523, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.10297665745019913, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09741765260696411, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.09159018099308014, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.08775599300861359, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08778385072946548, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09781637787818909, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.10246048122644424, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0967126190662384, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.09550394117832184, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1346999555826187, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.11098360270261765, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.192274808883667, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.4262383282184601, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.5409742593765259, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.29539981484413147, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.29365079365079366, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.31473214285714285, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3464781746031746, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3655753968253968, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3732638888888889, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3812003968253968, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.39285714285714285, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.40674603174603174, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.42931547619047616, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.46304563492063494, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.4965277777777778, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5451388888888888, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5696924603174603, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5954861111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6121031746031746, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6331845238095238, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6644345238095238, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6976686507936508, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7296626984126984, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7604166666666666, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.7916666666666666, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8216765873015873, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8571428571428571, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8782242063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9032738095238095, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9293154761904762, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9375, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9404761904761905, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9494047619047619, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9575892857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.972718253968254, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.972718253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.96875, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9568452380952381, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9315476190476191, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9193948412698413, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.953125, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.10773570917768333, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.12477504549127665, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.15464423814564585, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.16908573984112688, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.17507170460536234, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.18414789310242588, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.19202743100111738, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.20132803799005852, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.21805343923630707, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.24135643710471325, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.26612174854040493, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.3056586845074269, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.3275404909077177, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.35840068440246364, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.38605713573184, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.42408954240716296, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.48375682246045654, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.5427609113068025, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.6001322021476122, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.6584851687944667, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.711305273213326, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7677472924681625, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8290062639494474, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8561773645854551, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.8854525488743261, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9171341160465396, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9279519453166957, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9270089807190374, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9391857297596999, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9453375544015349, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9494766287652416, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9514932410596963, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9575898282230474, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.960324275260926, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.961275314977711, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.963466441526352, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9652407754024613, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9675082884088649, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9683531441319028, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9647411485544628, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9646751650977518, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.967174883421494, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9664582376824249, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9562717208333217, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9659843465822113, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9472488511957984, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9084716252521984, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8868543066541918, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9454774219753123, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.11837329818867147, "validation/loss_best": 0.08775599300861359, "validation/acc_best": 0.972718253968254, "validation/f1_best": 0.9675082884088649} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.9033897021412849, "train/grad": 0.19828720293939114, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.7065045166015627, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.66021728515625, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.587340087890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5193646240234373, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.456312255859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.37449951171875, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.289192962646484, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.201497573852539, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.095048522949219, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.990396499633789, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.8945377349853516, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.7612561988830566, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.6423324203491212, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.4862914276123047, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.351844916343689, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.2344925212860107, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.098713412284851, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9546317827701568, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.8113252073526382, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.6933749821782113, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5652149295806885, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.453884660564363, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3574022956565022, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2819047748669982, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.2264261895418167, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.18388972537592052, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.1606733231805265, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.1456608416326344, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.13024697743356228, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.11716578713618218, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.10803805760107935, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.10100795844569803, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0932528627756983, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08728830195963383, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.08152949235402047, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07768328933976591, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07521112540736795, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07366290649399161, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07228134674951434, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07490604306571186, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0792749800439924, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.08733261172659695, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.09666961833834647, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.11699302967637777, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.17110599212348462, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.2677072844468057, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.46719212068244814, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.8857133353035898, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.8176022633817046, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03965581214055419, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.038502310942858456, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.036633727569133044, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03488815481774509, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.033317508464679124, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.031444410784170033, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.029747044006362557, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.028292883224785327, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026879929658025502, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025775477439165115, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02492826166562736, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023895627660676835, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023044131174683572, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021965041207149625, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021048258491791785, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02025075857993215, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019338464424945415, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018380229361355305, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.017443511011078952, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.016679423749446868, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015804359437897802, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014913928972091525, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013994231969118118, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013159335581585765, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012499646393116563, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.011905095295514912, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011575145202223212, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.011305689129512757, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010988854884635657, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01072658560005948, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.010559810862177983, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010379180131712929, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01016805568127893, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.009920850795460866, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009671858234796674, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009567345643881708, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009563630017219112, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009583720591617748, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009559529921971262, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009938237455498893, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01076885926828254, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011979413451626897, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.013358569084084592, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016153034011949785, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.020598141262307763, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.028946299747331067, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04101719875819981, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06676322349114344, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10009205927141011, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5777111053466797, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.519014835357666, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.42899227142334, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3472819328308105, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2726807594299316, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.177337646484375, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.0779013633728027, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.975533127784729, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8511813879013062, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7296751737594604, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.619706630706787, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.469972014427185, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.3404844999313354, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.1755319833755493, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.037200689315796, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.9179180860519409, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.7807848453521729, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.6389168500900269, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.5073807239532471, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.4048038125038147, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2985364496707916, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.23591041564941406, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.1980789750814438, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.17106503248214722, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.15108945965766907, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.13479366898536682, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.12537720799446106, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11872797459363937, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11041922867298126, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10234188288450241, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0963992029428482, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.09105619043111801, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.08308658003807068, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07742152363061905, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07504704594612122, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07461674511432648, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07166104018688202, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07016198337078094, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08134405314922333, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.09022429585456848, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.12298262864351273, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.13500669598579407, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.15575461089611053, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.1720626801252365, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.2530473470687866, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5315116047859192, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.202681303024292, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2944822311401367, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.4863758087158203, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.37103174603174605, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.38070436507936506, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.40054563492063494, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.42584325396825395, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.4521329365079365, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4930555555555556, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5317460317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5679563492063492, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.589781746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.611359126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6304563492063492, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6644345238095238, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6961805555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7371031746031746, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7621527777777778, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7884424603174603, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8214285714285714, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.859375, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8846726190476191, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9089781746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9228670634920635, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9362599206349206, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9454365079365079, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9503968253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9541170634920635, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9575892857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9615575396825397, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9441964285714286, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.935515873015873, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.939484126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9432043650793651, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.1783337306813326, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.18541827176109738, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.19750754301993612, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.21571194765305657, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.23443728307934844, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.26572820911522615, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2974268391476458, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3302961053546524, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.35522301115204385, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3858147710787445, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.41694690485887353, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.47812395720636963, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5345807636920219, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.6061075178534538, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6545650551989105, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.6971159589070548, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.7598634775706186, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.827901137601403, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8644150223888842, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8944800247127128, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9069962987823204, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9261412235099552, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9374397397402557, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9418960072366017, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9452269853511099, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9496289293860571, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9521963928657209, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9530313355058436, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9570565384357805, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9621936040340554, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9651877718132575, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9667206988148197, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9687805742523197, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9706528192293737, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9733341622191176, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9727226365441136, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9748481577865514, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9741919187978342, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9676970397108403, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9688093884961139, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9583094475149417, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9580012203070029, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9577062518255305, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9581300725507127, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.953504121832407, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9252319281879356, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9144954681888683, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9244483558763887, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9367170770010653, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 0.07366290649399161, "validation/loss_best": 0.07016198337078094, "validation/acc_best": 0.9766865079365079, "validation/f1_best": 0.9741919187978342} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.8122111567854882, "train/grad": 0.29798529751598835, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.4476193237304686, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.379361114501953, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.2756887817382814, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.181723327636719, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.0953531646728516, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.984215087890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.8686975860595703, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.750742835998535, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.6104376602172852, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.4769103336334228, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.3594531536102294, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.2037456130981445, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.071487650871277, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.905269980430603, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.7685011810064316, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.6552539554238319, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5326754695177078, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.41414224825799467, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.31580892637372016, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.25254942663013935, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.20300964664667845, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.1724857428856194, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1498386747483164, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.13248315993696452, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.11941905965097249, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.10760995912365616, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.09919925074093044, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.09251691645942628, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.08443776189349592, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.07630995751358569, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.07029411759227515, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.06576471557840705, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.06068202430382371, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.055618994338437915, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.051981500554829835, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.05000277147628367, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.049442755915224554, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0512043718341738, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.059557277215644716, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.06900672812014819, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.09761418600566685, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.11932873866520823, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.21292527992278337, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.34678491551429036, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.5474245014879853, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.1123706333898007, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.3449533703178167, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.714900425737724, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.9315433845948426, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03252660901285708, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.030965664610266685, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.028948310427367688, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.027506806245073675, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02645368273369968, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02536652428098023, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.024425739776343106, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023568013114854694, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02260676591657102, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02170961758121848, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02092107995878905, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019877131623215975, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018995602731592953, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01789161771070212, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01697407175321132, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.016178083484992386, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.015205376390367746, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.014000673242844642, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.012854753823485226, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.012021582077722996, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.011338512133806943, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.010918625525664537, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01055644768755883, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.010248792939819396, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.009948301545809954, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.009628459337400273, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.009380466134753079, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.009154144036583602, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.008835596378194168, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.008514408892951906, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.008353539935778826, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.008306514144642278, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.008212293345131912, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.007973940167285037, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.007947612136631506, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007989101292332634, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008365409380057826, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008617438822984696, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009875279038969892, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011531762735685333, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.013803050125570735, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.015790689116111024, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02500335197080858, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03460817470797337, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.044171559452079234, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07228110133204609, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.08529170334339142, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.13937982631847262, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.18245921274647117, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.3080687522888184, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.2314887046813965, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.115837335586548, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.0108771324157715, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.9145318269729614, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.7913341522216797, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.664690375328064, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.5378657579421997, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.390372633934021, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.2534732818603516, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.135157585144043, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9799033999443054, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.8482950329780579, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6842625737190247, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5554533004760742, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4550924003124237, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.34594202041625977, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.25903499126434326, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.21139664947986603, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.18401014804840088, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.1604849249124527, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1436055600643158, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.13118596374988556, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.12109944224357605, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11301139742136002, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10583129525184631, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10081511735916138, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.09835915267467499, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.09484060108661652, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.08996322751045227, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.09149044752120972, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.09518953412771225, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.09326394647359848, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07765156775712967, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07316165417432785, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07104408740997314, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.08553199470043182, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07904074341058731, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.11424265801906586, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2590058445930481, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.21073415875434875, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.40637314319610596, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.509262204170227, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7371184229850769, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9865925908088684, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.9480057954788208, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.256707191467285, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.196831226348877, "validation/loss_048_lr5.0e+01_wd1.0e+00": 7.8790717124938965, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.44518849206349204, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.47693452380952384, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5277777777777778, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5615079365079365, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5833333333333334, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6049107142857143, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6245039682539683, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6502976190476191, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6847718253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7182539682539683, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7415674603174603, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7715773809523809, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8043154761904762, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8447420634920635, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8742559523809523, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8965773809523809, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9208829365079365, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9327876984126984, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9412202380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9523809523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9561011904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9588293650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9635416666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.96875, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.96875, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.966765873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.96875, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9747023809523809, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9407242063492064, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9558531746031746, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9375, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9434523809523809, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9471726190476191, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.939484126984127, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9367559523809523, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9312996031746031, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9362599206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2299771240989574, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2528629137492116, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2953937024215351, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.32283134461613905, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3454025377001869, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.37411473829406966, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.40779552827837395, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4534346121631071, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.5228062557553874, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5765786647242107, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.6189594034499487, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.6728023635197746, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7365080064297351, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.807556129064426, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8493779217929723, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8769556772387246, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9078450870291264, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.921931820469946, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.932748736543135, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9390515705946557, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9425902708565129, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9464832326928475, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9507890295517717, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9559883320573397, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9583250133002371, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.960049731560715, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9614103406023512, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9633501908809741, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9630750195800525, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.965582594123413, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9635997372824759, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9625960662197971, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9651780840550354, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9723973013755143, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9713900580159944, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9714368891440293, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9666509677881711, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9700679408395981, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9597702291469977, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9269957422822437, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9429831855754923, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9244408857899084, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9470836955587212, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9315147774266586, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9381136559882183, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9306632063130055, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9161817446821446, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9208830066363889, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9218627349132634, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.051981500554829835, "validation/loss_best": 0.07316165417432785, "validation/acc_best": 0.9761904761904762, "validation/f1_best": 0.9713900580159944} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.0070152476429939, "train/grad": 0.5293884335458279, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.1787628173828124, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.093356018066406, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9637994384765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.8467160034179688, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.7402155685424805, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.606144332885742, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.4710535430908203, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.3387955188751222, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.1877351379394532, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.0492069816589356, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9300448417663574, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.7756170952320098, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6500210320949554, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5031172224879265, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.39159902349114417, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.3114981158822775, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.24332155890762805, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.19737927136942746, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.1664727962575853, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.1464933504909277, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.12862156893126667, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.11528028400614858, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.10422363472171128, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.09499881749041378, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.08701984548009932, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.07874272159300745, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.07228683980181813, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.06717226276174187, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.06087840124964714, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0537169907707721, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.048061859123408796, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.04357201498001814, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.038871270529925825, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.034601972568780184, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.03355340920388698, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03814166166819632, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04687144479714334, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06946792217902839, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07409653850831091, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.15414746781811117, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.3131367829814553, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.29214405300095675, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.587355644069612, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.955354403508827, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.1549967852979899, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.8866147697158158, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.3394045354798436, "train/loss_047_lr4.3e+01_wd1.0e+00": 8.1235405908525, "train/loss_048_lr5.0e+01_wd1.0e+00": 10.455523955225944, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02751317826099694, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.026487008444964887, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02526430306956172, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024354403903707862, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023607199834659694, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022711770003661515, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02182507626712322, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020959896808490155, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019970216965302826, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01906483232509345, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018281659008935094, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.017236549467779697, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.016336314026266337, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01510330690536648, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013835743395611643, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012743090244475752, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01176266624359414, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01113145778188482, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01065027673728764, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010248105628415943, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009826219696551562, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.009498436644207686, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.009211087346775457, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008962698952527716, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.008725365330465138, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.008487260254332796, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.008318420170107857, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008196436815778725, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.007970562094706111, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007574706270243041, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.007324351771967485, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.007161345402128063, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006980450133851263, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006875638755736872, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.007177172337542288, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007777135081996675, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008803113116882741, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011446458599239121, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01272316998714814, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.020055183561344166, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.031168101774528622, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.030696964227827264, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04760835827328265, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07085392309876624, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07850119174362521, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.14360081957653165, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.13152345830923878, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.2939048543944955, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.3277026677504182, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.045567035675049, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9522486925125122, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8111611604690552, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.685012936592102, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.571653127670288, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.431462049484253, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.293310284614563, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1602683067321777, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.0100399255752563, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8724528551101685, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7543452978134155, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.605303168296814, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.49071675539016724, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3561001121997833, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.2670508027076721, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.2227984517812729, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.18901284039020538, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.16276757419109344, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1428929716348648, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.13054780662059784, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11985457688570023, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11141663044691086, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10429410636425018, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.098024383187294, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0922408252954483, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.08665139228105545, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.08307906240224838, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.0799918994307518, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07787192612886429, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07708942890167236, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07739762961864471, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07651863992214203, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.08147236704826355, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08606259524822235, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08522742241621017, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.10090082883834839, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.10658052563667297, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1243167296051979, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.143998920917511, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.23315562307834625, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5379210114479065, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.32198524475097656, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.61850506067276, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.4965243339538574, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.7121609449386597, "validation/loss_045_lr3.1e+01_wd1.0e+00": 4.9149861335754395, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.583491325378418, "validation/loss_047_lr4.3e+01_wd1.0e+00": 14.925379753112793, "validation/loss_048_lr5.0e+01_wd1.0e+00": 14.678278923034668, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5461309523809523, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5679563492063492, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5944940476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6170634920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6403769841269841, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6750992063492064, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7033730158730159, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7338789682539683, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7666170634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7934027777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8234126984126984, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8663194444444444, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8940972222222222, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9186507936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9293154761904762, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9379960317460317, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9451884920634921, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9563492063492064, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9682539682539683, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.970734126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.972718253968254, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9610615079365079, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.970734126984127, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9312996031746031, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9538690476190477, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9471726190476191, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9362599206349206, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9226190476190477, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9312996031746031, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3042082842298586, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3241952655866598, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.35820087719292326, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3953460368668867, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.4354211046397088, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5036648611112587, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5510060400326677, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6066585394218081, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.669926339371194, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7116763228078424, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7694940223819893, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.842462893370138, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8774905551857993, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9060209278141794, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9169707066456452, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9260945322185213, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9336290111147194, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9431393430585409, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9480874030477899, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9510615938052844, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9536086529123277, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9579832744567909, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9617507488135382, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9644772225792677, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9669544292122377, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9685513138683874, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.968357545379274, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.968815031005603, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.971574976648949, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9716688418133433, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9728930714598422, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9744079091612586, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.97254153121872, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9700921284936554, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9718487771270515, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9676191337439335, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.967456275101848, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9672754745299218, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9635863361465149, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9606684366410722, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9538779996130405, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.965965653751294, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9515919655688498, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9020006016271976, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9501052866625899, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9287439217667194, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9249212676312129, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9049951794407298, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9230280562290474, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.04357201498001814, "validation/loss_best": 0.07651863992214203, "validation/acc_best": 0.9766865079365079, "validation/f1_best": 0.9744079091612586} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 1.141784567385912, "train/grad": 0.6203595706820488, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9285965728759766, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.8270303726196289, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.674904899597168, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.5410916709899902, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.422991886138916, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.2792334079742431, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.1394367027282715, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.0055518531799317, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.855021755695343, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7195619940757751, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6081748148798942, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.47341996982693674, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.36863961711525917, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.2659326948970556, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.2143677418678999, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.185731513351202, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.16102304220199584, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.14120739341713487, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.12609689998440443, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.11583033230155707, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.10591616107150913, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.09740775163285434, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.08921328257769347, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.08126518894918262, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.07350758462212979, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.06482623937539757, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.05766747525893152, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.052326853116974235, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0458358885999769, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.03904772972688079, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.03283936952240765, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.028828700054436922, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.025969489235430954, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.02600549074821174, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.034642775347456335, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03910380561836064, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.053374682869762186, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08273823683150112, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.15926000708714128, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.3124433295801282, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.4966976064071059, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.77499983523041, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.7354941421933472, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.3200256172474474, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.395608820253983, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.738561123730615, "train/loss_046_lr3.6e+01_wd1.0e+00": 7.734732470894232, "train/loss_047_lr4.3e+01_wd1.0e+00": 8.58451400924474, "train/loss_048_lr5.0e+01_wd1.0e+00": 11.610745971696451, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024784633442759513, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02402943281456828, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022988454475998878, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022098471047356724, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021313911471515894, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02036256516817957, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0194458232447505, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018582039740867914, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017607443039305507, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016707998705096542, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015913018686696888, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01476137492340058, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013528257543221116, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01203012629877776, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011254349322989583, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01079604176338762, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010271499960217624, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009816281639505178, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009496636841213331, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009290144389960914, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009049636359559372, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008807088631438092, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.008547550520161167, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008279231637716293, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007977032158523798, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007675058278255165, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.007422490505850874, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0072743611090118065, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.007037659023189917, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006694116267026402, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006229392946697772, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005979913025657879, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0059435610603395615, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006202177526683954, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0073140483487077294, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008158922165530385, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010307254441468103, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.015600899989076425, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021268974419799632, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0296459244770449, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.044363280256025914, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06009003498897766, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05884406226604824, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.08400324961863664, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.1367883394766568, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.2058768744021654, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.2706493045762181, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.27306525487452743, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.3408202654868364, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.811517357826233, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.7045637369155884, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5465726852416992, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.4098246097564697, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.2909114360809326, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.14777672290802, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.009389042854309, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8768659830093384, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.728213906288147, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5980589389801025, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.49498826265335083, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.36847570538520813, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2783025801181793, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.2153376042842865, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1830563247203827, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1621183305978775, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14336924254894257, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12882672250270844, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.11733180284500122, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10938969254493713, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.10213813930749893, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09669016301631927, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0913168415427208, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.08560246974229813, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.08029833436012268, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.07458320260047913, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.07078500837087631, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06865883618593216, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06696397066116333, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.06653391569852829, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.06740298867225647, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.06953118741512299, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07505300641059875, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08155488222837448, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.10527247935533524, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11697287112474442, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14330150187015533, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.22096560895442963, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2988903224468231, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.47686395049095154, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.7600687146186829, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.572716236114502, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.1742762327194214, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.5483999252319336, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.138943672180176, "validation/loss_045_lr3.1e+01_wd1.0e+00": 5.395029544830322, "validation/loss_046_lr3.6e+01_wd1.0e+00": 8.96638298034668, "validation/loss_047_lr4.3e+01_wd1.0e+00": 8.235665321350098, "validation/loss_048_lr5.0e+01_wd1.0e+00": 15.214460372924805, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6011904761904762, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6155753968253969, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6473214285714286, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6820436507936508, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7078373015873016, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7388392857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7700892857142857, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8010912698412699, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8373015873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8678075396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8896329365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9164186507936508, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9300595238095238, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9422123015873016, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9506448412698413, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9521329365079365, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9563492063492064, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.964781746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9670138888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9692460317460317, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9714781746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.970734126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.966765873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9645337301587301, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9429563492063492, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9528769841269841, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9608134920634921, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9610615079365079, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9444444444444444, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9518849206349206, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9265873015873016, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.368064416577621, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3907037912928977, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.44620166406448614, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5180171603277499, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5611591188289559, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6174778275829494, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6813481199143205, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7321071365506462, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7980384350676485, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8427838648015066, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8704163894336632, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9037631080803249, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.918065342676473, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9326791071895816, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9416630058012165, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9439934395135816, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9484952717448595, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.951921606994685, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9576973425470732, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9606612644414907, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9633660296243134, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9655337106387043, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9652995646398014, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9674685709358247, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9688494433484452, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9693571895870076, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9704674736939461, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.969163599214474, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9699997422247909, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9739566708597257, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9737310797649359, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9744578393511448, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9736428435838285, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9698799808197796, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9722455067635659, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9672663298865827, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9634649546823288, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9625460712870696, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9625192162669448, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.956885657586052, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9554625791699585, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.927835087202831, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9527728611520031, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9560179283616524, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9568605177176488, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9498001276355533, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9369859124616621, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9490050566406312, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9134022076331408, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.028828700054436922, "validation/loss_best": 0.06953118741512299, "validation/acc_best": 0.9784226190476191, "validation/f1_best": 0.9744578393511448} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.7658625018596649, "train/grad": 0.5069651198387146, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.7110482406616212, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5988347816467285, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4349199962615966, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.2952151966094971, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.174768443107605, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.0306890821456909, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8920346021652221, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7606209111213684, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6183010840415955, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.4988081094622612, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.4031626217067242, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.293142853602767, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.23201666202396154, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.18642236134037377, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.15997893359512091, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.1422555383760482, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.12630873405374587, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.11304538109339773, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.1019618099462241, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0936383055895567, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.08487840131856501, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.07697518017143011, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.06886218002066016, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.060888365916907786, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.05332030322402716, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.04512239663861692, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.03837083045393228, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.03271228488534689, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.025492440005764366, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.01914495182223618, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.015180541537702083, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.012930636396631598, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.01281216406263411, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.01047736883163452, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.013282280461862684, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.016432074131444097, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05288551528006792, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09861403165385127, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.15272394323721528, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.22979832359589636, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.2955181993916631, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.47264605429023504, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.5127171743940562, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.6503256294596941, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.28657495114021, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9324208164215086, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.921572183845565, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.991372270397842, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.476038216706366, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023248857436701655, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022492511309683324, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021393554443493487, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020456217736937107, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019651505835354327, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018698568628169595, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017778113265521825, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.016878536995500328, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015837084562517704, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014813047144562006, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013754651006311179, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012161175361834467, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011245929002761841, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010594229449052363, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010125292197335512, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00977477258304134, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.00948078213026747, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00923447912093252, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009021236358676106, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008827962700743229, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008574119310360402, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008326265296782367, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.008040116120246238, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.007745868411730044, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00746833949291613, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007097921674721874, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006716965590021573, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006273058800143189, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005523765073739924, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.004749122520152014, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004162309329694835, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0038113100038026457, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0038726554504683007, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0039471091664745475, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004875370591325918, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005905359449170647, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01148224542142998, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.015974683428939897, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.020798795668340517, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.028390568920693884, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03125609344091967, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04547218765745715, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05367524430372665, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06098107724140314, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.14806447002487994, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.16560207971585356, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.26448528554159567, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1878668739516721, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.24805896520327067, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.6195958852767944, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5053832530975342, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3408335447311401, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2022266387939453, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0832819938659668, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9411680698394775, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8040868639945984, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6752139925956726, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5390676856040955, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.425086110830307, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.3316524922847748, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.24558822810649872, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20455963909626007, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1698104739189148, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.14854325354099274, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.13452212512493134, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.12232887744903564, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1120464950799942, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10355531424283981, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0971851646900177, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09074752032756805, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.08526581525802612, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.07990684360265732, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.07425391674041748, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06973861157894135, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06681620329618454, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06519193947315216, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06417257338762283, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06395097821950912, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.06523405015468597, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.06873534619808197, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07294957339763641, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07887584716081619, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08182937651872635, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.10762311518192291, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.10685056447982788, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.17209281027317047, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2215578258037567, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2622280418872833, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5679811835289001, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5478528738021851, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5383275747299194, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.3603527545928955, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.6598179340362549, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.716313123703003, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.974860191345215, "validation/loss_046_lr3.6e+01_wd1.0e+00": 6.293365001678467, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.352060794830322, "validation/loss_048_lr5.0e+01_wd1.0e+00": 7.116423606872559, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6307043650793651, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.65625, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6981646825396826, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7348710317460317, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7606646825396826, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7936507936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8216765873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8526785714285714, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8831845238095238, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9074900793650794, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9208829365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9340277777777778, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9454365079365079, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9526289682539683, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.955109126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9585813492063492, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.964781746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9724702380952381, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9732142857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9756944444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9769345238095238, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.980406746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9652777777777778, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9613095238095238, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9563492063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9479166666666666, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9615575396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9553571428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9583333333333334, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.42114730505333237, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4677064166250088, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5431631738997689, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6090042493812282, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.659542214597648, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7148115305822245, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7697671962437267, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8254976218436733, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8635669733292675, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8919295524844432, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9082421401784657, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.923824086183822, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9363869529994886, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9436847408793312, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9465670381862197, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9501656806827209, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.952825127999867, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9575504281026898, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9607033108337665, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.963644814544463, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9679869365244175, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9687519976496665, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9719604626454371, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9744740171245928, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9742723179100151, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9746579256882397, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.975453812723653, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9763029434656543, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9772466191478971, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9773624655757158, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9730220510847348, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9726423645926433, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9747939720070384, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9731943472614898, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9699687599981486, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9742950345814297, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9642673088686587, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9583407423059883, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9677923809909468, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9562745448533599, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9600749481193053, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9718016352036122, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9578772895872967, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9474040529468094, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9426292511732962, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9535528616880126, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9494075882562063, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9544249800993845, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9539638412573749, "id_best": 29, "lr_best": 0.0006899999999999999, "wd_best": 0.05, "train/loss_best": 0.01914495182223618, "validation/loss_best": 0.06523405015468597, "validation/acc_best": 0.980406746031746, "validation/f1_best": 0.9773624655757158} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.5028802613914013, "train/grad": 0.3746649768203497, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5323617362976074, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.415804042816162, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.249399700164795, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.110256495475769, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9913837385177612, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8499664783477783, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7154814910888672, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.592891498208046, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.46631367191672324, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.35795761123299596, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.27918276347219945, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.216163990162313, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1834156709909439, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.15351152870804072, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13473634253256023, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12219826323911548, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11038091342896222, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09978999567218125, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.09023653198033571, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.08251731408759952, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.07406930380500854, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06619750410318374, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.05750472324900329, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.048576319273561236, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04054698565974832, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.03164270459674299, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.024483994208276273, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.01850698731839657, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.012044476782903076, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0077128836046904325, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.006245066467672586, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.004136188952252269, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00327115491963923, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.002742482051253319, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0029359589982777836, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.009929696097970009, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05156133616343141, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06008775060996413, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08392807737924159, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.16379647362977268, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.18565788301639258, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.20590127878822387, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.38598150404170156, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.46717451288364825, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.1695487454906106, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.2023244909010828, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.7888997112307696, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.453173162601888, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.258602362768724, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022318133674561976, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021533013870939612, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020407160208560527, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019474084177054464, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018681488358415665, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.017725870497524738, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.016784634892828762, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.015867198705673217, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01475048617925495, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.013402250991202892, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012134641536977141, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.011147472441662103, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.010649948136415333, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010107026507612318, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.009742488621268421, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009478507281746715, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009229332895483822, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008978110945317894, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00871999773895368, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008487622779794037, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008203437364427373, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007904156697331927, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007497316607623361, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0070156362658599395, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006552404901594855, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005875289400864858, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0051749675076280255, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004424532212724443, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0034919837814231867, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0026919049001298844, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0024266246447223238, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0017887258099653991, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0015234899458664586, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0016229728378311847, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0017702009100867145, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.003933073387661352, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.012175204884495089, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.012721148291415147, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0158661503588975, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.025774462745562544, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.025481941065579647, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03164610241659363, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04244216444602413, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04860508050572054, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.10096411891564533, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.10862917341695877, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.16962121902142863, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1495165077125801, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.19320474665078868, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4632174968719482, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3464064598083496, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1810520887374878, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0434774160385132, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9260299801826477, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7861336469650269, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6540745496749878, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5359520316123962, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.41258588433265686, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3074932396411896, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2458423227071762, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.1985393911600113, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.17146946489810944, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.14605776965618134, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.13081200420856476, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1207963228225708, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.11134164035320282, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.10292162746191025, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.09578701853752136, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.09035208076238632, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.08493322879076004, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.07998049259185791, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.07443621009588242, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06958356499671936, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06598614156246185, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06332463026046753, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0626562163233757, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06239373981952667, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06272243708372116, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.06585178524255753, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.06731250882148743, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.06696746498346329, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07194402068853378, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08796979486942291, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0892229750752449, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.10243619978427887, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2542324364185333, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.24972455203533173, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2488502413034439, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.46203160285949707, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5062763094902039, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5238439440727234, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7623247504234314, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.1984955072402954, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.987809658050537, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.204575300216675, "validation/loss_046_lr3.6e+01_wd1.0e+00": 7.872354030609131, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.664804458618164, "validation/loss_048_lr5.0e+01_wd1.0e+00": 5.638765811920166, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.667906746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6949404761904762, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.736359126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7653769841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.792906746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8239087301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8549107142857143, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8819444444444444, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9074900793650794, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9263392857142857, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.935515873015873, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9449404761904762, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9605654761904762, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9665178571428571, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.96875, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.970734126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9732142857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9747023809523809, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9751984126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9754464285714286, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9694940476190477, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.964781746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9699900793650794, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9429563492063492, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9568452380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4888421814234147, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.537366817707518, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.611373553051568, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6681074437939115, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7149366205234868, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7752137434131499, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8287004976618019, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8635712315269488, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8920572827202824, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9148709983325602, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.926652951916782, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9355673496606046, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9427808547003558, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9484693359030303, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.952360326260679, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9552442773438528, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.959727781792152, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9622659528837497, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9648543479225541, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9664805767016448, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9685234281229191, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9700483879340347, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9708217737817384, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9721219316403837, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9740393325676686, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9755435970464752, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9747877664984677, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9754263184881816, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9756325260311569, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.976772810753026, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.976673623292964, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9765210464881351, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9763241985449478, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9743184288783876, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9758349565407615, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9745969833329688, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9566929454466935, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9652369405567915, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9640207640938512, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9651764550152941, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9673110959513287, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9657816841044933, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9635082129674094, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9587166960477824, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9672117426746978, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9658700292131127, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9339689734075511, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9465885647497048, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9487757863756259, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.006245066467672586, "validation/loss_best": 0.06731250882148743, "validation/acc_best": 0.9801587301587301, "validation/f1_best": 0.976673623292964} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.3426835673302412, "train/grad": 0.28102783001959325, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.373011999130249, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2547933101654052, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0886957550048828, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9513047170639038, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8347658157348633, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6980567157268525, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5729577445983887, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.46324108958244326, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3467447231709957, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.25860237862914803, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.21241369105875493, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.17433894671499728, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.15119052903726696, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1290021803509444, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.11526221910491585, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.10546005431562662, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09561584948562085, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0862209692504257, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.07708199488930405, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06954085162840784, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.06114360881969333, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.05304405447095632, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04448872718028724, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03608303757384419, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.027871189955621958, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.019021279234439133, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.012491904906928539, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.00826369938440621, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.004824626836925745, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.002736655594781041, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.002101642545312643, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0015448040328919887, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0010731995292007923, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0008909661415964365, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0004540532361716032, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.010807805536314845, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.036195631911978124, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.03523651150986552, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.049139858409762385, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08192273445427417, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0848976754117757, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.09154100983403624, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1598623568471521, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.20407100925222038, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.35934100645594297, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.7350165588594973, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.368819885076955, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.5902852848358453, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.6500228127930314, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021102231703698637, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020293702073395252, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019150265380740165, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01819709887262434, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01736346394289285, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.016333095636218785, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015310483132489026, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0142924644658342, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012890595835633576, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01153757331194356, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.010897669710684567, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.010396954657044261, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.010015599883627147, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.009533958645770326, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.009221057501854375, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00897550834924914, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008691957400878891, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00838493900373578, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008035973977530374, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007708295278716832, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007296828097896651, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006842204604763538, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0063412260258337485, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005771904521388933, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00512788270279998, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004232896558532957, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.003300781651341822, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0024916684902564156, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0016705025234841741, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0010226221206539776, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0008301241014487459, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0005854002318665153, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00042222746315928817, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00039460713689550177, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0003282521446362807, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004278558564756167, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009122375490487791, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00890630010174786, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.013431928544488731, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.015925710771645406, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.016377069212463454, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.016095916722013846, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02630018186078618, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.034279469843377285, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05260731144267261, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07565460643574033, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.11769964389163079, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.11279319698565922, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.15605573560825353, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3372652530670166, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2200753688812256, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0559085607528687, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9200069308280945, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.804360032081604, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.668921709060669, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5456408262252808, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4365045428276062, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.31943318247795105, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2449524700641632, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.20689988136291504, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.17306818068027496, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1518108993768692, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1321541965007782, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.11995333433151245, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.11167572438716888, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.10365763306617737, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.09660648554563522, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.09039773792028427, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.08553382009267807, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.07998218387365341, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0746631845831871, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06902357935905457, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06512504070997238, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06349697709083557, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06341121345758438, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06380708515644073, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06414102762937546, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06589255481958389, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.06845378130674362, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.06881622225046158, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0696810632944107, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0740802213549614, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08135953545570374, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08594812452793121, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12055615335702896, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.21914727985858917, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2236299067735672, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.23757794499397278, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.37774354219436646, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3890593349933624, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.46662792563438416, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.8131039142608643, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0663350820541382, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.5661394596099854, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.0871822834014893, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.5986361503601074, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.726654529571533, "validation/loss_048_lr5.0e+01_wd1.0e+00": 5.277135848999023, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6954365079365079, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7247023809523809, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7614087301587301, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7921626984126984, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8157242063492064, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8504464285714286, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8821924603174603, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9010416666666666, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9241071428571429, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9357638888888888, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.953125, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.96875, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9702380952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.970734126984127, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9724702380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9749503968253969, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.980406746031746, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9818948412698413, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9747023809523809, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9689980158730159, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9712301587301587, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9585813492063492, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9565972222222222, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5387529413491331, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.589730318219416, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6599962658119402, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7127987879822848, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7577855192613994, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8208150080487089, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8633485819110204, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8845949924539137, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9113909388597512, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9250215800931001, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9333383373402577, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9447080487111139, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9468433997754255, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.95033459722461, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9549178480797155, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9580593952179102, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9620310986159483, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9640202213221075, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9652932460075067, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9669148828732239, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.969773429911562, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9716165904680929, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.973373749536443, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.975322089498937, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9741451623982, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9744391502301794, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9750294032532777, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.976721325137515, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9777606803207959, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9743727057043898, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9758829699084796, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.977365365312487, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9781444390549351, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9739012844888334, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9742976997952248, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9702706685923799, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9625834808464508, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9692595752709514, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9744124096377501, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.972738490106121, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9725451007581075, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9731945019860455, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9652414661173792, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9672978411996462, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9653604861471327, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.966377221607368, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9554969476537281, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9507595010149692, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9515875538160286, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0010731995292007923, "validation/loss_best": 0.0740802213549614, "validation/acc_best": 0.9818948412698413, "validation/f1_best": 0.9781444390549351} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.23813335910439493, "train/grad": 0.20413189731538295, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2778209304809571, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1600484371185302, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9955445289611816, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8600202012062073, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7456779277324677, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6138253349065781, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4959668746590614, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.3898142114281654, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.28168380372226237, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.22061934560537338, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1880023281276226, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1575104608386755, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.13801253124140203, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.11955980574712158, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.10739041637629271, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09829838863573968, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.08861159983091056, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0790940726455301, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06973021841607988, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.061995304357260465, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.053278540363535284, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04491582779213786, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03640624263323843, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.02804521279409528, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.020311689674854277, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.012867701891809701, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.007999318484216929, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.005102145494893193, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0028295696713030337, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0016250158566981554, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0010497043747454882, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007579848170280457, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005083920154720545, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00036533940583467485, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00022298994474112988, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0051994542218744754, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.022065167520195245, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.009923077756538987, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.042806931929662825, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.04493461518548429, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.036903738882392643, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04675804315134883, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.13261380834504963, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.09579262088984251, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.12575417078100146, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.392748996168375, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5252489989623428, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5952092333510518, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.227063597170636, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020458070556633175, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019658879153430463, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01854942529462278, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.017621339061297477, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.016810967614874245, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.015815320126712322, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014792454028502107, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01360668260604143, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.011987242365721613, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.011114414229523391, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01066033890005201, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.010148639164399356, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.009751463213469834, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.009379632498603314, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.009106567077105865, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008870783913880586, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008599224265199155, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008299077310366556, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007947707931743936, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0076146691478788855, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0071845181507524105, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0066858553793281315, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006101878905319609, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005404409911425319, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00457211139582796, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0034913766810495874, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0025154530899453675, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0017779540051560616, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0010559994723735144, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0005717438115971163, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0003745646497554844, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0002723645290825516, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00019519779123584158, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00020165376647128142, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00010678611810817529, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.002218617473181439, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006970515381079849, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.005395798254177713, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010021112175467741, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011506117260969489, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.011173406588877553, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.011055196764581648, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02431964876853727, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.021770999178653935, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.030951679159960412, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05133700291332697, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.07423511488841941, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07455070158755757, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10738082736575595, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.236483097076416, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1201171875, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.957858145236969, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8240116834640503, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7112594246864319, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5820890665054321, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.46688902378082275, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.36140650510787964, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.2636505663394928, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2127116471529007, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.1837012767791748, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.15567399561405182, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.13799278438091278, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.12171999365091324, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.11142029613256454, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.10402340441942215, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.09657830744981766, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.08959182351827621, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.08343332260847092, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07875967770814896, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.07384698837995529, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06923459470272064, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0654725655913353, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06352285295724869, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06346482783555984, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06542393565177917, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06649862974882126, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06703497469425201, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06840817630290985, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.06963565945625305, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07028385996818542, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07103108614683151, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07400375604629517, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08132495731115341, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08334026485681534, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11907712370157242, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.18713980913162231, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.24608641862869263, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.28068509697914124, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.40491798520088196, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.40433555841445923, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.49594101309776306, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.0983717441558838, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7529431581497192, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.3773350715637207, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.9194655418395996, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.0599749088287354, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.9389023780822754, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.395651817321777, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7239583333333334, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7462797619047619, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7827380952380952, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8132440476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8425099206349206, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8702876984126984, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.894593253968254, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9149305555555556, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.933531746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.941468253968254, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9556051587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9632936507936508, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9662698412698413, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9739583333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.972718253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9600694444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9734623015873016, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9603174603174603, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9610615079365079, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5865212725251899, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6289208137119278, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6989042100321475, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7508734453456901, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8070643472309944, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8472859011887253, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8780886529636399, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9001343802209266, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9222168307461341, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9323948430692746, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9397076815558962, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9477902438772929, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9489440297215724, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.956307838045555, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9593271547811374, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9629622336813964, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9671211455719332, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9694778284244061, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9702656074232126, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9739198632107184, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9741225922012668, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9733375803624573, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9743702832741382, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9766078877487429, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9767466404202354, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.974289515298476, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9746155701469945, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9760338443788283, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9757721273660568, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9751307708062975, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.975643111926188, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9770639448155836, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.977712519516398, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9732433392169898, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9750690773598523, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9722674194839203, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9698882442755871, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9674060516679913, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9704483400189561, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9721968878672071, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9717201016411046, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9716298827785368, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9580764627978859, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9698561531453094, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9682096005530793, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.970570441028485, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9632514994146915, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9518139696289741, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9548731256476987, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0005083920154720545, "validation/loss_best": 0.07400375604629517, "validation/acc_best": 0.9813988095238095, "validation/f1_best": 0.977712519516398} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.1790001991763711, "train/grad": 0.13753382939845324, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.1871834373474122, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.07059814453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9085765719413758, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7755535876750946, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6645576286315918, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5395414489507675, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4278867007791996, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.3253048324584961, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.24010958410799504, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.1960540824010968, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.16976483397185801, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1435494841262698, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.12667079785838722, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.11033517342992127, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0990874155703932, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09053484971635044, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0813602298591286, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07213793819770217, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06319541112519801, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05567118728533387, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.046909068347886204, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.038459649849683045, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.029696561787277462, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.021236608922481536, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.013939777594059706, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.007763718143105507, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.004568507131189108, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0030058620311319826, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0019447968807071447, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0012179794162511825, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007908025942742824, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.000596156707033515, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00040755067951977255, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002927065920084715, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00017231068573892117, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0013481496647000312, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0035747741442173717, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.014018886666744947, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.016867022011429072, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.009846971044316888, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.029280037796124817, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.024766848804429174, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.06800999810919166, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.030503407511860134, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05724102574400604, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.1484720990806818, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.13016623004339636, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.23272313263267277, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.48551595689728855, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019889115588739514, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019130795910023153, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018073605741374194, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01717758215032518, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01638463287614286, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.015397741612978279, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014302148032002152, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012829390452243388, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01138211382785812, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01069128766655922, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.010243844211800024, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009708851120667533, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00932892014971003, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008951100804843008, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008648281539790333, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008383250220795162, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008068529248121195, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007696572542190552, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007306823378894478, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0069518738955957815, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006473107549245469, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005956192984012887, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005319203913677484, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0045385427912697195, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0035909765667747704, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0024464482846087775, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0015851355294580571, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0010505944674514467, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0006680510268779472, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00042712196120191947, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00026925189742541987, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00020024673192892806, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014168047922794358, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00011146555978484684, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.23054368584053e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0010477055818512327, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0015866955543316409, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.005204627244182358, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007135881864570244, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0035254381465051345, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007711170808646147, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00855888813980386, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.014056468007740469, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.01011515454401897, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.015962399469939527, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.027312683240875418, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.036991995270393235, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04739414496647954, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.060350350459523076, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.1565525531768799, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0410388708114624, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8807955980300903, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.7494620084762573, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.6402463912963867, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5175102353096008, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.40667983889579773, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.30628159642219543, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.23126207292079926, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.191777765750885, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.16770152747631073, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.14382994174957275, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1291332244873047, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.11508201062679291, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.10583195090293884, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.09889555722475052, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.09207871556282043, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.08562690764665604, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.08005435764789581, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07572232931852341, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.07146068662405014, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06743108481168747, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06455693393945694, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.0635191798210144, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06451393663883209, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0661546066403389, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06726735085248947, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06825289130210876, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0692918673157692, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07071977853775024, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07090884447097778, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07146342843770981, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07394418865442276, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.08007235080003738, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08143831044435501, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1025068610906601, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14125549793243408, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.20118145644664764, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20920175313949585, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.38582563400268555, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3577938377857208, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4976672828197479, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7064222097396851, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5067545175552368, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.0700033903121948, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.6542404890060425, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.4479928016662598, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.853254795074463, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.974809408187866, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7378472222222222, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7624007936507936, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7983630952380952, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8288690476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8568948412698413, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8851686507936508, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9064980158730159, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9236111111111112, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9382440476190477, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9464285714285714, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9528769841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9565972222222222, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9635416666666666, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9694940476190477, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.972718253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9749503968253969, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9769345238095238, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9789186507936508, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9662698412698413, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6130779527661375, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6633948646377795, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.725028625343886, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.784210164915619, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8309497528479378, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.866215001941392, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8901851340135318, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9086351184103569, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9287542698030061, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9374410145006349, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9442407870859225, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9483400754346983, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9527513922139884, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9553413776987354, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9601177909401728, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9623280432479734, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9675042086501315, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9686357380707498, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9704323692708718, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.97267274009228, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.973227888059692, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.973575514297038, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9756942447137508, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9752896380531197, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9750322414401079, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9749973702266158, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9763007061592605, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9750482985793845, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9752600835924862, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9752769721660907, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9759773167435326, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9766561644631838, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9777334502754065, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9725888785045568, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9748909760724106, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9729033086075887, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9773230517922075, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9721053001721771, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9759821204130593, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9733530352202713, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9736436403341289, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9738590458144555, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9734473476257582, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9749180243623969, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9731314629893988, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9714634999120433, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9633408425807082, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9635509334253918, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9611429866391428, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00040755067951977255, "validation/loss_best": 0.07394418865442276, "validation/acc_best": 0.9813988095238095, "validation/f1_best": 0.9777334502754065} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.15072697404772042, "train/grad": 0.10081419607624412, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.109052104949951, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9944891548156738, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.836072633266449, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7071911537647247, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6012870973348617, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.4831509706377983, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.37457320898771285, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.27938913509249685, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.21362385038286447, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.1781219920143485, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.15586734896525742, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.13322939969599246, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.11865249076858163, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.10410334521904588, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.09372052665799856, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.08551844043657184, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07662352932617068, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06739912688732147, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05815013097599149, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05023974891752005, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04094807222485542, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03205727936699986, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.02309236672706902, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.015151854446157813, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009253591373562813, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005017565675079823, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0031170878279954193, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0021953385695815085, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0014846237096935511, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009935266617685555, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006620066240429878, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005031167436391115, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00034966263920068743, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00025783207267522813, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00016223720274865627, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00012133591808378696, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0016002350766211749, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00040944828651845455, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00450139120221138, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0039806524384766815, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.020371217811480165, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00807026294991374, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.02049644062295556, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.019408091083168984, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.017360729342326523, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.05289308005943894, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0721477890200913, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.2068407276365906, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.10171894386410713, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01922221844550222, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018416719073429705, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.017273135278373958, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.016296746018342675, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01544582293368876, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01440046084113419, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013205711734481157, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011803045198321342, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010799824099522085, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01026611278532073, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009868761252146214, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009379935276228935, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.009050548328086733, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008728484130697324, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008475139887304977, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008242911817505956, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007956165123032407, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007612766290549189, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007214235402643681, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006825979672721587, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006272224479471333, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005660604562726803, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0048319243334117344, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0038363943912554533, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002754809755133465, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0016359234516858123, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0010261741602153051, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0007243348742485978, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004957044211914763, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00033530755597894313, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00022851419900689506, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00017273029350690195, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001249346749045799, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.626702576497337e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.224970036077139e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 9.888193424671954e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0009670834673020323, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0009923990661278713, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0034869388512755177, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0024641908220711427, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005442201896038806, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0038881279739807957, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.005522436490301336, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.005476742462727826, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.009500120922327873, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.012712814385470193, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.023391412840788824, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.033054402329227454, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.028680955589498146, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0938761234283447, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9794298410415649, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8213782906532288, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.692633330821991, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5869154930114746, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.4690145254135132, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.3597908914089203, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.27002081274986267, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.21103456616401672, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.1774442344903946, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.15633557736873627, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.13509000837802887, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.12218539416790009, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.10963322222232819, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.10110342502593994, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.09484101086854935, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08836749941110611, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.08213772624731064, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07643349468708038, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07210371643304825, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06764863431453705, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06389575451612473, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06208929419517517, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06166006624698639, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06276219338178635, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06485360860824585, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0667676329612732, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06792014837265015, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.06942257285118103, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07138647884130478, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0717417299747467, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0723828375339508, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07496780902147293, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.0796177089214325, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08039084076881409, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09674284607172012, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.135899618268013, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.18011392652988434, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.17692142724990845, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.37805309891700745, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.344048410654068, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4258427023887634, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6341354250907898, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.40995705127716064, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9971006512641907, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.3440401554107666, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.887676477432251, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.2660512924194336, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.1421756744384766, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7524801587301587, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7760416666666666, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8077876984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8435019841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8712797619047619, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8928571428571429, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9184027777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9322916666666666, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9437003968253969, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.951140873015873, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9580853174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9675099206349206, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9692460317460317, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9769345238095238, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9774305555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9655257936507936, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6409190861770621, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6895557991404517, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7451042874655249, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8111982249494231, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8499973582871023, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8757534224944779, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9049928638499379, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9217673016146949, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9352986013708324, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9419147844866829, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9463740253755445, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9495832987692197, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9536205678354577, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9611780617644385, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9633892847210549, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9662214368163022, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9662077148935156, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9689483992476426, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9715435395849726, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9739018089000504, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9738474385798896, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.973884018772237, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9756097500112734, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9756668535603223, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9754444169047968, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9760564221133945, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9749060638368564, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9759605091800962, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9761801635249563, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9748347472861483, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9762068779990446, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9762780676250202, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9777005335334276, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.973118905031698, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9751851535225731, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.974534322259826, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9777308932453217, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9737344980409884, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9752601796390812, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9730017114153062, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9739860967814177, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9731426349867838, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9732792749405595, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9764120113828467, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9695405294576899, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9756291915338897, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9654345434373826, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9649313101318351, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9610950254869012, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00034966263920068743, "validation/loss_best": 0.07496780902147293, "validation/acc_best": 0.9813988095238095, "validation/f1_best": 0.9777005335334276} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.1352017656341195, "train/grad": 0.0781241812184453, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.059669442176819, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9459417653083801, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7893877029418945, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6631456196308136, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5605362433195115, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.44564206779003146, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.33784435808658597, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.2537255913764238, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.19929790910333395, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.16725232264026998, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.14626694327220321, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.12495451661758125, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.11131898261606693, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.09748174565844238, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.08756669076159597, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07972360106185078, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07095110741443932, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.062011504508554936, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05289862243458629, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04498237939551473, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03587685196660459, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.027309097442775966, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01903749655932188, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011993954814970493, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.006996853295713663, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.003828141484409571, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0024499272275716067, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0017399407271295785, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0011855262704193591, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008069431781768799, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005845786072313786, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00045496323145926, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00033076899126172066, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00024727079086005687, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00016704455018043518, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.795511581003666e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.543750382959843e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0001174094993621111, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0023411024548113347, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.004849759396165609, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.004095647847279906, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.005328346751630306, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.012248842995613813, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.002772516943514347, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.011673618657514453, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.020883136754855515, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.021297139832749962, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.05324869385920465, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.07231856832280754, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018906686822883784, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018148539252579213, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.017086571995168925, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.016173058450222016, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015366104999557137, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014305323236621917, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012921646903268993, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0115423662122339, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010742737140972167, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010253792412113398, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009859144887886941, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009401342214550823, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.009091389554087073, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00873123167315498, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008432645103894174, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008153031300753354, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0077889013371896, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007386919396230951, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006922474519815296, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00646859293221496, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005864508848753757, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005160956688341684, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004242364966776222, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003173182434111368, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0021450843918137254, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0012618117043166422, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008290027463226579, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005958769341668813, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.000406643617861846, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002763037770637311, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00019858531093632336, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00015152333578953404, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00011134142252558377, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.859005241902195e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.77756538379981e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.7242040835726584e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 6.430905846277924e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.000295034662218876, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0013174490228645814, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0016178568812818459, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.002962439331606943, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.002949226342992398, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.004412186493159866, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0023031136555700478, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.005787191038896366, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.006443711771489082, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.011067219283196247, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.017158677886502976, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01617838414335192, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0451691150665283, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9317743182182312, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7756029367446899, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6495424509048462, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5472744703292847, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.43264344334602356, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.3257131278514862, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.24788562953472137, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.19771979749202728, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.16787275671958923, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.14834409952163696, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.12907753884792328, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.11706782877445221, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.10542291402816772, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09752237051725388, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.09153955429792404, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08549420535564423, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07995539903640747, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07514853775501251, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07165151834487915, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06824864447116852, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0656876266002655, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06424932181835175, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06372705847024918, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06468518823385239, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06622618436813354, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06801245361566544, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06916869431734085, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07069770991802216, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.0725436732172966, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07235011458396912, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0730912834405899, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07536023110151291, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07988534867763519, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07948306947946548, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09432550519704819, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12670542299747467, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.16959401965141296, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1640659123659134, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3203055262565613, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3258248567581177, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.36265257000923157, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.45603808760643005, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.37202170491218567, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.858715295791626, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.2068852186203003, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.6480493545532227, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.0885727405548096, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.9297428131103516, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7628968253968254, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7881944444444444, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8226686507936508, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8563988095238095, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8802083333333334, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8995535714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9223710317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9362599206349206, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9449404761904762, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.953125, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9558531746031746, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9595734126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9635416666666666, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.96875, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9712301587301587, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9719742063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9744543650793651, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9776785714285714, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9774305555555556, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9677579365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6637249101729892, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7086541295481656, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7729132057319273, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.829704506365056, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8618128911434555, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8822276632964134, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9077883897112786, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9265442424567203, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9352628500165908, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9438709479503425, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9475550564228092, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9515852785563811, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9552821266740137, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9630110055057133, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9657266195821524, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.966382342458392, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9679075175072892, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9704318455149773, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9722410227592277, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9740650025315337, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9737052975096053, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9741298755392674, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9747571704233928, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9750352550828858, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9752392475288642, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9757784556191201, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.975367460129452, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9757406669768871, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9761614410980594, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9750601440537172, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9763475464229087, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9766567466479336, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9766505275952584, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9730312570814316, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.974843389200651, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.973995830515067, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.977592770355932, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.974400907957037, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9763131029287985, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9764637533833336, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9748632940205552, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9724156881336367, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9769129942458699, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9744594471667992, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9744200935520573, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9732485909672418, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9669449038912478, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9676719418304478, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9620540833165027, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00033076899126172066, "validation/loss_best": 0.07536023110151291, "validation/acc_best": 0.9806547619047619, "validation/f1_best": 0.9766505275952584} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.123708188906312, "train/grad": 0.06317018067464232, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.015272960662842, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9010105800628662, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7440156900882721, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6186166566610336, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.517983441054821, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.4054975241422653, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.3012454863637686, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.22935967337340116, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.18317273216322064, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.15532777642831205, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1367640205193311, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.11819936557672918, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.10604306784458459, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.09324588306248188, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.08370479895733297, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07595594923943282, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0673243550490588, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05823840937577188, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.049089568899944425, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04115634116344154, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03217219022102654, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02389946891926229, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01596313267014921, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.009501901865005493, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0053818999044597145, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002999062230810523, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0019569830037653446, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0014177792333066464, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.000978689892217517, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006821357738226652, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005057203583419323, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00039869429543614387, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002949474286288023, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00022211034782230855, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0001559335086494684, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.643528118729591e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.933299168944359e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.148916341364384e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0004873553290963173, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0006192573718726635, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00017991289496421814, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0022704410552978516, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0018071053083986044, "train/loss_043_lr2.2e+01_wd1.0e+00": 9.141704067587852e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.005303250486031174, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.006540895625948906, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0026882716733962297, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.028318822393193843, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.015584741793572903, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018507346543483436, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017751011135987937, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016681139576248824, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015765075273811817, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014957051523961127, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01386831380892545, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012405570265837013, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011255995724350215, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010592428343370557, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010089283732231707, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009657119875773787, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009194006135221571, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008876636086497455, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008488686702912673, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008160475377226249, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00785664118186105, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007487918797996826, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007062293853377924, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006576301282038912, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006093242692877539, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005427066451229621, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004650900259439368, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0036882644082652403, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0026279706513741984, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0016923477606178494, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.001003619741386501, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006721423834096641, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0004934839532870683, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00033678633015369995, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00023309482776312507, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001748824641435931, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00013659379739692668, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001010429933648993, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.031541363834549e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.254188026105111e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.027708978924238e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.1139513946725206e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.6898128244899364e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00024201182076672123, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0003626346221067191, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00023140608162039312, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0014187229174029048, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0017992085467110524, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00021915868529074283, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.002661225818204584, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0026121635065280233, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.003252556271245535, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.008740265228713143, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.006689893804791206, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.0085049867630005, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8962968587875366, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7419314980506897, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6183048486709595, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5185039639472961, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.40566226840019226, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.30193030834198, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.23352842032909393, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.18850640952587128, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.16096358001232147, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.1428440362215042, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.12523150444030762, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.11425189673900604, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1032370775938034, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09568650275468826, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.09002400189638138, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0842379629611969, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07893949002027512, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07416944950819016, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07039105147123337, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06648921966552734, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06369005143642426, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06257441639900208, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06305984407663345, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06463464349508286, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06680117547512054, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06845966726541519, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06978100538253784, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07115238904953003, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07300293445587158, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07295694202184677, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07346488535404205, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0753716379404068, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07982733845710754, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07917595654726028, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09269687533378601, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12410696595907211, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1617671102285385, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15015584230422974, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.299549400806427, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2767440676689148, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.33982306718826294, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5111743807792664, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3377780020236969, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.811572790145874, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.1167824268341064, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.3785309791564941, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.545240879058838, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.5373038053512573, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7703373015873016, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7938988095238095, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8318452380952381, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8630952380952381, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8846726190476191, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9079861111111112, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9268353174603174, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9379960317460317, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9474206349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9536210317460317, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.955109126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9662698412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9685019841269841, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9709821428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.972718253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9739583333333334, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9754464285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9774305555555556, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9699900793650794, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6765668685620934, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7176097380759573, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7898710884485948, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8396801097892291, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8667313006517328, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8919993033164072, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9142965501288894, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9290658161820722, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.938251585797384, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9441806954387706, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9463512481544485, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9540528580296718, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9596560568664669, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9628380388454161, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9663915847821618, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9683249554598952, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.970078402918527, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9722671652004603, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9747705562782837, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.974731494565548, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9752361548093832, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9752295361451578, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.975935031762346, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9763915777462718, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9754396390039274, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9754262535437705, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9759641677684266, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9752282529221177, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9751215414100798, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9762470106890137, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9765355018627444, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9772291228734857, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9778154233008356, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9735785947836229, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9756859263801391, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9742142456652843, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9782730434181085, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9744789601568968, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9769788307482091, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9775645681051225, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9764966979417241, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9749128149791593, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9750565964295878, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9760443800114563, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9763754693616895, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9749661837675965, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9671041496600662, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9691073835629082, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9650360188979124, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0002949474286288023, "validation/loss_best": 0.0753716379404068, "validation/acc_best": 0.9813988095238095, "validation/f1_best": 0.9778154233008356} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.11898733090609312, "train/grad": 0.056464153341948986, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9838932275772094, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8715731310844421, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7183553743362426, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5971107214689255, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4999356460571289, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.38978939190506934, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.2889177417755127, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.22425122287124397, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.1816204322129488, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.1548181256093085, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.13669086214154957, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.11832504865713417, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.10613621085882187, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.09296047335490584, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.083110758299008, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07509301285259426, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06617890900932252, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05686231333762407, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.047417377838864926, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03921661794185639, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.029822395499795675, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02131683113053441, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01362040091305971, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.007900564474985003, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004580983333289624, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002652140371501446, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0017760112136602402, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0013086255080997943, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.00091952552087605, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006475644744932652, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00048694648779928684, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003848160896450281, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00028471450321376323, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00021882493048906327, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00014888300560414791, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.265317089855671e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.098456189036369e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.28156878054142e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 9.273933246731758e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 9.519588202238083e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 7.192371413111687e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0009942629933357239, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0009198098164051771, "train/loss_043_lr2.2e+01_wd1.0e+00": 5.196761339902877e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00045923063531517984, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0008623098209500313, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0010900641791522503, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.007578839100897312, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.135142520070076e-05, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01861186900176108, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01782303412910551, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016697869743220507, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01573209626134485, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014860915667377413, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013622521446086467, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011983343497850001, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010952048890758306, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010363967712037266, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009930696927476675, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009578669059555978, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.009197509160730987, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008913792485836894, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008535730694420636, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008211241231765597, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007897630063816906, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0075097773631569, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00705358358041849, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0065236378199188035, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005973267657682299, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005230407683120575, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004418061361357104, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0033933283702936023, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002290289311349625, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0014234325356665067, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0008556867574952776, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005837597465142608, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00043447378600831144, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00030762155176489614, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002154208912907052, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00016291185451336787, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001283515626710141, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.60529771327856e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.667483398108743e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.8953334255420485e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.807684369917297e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.995497148610582e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.0013104389977053e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.3755428186091565e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.5904627052301096e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.3898283778811947e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.000841383429372, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0009634260838240701, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.6714390063716525e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.001214680523190858, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0011388816454143236, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.001861003687800273, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.002361423303600744, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.001770876261602601, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9821143746376038, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.87045818567276, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7179334759712219, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5959928631782532, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.4980418384075165, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3864293098449707, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2863238453865051, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.22428356111049652, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.18281446397304535, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.15672139823436737, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.13946084678173065, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.12280566990375519, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.11190606653690338, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1011248528957367, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09353712201118469, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.08798383921384811, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08208213746547699, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0764707624912262, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07155528664588928, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0680665597319603, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06502418220043182, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06306442618370056, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06313807517290115, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06381411850452423, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06530801206827164, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0670730248093605, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06880766153335571, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.07000457495450974, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07136737555265427, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.0729294866323471, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0727788582444191, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07336042076349258, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07549473643302917, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07966688275337219, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07909462600946426, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09188565611839294, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1219291090965271, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15780310332775116, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14632630348205566, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2901957035064697, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2645826041698456, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.32293447852134705, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.43547648191452026, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.31828001141548157, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6975992918014526, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.0591124296188354, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2946468591690063, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.465169906616211, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.3469151258468628, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7767857142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8010912698412699, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8390376984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8670634920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.886656746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9107142857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9285714285714286, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9387400793650794, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9484126984126984, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9538690476190477, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9709821428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.972718253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9739583333333334, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9754464285714286, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9774305555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9694940476190477, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6897411812499183, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7304258743892038, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8022409030742965, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8442892304858954, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8689590111625025, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8955693942931285, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9161525568459241, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9297453419507503, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9393705035889309, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9452406879954962, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9491105099877418, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9540297767991157, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9589996609763028, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9626662441921352, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9655061462941446, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9683269076109918, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9696447542249638, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9721072356998636, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9737673216562354, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.972982723085846, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9725278064093413, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9738456810712073, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9754471642913846, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9759369902849425, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9751566397423301, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9767109714522687, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9762794641703129, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9750310960629337, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9757551798730201, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9761005649658772, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9764074558815041, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9770709523750779, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9770968088591587, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9732613200977063, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9752022954693018, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9749686756252668, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9784006023240115, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9744789601568968, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9772074406293408, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9778038801454126, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9764966979417241, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9753744842076195, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9773669168226097, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9763379340270542, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9763681966112316, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9752982129124275, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.967516659373304, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9691528756810949, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9639409098944824, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00028471450321376323, "validation/loss_best": 0.07549473643302917, "validation/acc_best": 0.9811507936507936, "validation/f1_best": 0.9770968088591587} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.11235270017758012, "train/grad": 0.05267676454037428, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9544779348373413, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8423350119590759, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6897529542446137, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5696855908632279, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.47396889597177505, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3648912499845028, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.26772250793874264, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.20874207522720098, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.16906104251742363, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.1436276502907276, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.12627570109441877, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10885508054867386, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.09726872113533318, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.08493468017317354, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0757997618149966, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.0682795977126807, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.059923902368173, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.051101759122684595, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.042128392662853, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03433150148019195, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.025545370085164904, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01766299863345921, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010827230773866176, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00625448907725513, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.003759999219328165, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0022430870123207568, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0015345697198063135, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0011448584031313659, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0008164366241544485, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005762721784412861, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0004427386075258255, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00035480030812323093, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002681068424135447, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00020768141373991966, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0001417307276278734, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.0011493042111395e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9697539284825327e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.48705393075943e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 7.74012878537178e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.188977181911469e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.593798562884331e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 5.6277401745319364e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1710173934698106e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 7.880264893174172e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00012762808240950106, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.8014259189367294e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.963966853916645e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1061318218708038e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.570217475295067e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018264500559307634, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017506317738443612, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016416727975010873, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015484177903272212, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01464219090063125, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013412658469751476, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011905488506890834, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010983524119947106, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010368648460134864, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00988228207686916, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009465890957508237, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00901308753527701, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008698816547403112, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008327101627364754, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008023377342615276, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007731772592524067, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0073662924487143755, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0069262147438712415, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006403144305804745, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005854495608364232, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005104788526659831, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004247242780693341, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0031698617909569292, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002067579556896817, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0012869881141523364, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007813901211193297, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005337997839524178, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00039784855380275985, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002821026558740414, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00019956864196501555, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001529211069646408, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00012091879032595898, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.187221444335591e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.378040339972358e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.681164562702179e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.57027640910701e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.691774172902228e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.789256402814132e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.2534786585528847e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.1074242031782072e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 8.14433475108318e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 9.382328442069414e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00012563341836456505, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.4726281540441769e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00027454852376741047, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0002364891156824484, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00010318294727680312, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0001917359347798624, "train/grad_048_lr5.0e+01_wd1.0e+00": 6.876175822772669e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9641609787940979, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8531972169876099, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7016109228134155, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.581127405166626, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.48437339067459106, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.373172402381897, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.27624648809432983, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.2178630381822586, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.17828936874866486, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.15328994393348694, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.13669393956661224, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.12072654068470001, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.11039934307336807, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.10012971609830856, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09301037341356277, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.08758791536092758, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08198236674070358, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07669548690319061, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07212172448635101, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.06879044324159622, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06554491817951202, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0637456476688385, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0637829601764679, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06458905339241028, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06616631150245667, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06833534687757492, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.069729745388031, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.07078390568494797, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07209249585866928, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07387544214725494, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0737793892621994, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07398094981908798, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07612846046686172, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07970673590898514, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07898200303316116, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09137748181819916, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12071986496448517, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15511639416217804, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14390438795089722, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.28332242369651794, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.25744569301605225, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3121436834335327, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3869458734989166, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.30501389503479004, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6641736030578613, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9778599143028259, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2325773239135742, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.346292495727539, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.2418955564498901, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.783234126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8055555555555556, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.841765873015873, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8687996031746031, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8888888888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9129464285714286, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9305555555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.941468253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9503968253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9553571428571429, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.957093253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9670138888888888, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9692460317460317, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9714781746031746, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9744543650793651, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9769345238095238, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9789186507936508, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9747023809523809, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9712301587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7011590731904497, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7395473424347084, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8075574268198528, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8467944602881862, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.87169906062592, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8982733052503671, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9188604088255706, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.932270229465321, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.941298491343334, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9467463696228662, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9485224536143064, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.954903730877327, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9608989614261058, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9629564252409727, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9661563506448648, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9675732352858434, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9699703647395983, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9729146927804257, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9729236719593327, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9732620894665402, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9759456682775837, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9758293993733782, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9764065427334088, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9766906314513013, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9759417433809374, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9754184237126111, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9752264325622868, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9753823564533335, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.975848048149526, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9764728696115414, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.976077769867824, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9770709523750779, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9772682189701773, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9728736230536258, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9755479831831069, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9742950715219962, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9782776730528081, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9742980291350156, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9772074406293408, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9779305890969047, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9760492093238182, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9753779517286219, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9780212183369686, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9767227221486928, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9771374861478821, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9746513881440232, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9683560611445851, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9688546102593423, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9658903848216327, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.0002681068424135447, "validation/loss_best": 0.07612846046686172, "validation/acc_best": 0.9811507936507936, "validation/f1_best": 0.9772682189701773} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.11269395135343074, "train/grad": 0.05261006502434611, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9441576600074768, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8330759298801422, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6824442982673645, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5643116489052773, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.46999802023172377, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3616121490299702, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.2669817440211773, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.2113082002475858, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.1734239923581481, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.14880706800147891, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1319594976119697, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.11501212432049215, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.10340496302582323, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.09069537381641567, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.08100884668529033, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07300124202854931, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06384437977336348, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05430133909918368, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04438620083965361, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03577056029811501, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.026171141713857652, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.017812831727787852, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010790197504684329, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006201745886355639, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0037089829239994286, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0022215041518211364, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0015123118925839663, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0011263207625597715, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.00080474141985178, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005778030212968588, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0004417199641466141, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00035186437889933586, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00026591396890580655, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00020561177283525466, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00014973080717027188, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.629416719079018e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.307947888970375e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5785247087478637e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 6.630187854170799e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.488015547394753e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 6.275661289691925e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.111492842435837e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 6.5319240093231205e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0413546115159988e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.847014158964157e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.0876526832580566e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.695145085453987e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.937662303447724e-08, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.51087611913681e-08, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01835444046650082, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017582154353149235, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01646923084743321, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015511693833395838, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014640136919915676, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013350694701075553, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011841832636855542, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010987267827149481, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010416282333899289, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009957076406572014, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009573808189015836, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00916984246345237, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008868000619113445, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008484696191735565, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008153304527513682, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007847282292786986, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007449060559738427, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006983128667925485, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006410169879673049, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005802704387460836, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004968633566750214, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004019756123452681, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0029270195413846523, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0019049499025277329, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0011910264962352811, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007302300247101812, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005027235382294748, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003766926443495322, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00026878484179178487, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00019130244723783107, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00014566349716915283, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001154311497521121, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.826753708490287e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.094533317285823e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.674968039784289e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.530665405151012e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.7151283736372987e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.7530813312504058e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.0754018363176369e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.0944306852995459e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.0904256940438478e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 5.7292373011199625e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.3735149139387644e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.6061683396990867e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.5340497100915106e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.5234095849795527e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.1310072823374831e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.1500055579213268e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.5699695600337506e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9529101252555847, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8424881100654602, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6916115880012512, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5719419121742249, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.47576600313186646, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3650047481060028, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2700241506099701, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.21410337090492249, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.17565608024597168, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.15127141773700714, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.1352686733007431, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.1196407750248909, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.10957027971744537, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.0992656871676445, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09211879968643188, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.08673109114170074, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08112232387065887, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0760384052991867, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.0716249868273735, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.06820987910032272, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06531920284032822, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06371233612298965, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.063997782766819, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06488706171512604, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06639046221971512, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06836172193288803, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06989256292581558, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.07089261710643768, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07241882383823395, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07418115437030792, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07378214597702026, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07426661998033524, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07620089501142502, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07986627519130707, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0789690688252449, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09119592607021332, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11983274668455124, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1536732017993927, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14187301695346832, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2780100107192993, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2537863552570343, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.30584433674812317, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.37903279066085815, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.29782921075820923, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6480062007904053, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9453805685043335, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1911050081253052, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2947971820831299, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.1856815814971924, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7842261904761905, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8070436507936508, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8444940476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8720238095238095, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.890625, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9159226190476191, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9322916666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9422123015873016, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9503968253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9556051587301587, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9575892857142857, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9680059523809523, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9719742063492064, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9744543650793651, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9756944444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.980406746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9714781746031746, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7038210673459279, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7429731554364217, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8125696622306761, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8511055901076977, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8738165554936137, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9018528884564693, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9218410254886507, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9331644848719229, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9409187219400785, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9474812604520492, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9489236605834677, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9554883385766301, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.961679944060966, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9627896639403671, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9672076078554264, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9681452955769824, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9699536677251899, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9723203134643918, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.973148053357718, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9718390940585666, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9748094200757271, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9758412415941533, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9766422571015011, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9760683085392428, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9765156408539083, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9755416703847052, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9761353631909445, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.975763179037327, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9752319395995893, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9756928236719311, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9766408686123113, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9770709523750779, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9772682189701773, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9740678965557257, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9755479831831069, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9748422584643572, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9782776730528081, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9742980291350156, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9770289297419158, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9779305890969047, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9760492093238182, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9753779517286219, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9786038051292832, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.977131602057795, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9771374861478821, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9750107836087933, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9681331215936473, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9688546102593423, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9661197035075866, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00026591396890580655, "validation/loss_best": 0.07620089501142502, "validation/acc_best": 0.9811507936507936, "validation/f1_best": 0.9772682189701773} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.11038442365825177, "train/grad": 0.051027158889919516, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.938724536895752, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8281178975105286, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6776893043518066, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5590372359752656, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.463925196826458, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3540598371624947, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.2595465850830078, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.20416101146489382, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.16619844865053893, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.14164988124743105, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1249199485965073, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10822349458001554, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.09706833511590958, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.08499808457680047, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.07595670580863953, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06846707304939628, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06014701534062624, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05126728120259941, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0421423641871661, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.034144941437989476, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.025089494371786713, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.016943972539156676, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.0100485769379884, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.005699902987107635, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0033890148904174564, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0020332414656877517, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0013913613837212323, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0010468555334955454, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007472608145326375, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005370119027793408, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00041240958496928214, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00032827512361109255, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00024710897356271743, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00018973637372255326, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00014400661922991275, "train/loss_035_lr6.0e+00_wd1.0e+00": 5.8104582130908966e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.3273668959736826e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.6774050891399384e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 6.701145321130753e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.009341984987259e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 4.168525338172913e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.7089752256870268e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 5.568442866206169e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 5.302149802446366e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 7.646065205335617e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.2208038717508318e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.1924357861280442e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.685273885726928e-08, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.247498095035553e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017849811343476175, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01709657402243465, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01601354063488543, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01508277412969619, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01423379797488451, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012954760058782995, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011433255469892174, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010620883372612298, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010099817484151572, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009678681131917983, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009310486705508082, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008888910358073189, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00858352230861783, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008189374617068097, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007847967743873595, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0075354231160599736, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007148605961119756, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006696120316628367, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006142488922923803, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005566000023391098, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0047674822394037615, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0038504253776045516, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002740536125493236, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0017232932557817548, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0010734783571388106, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006657685894606402, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004635488325584447, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00035132453955156963, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00025125070424110164, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0001788565763126826, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00013783418100501876, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010924044734565541, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.250293740275083e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.649842144270224e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.252288911833603e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.3024189165189454e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.618026989363642e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.684997587779069e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.0408008292376557e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 9.642805224371703e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.0695692442073934e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.510667375837074e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.1494727931301576e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.302006584534537e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.6123984646367775e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 7.201807050291174e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.4439721097601813e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 7.168047773448495e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 2.0268708962353853e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9472089409828186, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8368472456932068, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6862469911575317, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5672192573547363, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.47128599882125854, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.36082926392555237, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.26702383160591125, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.21220269799232483, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.17436200380325317, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.1503094881772995, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.13436591625213623, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.11888732761144638, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.10895173251628876, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.09872660040855408, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09150761365890503, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.08611840009689331, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0804234966635704, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07509464770555496, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07053162157535553, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.06716684997081757, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06411777436733246, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06280548125505447, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0632062777876854, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06428573280572891, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0659850612282753, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06808821856975555, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06970184296369553, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.07076150923967361, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07213675230741501, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07387332618236542, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07381357252597809, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07425188273191452, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0761338323354721, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07989771664142609, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07878658920526505, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09095162898302078, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11965881288051605, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15306487679481506, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14099913835525513, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2762994170188904, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2514282166957855, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.30316177010536194, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.37367895245552063, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.29466181993484497, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6362128853797913, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.927083432674408, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.171668291091919, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2666089534759521, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.1558178663253784, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7847222222222222, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8077876984126984, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.845734126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8737599206349206, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8913690476190477, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9159226190476191, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9332837301587301, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9429563492063492, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9508928571428571, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9558531746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9580853174603174, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9677579365079365, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9747023809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9789186507936508, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.980406746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9712301587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.704876646641952, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7445126645469, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8146254427835091, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8528420127784377, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8740813108500819, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9019019928554729, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9232135323660915, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9345079400717502, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.942023077945457, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9477031386087941, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9497235640767878, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9550606761442835, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9611098282043136, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9636877362861002, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9676171046920762, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9682329637542614, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9703267176898972, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9739945489416793, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9734413989440134, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9723129910932586, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9756202695801202, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9751643804761069, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9758395998461277, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9761155965213497, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9762598341950136, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9753912909709309, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.975613451691973, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9752296637705842, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9759969499407142, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9758917851637202, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.976291459266077, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9772291228734857, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9774440003769799, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9739712764287936, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9755479831831069, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.974837561753259, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9780503384375546, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9742980291350156, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9772978958896152, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9779305890969047, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9760492093238182, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9753779517286219, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9784228594467254, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9769049087231653, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9773180300074767, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9751887947168564, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9681780008908573, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9690560958915584, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9658903848216327, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00024710897356271743, "validation/loss_best": 0.0761338323354721, "validation/acc_best": 0.9813988095238095, "validation/f1_best": 0.9774440003769799} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.10696664554998278, "train/grad": 0.05135418899357319, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9220355677604676, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8122015583515168, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6634181439876556, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5466334128379822, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.45323934495449064, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.34510088093578817, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.252564684599638, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.1983324182406068, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.16079143242910504, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.13615013843402266, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.11934895497746766, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10255843326449395, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.09118114271201193, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07908617283217609, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06996203727088869, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06263283817097545, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.054404861936345694, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04587629251182079, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03715088167227805, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.029696263717487455, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02144475727342069, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.014396831821650267, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.00863560913130641, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.004952497323974967, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.002994273900985718, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0018222672399133444, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.001263080146163702, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0009491632226854563, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006847051158547402, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004953140299767256, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003804394043982029, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003074188623577356, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00023223398253321647, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00017658752389252186, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00012792449444532395, "train/loss_035_lr6.0e+00_wd1.0e+00": 5.858665332198143e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.9379660263657568e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5505030751228334e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 6.851926445960998e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.65502792596817e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.723837435245514e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.1617766469717026e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.4913886338472365e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 9.190971031785011e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 7.606949657201767e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.772523581981659e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 6.420165300369263e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.239659756422043e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.2751279175281524e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01803707825951278, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017266727047972382, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01617237779777497, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015251009720377623, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014419064484536647, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013101362786255778, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011539168399758637, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010729792336933315, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010171660510823131, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009703008155338467, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.00933965828968212, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008945586392655969, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008638064942788333, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008230239033000544, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007866906459676103, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007533051802311092, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.00711166588589549, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006639061205205508, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00605474689626135, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005457095367601142, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004617504897760227, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0036628338883747348, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002576362096297089, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0016307333632721566, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001030848004229483, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006414762961503583, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004477365054844995, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003390600932834786, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002441855763026979, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00017542362429594504, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.000135205676087935, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010777975086057268, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.187133869796525e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.606046567867452e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.1727652298723114e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.240802379366414e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.55394407470666e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.8502720285175078e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.1922233971826263e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.0016467740444662e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 8.156737876943189e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 5.1108389322592925e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.0548625445015333e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.9190154001293706e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 4.700772775641658e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.1401707037257805e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.0417811123925063e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.4871272189143225e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.619844715158195e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9449338316917419, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8346477746963501, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6843519806861877, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.565402090549469, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.4698033332824707, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3592952489852905, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.26590806245803833, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.2115333080291748, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.17393074929714203, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.14997929334640503, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.1340596228837967, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.11870256066322327, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.10882255434989929, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.09872060269117355, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09145814925432205, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.08610045909881592, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0805514007806778, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0752234235405922, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.0705963745713234, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.06737575680017471, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06439118832349777, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06304696947336197, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06342485547065735, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06463135033845901, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06627891212701797, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06834039837121964, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.06991300731897354, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.07090696692466736, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07244231551885605, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07393322885036469, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07383693009614944, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07426374405622482, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07624972611665726, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07991493493318558, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07863566279411316, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09068619459867477, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1193501427769661, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15284717082977295, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14066754281520844, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.27497372031211853, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.249561607837677, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3019757866859436, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.37308722734451294, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2924855351448059, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6318655014038086, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9205080270767212, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1597914695739746, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2570098638534546, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.1448978185653687, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7864583333333334, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8075396825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.845734126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8737599206349206, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8916170634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9176587301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9327876984126984, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9424603174603174, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.951140873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9556051587301587, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9635416666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9677579365079365, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9692460317460317, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9749503968253969, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9769345238095238, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.980406746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9717261904761905, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7076520691315534, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7441855449207818, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.813423542807323, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8532138976261772, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8742326304122247, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9041250302290131, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9219367381970869, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9338159010231086, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9419750575911876, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9471232374641939, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9491895883207245, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9557111004943251, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9611098282043136, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9635156406854731, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9676171046920762, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9683389793777657, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.970425939102288, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9733656044106924, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9740250985268165, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9727635405687051, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9749527886358842, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9755137346281153, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9763815484804711, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9764363123099519, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9759848527159068, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.974786042130249, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9756032709175676, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.975763179037327, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9756387052027866, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9758917851637202, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9764137077895938, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9772291228734857, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9772682189701773, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9740678965557257, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9755479831831069, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9750182525341141, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9783728991930261, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9742980291350156, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9772978958896152, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9777110277754762, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9762757620971703, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9753779517286219, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9784228594467254, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9769049087231653, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9773180300074767, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9750100075639099, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9681780008908573, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9686912371236922, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9663422807339092, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00023223398253321647, "validation/loss_best": 0.07624972611665726, "validation/acc_best": 0.9811507936507936, "validation/f1_best": 0.9772682189701773} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.10831781698390841, "train/grad": 0.05078767854720354, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9263931369781494, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8158699798583985, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6661753273010254, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5488383775949478, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.45507552176713945, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3472255864739418, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.25540559537708757, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.20188775449991225, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.16475828740745782, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.14046318590641022, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.12390381978824734, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10710991004481912, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.09559934733435511, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.083134131366387, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0736097580846399, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.0658045488037169, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.057158986032009126, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04808973621577024, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.038823963720351455, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030909515172243118, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.022259560553357004, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.014901560992002487, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.008977495599538088, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.005197019344195724, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.003150947792455554, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.001910319048911333, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.001317155510187149, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0009897542744874955, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007136687729507685, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005170536972582341, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003955262061208487, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0003149126097559929, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00023454965092241763, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00018712890334427357, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0001343250274658203, "train/loss_035_lr6.0e+00_wd1.0e+00": 5.8932714164257047e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.81352736055851e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4765225350856782e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 6.65382482111454e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.735579714179039e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 5.591064691543579e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.8121378272771837e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.402001991868019e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 5.723219364881516e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.1946633458137512e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.1435896158218384e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.381401136517525e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.754191756248474e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.469417035579681e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01769349295645952, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.016929598418064417, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015852290377952157, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014958839784376322, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014155643344856798, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012929455838166177, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011516094480175525, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010725141582079233, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010157089529093355, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009706812873482705, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009349700692109764, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008983067112276331, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00868053424404934, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0082830006419681, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007922468129545451, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007571500781923532, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007145150221185758, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006650284488569014, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006064755860134028, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00545829780807253, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00464345581131056, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003691220722394064, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0025977951259119436, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0016289873346977402, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0010153745657589751, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006263048766413703, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004351653201592853, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00032874437780264996, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002366486825121683, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00017055994150723562, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00013182179156501661, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010550931481702719, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.047708554840938e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.422454952371481e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.132030572440271e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.300317736147918e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.6477267034046294e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.5228574074415135e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.04121557630793e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.633429265334426e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 9.802156436557312e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 5.366330666137442e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.1055422637094608e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.522349373700767e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 3.6568809168158082e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 8.145766622699374e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 8.60929442565233e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.779768774853673e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 2.649140193628638e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9445880055427551, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8342787623405457, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6840882301330566, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5650716423988342, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.46952247619628906, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3590778410434723, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.26578739285469055, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.21142566204071045, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.1739114224910736, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.14989742636680603, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.13403946161270142, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.11868492513895035, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.10879147797822952, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.09874927252531052, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.09145192801952362, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.08613677322864532, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08050326257944107, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07523589581251144, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.07061732560396194, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0673438236117363, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0643598660826683, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06301574409008026, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.06348852068185806, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06472884863615036, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06624509394168854, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.06826526671648026, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0699651837348938, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.07094231247901917, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07247589528560638, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.07396409660577774, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07391458749771118, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.07424014061689377, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07630305737257004, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07992035895586014, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07872956246137619, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.09077446162700653, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1192719042301178, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15291981399059296, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14064736664295197, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2748735249042511, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2495543360710144, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3018683195114136, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3726573586463928, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2918466031551361, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6313766241073608, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9200364351272583, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.15874445438385, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2553703784942627, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.141170620918274, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7864583333333334, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8072916666666666, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.845734126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8742559523809523, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8923611111111112, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9176587301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9332837301587301, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9427083333333334, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.951140873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9556051587301587, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9575892857142857, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9632936507936508, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9677579365079365, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9694940476190477, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9722222222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9729662698412699, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9747023809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9779265873015873, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9784226190476191, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9796626984126984, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.980406746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9786706349206349, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.980406746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9714781746031746, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7075666573572754, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7439047358986448, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8133189201736224, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8541368596131446, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8752259943735057, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9041250302290131, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9227146260067517, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9340477641029127, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9419750575911876, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9471232374641939, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9489668954297594, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9555320416776399, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9611098282043136, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9640596806630105, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9676171046920762, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9683389793777657, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.970246176186013, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9731859642088287, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9733990837529151, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9725838782545488, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.975047750381456, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9753374949866233, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9763815484804711, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9758880741020526, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9759848527159068, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.975162652943488, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9752501310332959, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.975763179037327, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9758630868905539, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9758917851637202, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9764137077895938, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9772291228734857, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9772682189701773, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9740652172523119, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9755479831831069, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9750182525341141, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.978600684946499, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9742980291350156, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9772978958896152, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9777110277754762, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9762757620971703, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9753779517286219, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9784228594467254, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9769049087231653, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9773180300074767, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9750100075639099, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9681780008908573, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9690560958915584, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9661197035075866, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.00023454965092241763, "validation/loss_best": 0.07630305737257004, "validation/acc_best": 0.9811507936507936, "validation/f1_best": 0.9772682189701773} diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a66b235fd0941102f79a01a510689138ce5cb8b3 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..ad03ec5754a1cb3b689efcc5899bfe98e4456b51 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 5, "eval/id_best": 25, "eval/lr_best": 0.00035999999999999997, "eval/wd_best": 0.05, "eval/train/loss": 2.0278518199920654, "eval/train/acc": 0.38605980515688865, "eval/train/acc_std": 0.0024390732770308, "eval/train/f1": 0.32683064710456294, "eval/train/f1_std": 0.0026072174595867434, "eval/validation/loss": 2.3861210346221924, "eval/validation/acc": 0.27593207825765964, "eval/validation/acc_std": 0.005400515837060602, "eval/validation/f1": 0.20648864936047728, "eval/validation/f1_std": 0.004769139133979471, "eval/test/loss": 2.3202176094055176, "eval/test/acc": 0.299443413729128, "eval/test/acc_std": 0.005292847465158215, "eval/test/f1": 0.234522776971707, "eval/test/f1_std": 0.005352633686304655, "eval/testid/loss": 2.275526285171509, "eval/testid/acc": 0.30364372469635625, "eval/testid/acc_std": 0.005979370856113823, "eval/testid/f1": 0.24622010956570986, "eval/testid/f1_std": 0.0057295414352910475} diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..9107254cf31f918b4da300fbf934bf8fe1349d57 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 5, "eval/best/id_best": 25, "eval/best/lr_best": 0.00035999999999999997, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.0278518199920654, "eval/best/train/acc": 0.38605980515688865, "eval/best/train/acc_std": 0.0024390732770308, "eval/best/train/f1": 0.32683064710456294, "eval/best/train/f1_std": 0.0026072174595867434, "eval/best/validation/loss": 2.3861210346221924, "eval/best/validation/acc": 0.27593207825765964, "eval/best/validation/acc_std": 0.005400515837060602, "eval/best/validation/f1": 0.20648864936047728, "eval/best/validation/f1_std": 0.004769139133979471, "eval/best/test/loss": 2.3202176094055176, "eval/best/test/acc": 0.299443413729128, "eval/best/test/acc_std": 0.005292847465158215, "eval/best/test/f1": 0.234522776971707, "eval/best/test/f1_std": 0.005352633686304655, "eval/best/testid/loss": 2.275526285171509, "eval/best/testid/acc": 0.30364372469635625, "eval/best/testid/acc_std": 0.005979370856113823, "eval/best/testid/f1": 0.24622010956570986, "eval/best/testid/f1_std": 0.0057295414352910475} diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..518a74d5dd53d628aa808ddd70c2e199146d147c --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 19, "eval/last/lr_best": 0.00013199999999999998, "eval/last/wd_best": 0.05, "eval/last/train/loss": 1.8982449769973755, "eval/last/train/acc": 0.4274870155813024, "eval/last/train/acc_std": 0.0024958864613864123, "eval/last/train/f1": 0.37622806674509074, "eval/last/train/f1_std": 0.0027841325931396185, "eval/last/validation/loss": 2.42641019821167, "eval/last/validation/acc": 0.26891842008121075, "eval/last/validation/acc_std": 0.005440777827485976, "eval/last/validation/f1": 0.21265296750433957, "eval/last/validation/f1_std": 0.005197201381780099, "eval/last/test/loss": 2.379150390625, "eval/last/test/acc": 0.29573283858998145, "eval/last/test/acc_std": 0.005501312904378919, "eval/last/test/f1": 0.23091473888547545, "eval/last/test/f1_std": 0.005716840638132547, "eval/last/testid/loss": 2.197214126586914, "eval/last/testid/acc": 0.3227299016772701, "eval/last/testid/acc_std": 0.006143241739032288, "eval/last/testid/f1": 0.2743443587301291, "eval/last/testid/f1_std": 0.006057829224491564} diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..45b1fb9e5ecf283df19ec2e1c87e79622d790660 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",train,2.0278518199920654,0.38605980515688865,0.0024390732770308,0.32683064710456294,0.0026072174595867434 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",validation,2.3861210346221924,0.27593207825765964,0.005400515837060602,0.20648864936047728,0.004769139133979471 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",test,2.3202176094055176,0.299443413729128,0.005292847465158215,0.234522776971707,0.005352633686304655 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",testid,2.275526285171509,0.30364372469635625,0.005979370856113823,0.24622010956570986,0.0057295414352910475 diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..45b1fb9e5ecf283df19ec2e1c87e79622d790660 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",train,2.0278518199920654,0.38605980515688865,0.0024390732770308,0.32683064710456294,0.0026072174595867434 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",validation,2.3861210346221924,0.27593207825765964,0.005400515837060602,0.20648864936047728,0.004769139133979471 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",test,2.3202176094055176,0.299443413729128,0.005292847465158215,0.234522776971707,0.005352633686304655 +flat_mae,patch,attn,nsd_cococlip,best,5,0.00035999999999999997,0.05,25,"[1.2, 1.0]",testid,2.275526285171509,0.30364372469635625,0.005979370856113823,0.24622010956570986,0.0057295414352910475 diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..dec30ab5c00b94ad7b6a3605739255e29544940a --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",train,1.8982449769973755,0.4274870155813024,0.0024958864613864123,0.37622806674509074,0.0027841325931396185 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",validation,2.42641019821167,0.26891842008121075,0.005440777827485976,0.21265296750433957,0.005197201381780099 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",test,2.379150390625,0.29573283858998145,0.005501312904378919,0.23091473888547545,0.005716840638132547 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",testid,2.197214126586914,0.3227299016772701,0.006143241739032288,0.2743443587301291,0.006057829224491564 diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..f60c847cfc7e1b1369d39a4a13ffc193235830c1 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,960 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:39:50 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:22:14 lr: nan time: 3.3366 data: 2.8073 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:38 lr: 0.000003 loss: 3.1883 (3.1901) grad: 0.1806 (0.1810) time: 0.4379 data: 0.0029 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:03 lr: 0.000006 loss: 3.1840 (3.1820) grad: 0.1762 (0.1801) time: 0.4427 data: 0.0048 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:47 lr: 0.000009 loss: 3.1737 (3.1817) grad: 0.1732 (0.1793) time: 0.4558 data: 0.0048 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:33 lr: 0.000012 loss: 3.1650 (3.1763) grad: 0.1702 (0.1774) time: 0.4372 data: 0.0047 max mem: 22448 +train: [0] [100/400] eta: 0:02:22 lr: 0.000015 loss: 3.1619 (3.1741) grad: 0.1662 (0.1756) time: 0.4530 data: 0.0050 max mem: 22448 +train: [0] [120/400] eta: 0:02:12 lr: 0.000018 loss: 3.1607 (3.1716) grad: 0.1584 (0.1729) time: 0.4633 data: 0.0050 max mem: 22448 +train: [0] [140/400] eta: 0:02:02 lr: 0.000021 loss: 3.1513 (3.1683) grad: 0.1596 (0.1720) time: 0.4565 data: 0.0049 max mem: 22448 +train: [0] [160/400] eta: 0:01:52 lr: 0.000024 loss: 3.1438 (3.1649) grad: 0.1735 (0.1733) time: 0.4510 data: 0.0049 max mem: 22448 +train: [0] [180/400] eta: 0:01:42 lr: 0.000027 loss: 3.1336 (3.1619) grad: 0.1774 (0.1727) time: 0.4319 data: 0.0046 max mem: 22448 +train: [0] [200/400] eta: 0:01:32 lr: 0.000030 loss: 3.1289 (3.1600) grad: 0.1588 (0.1712) time: 0.4750 data: 0.0051 max mem: 22448 +train: [0] [220/400] eta: 0:01:23 lr: 0.000033 loss: 3.1487 (3.1584) grad: 0.1604 (0.1706) time: 0.4548 data: 0.0049 max mem: 22448 +train: [0] [240/400] eta: 0:01:13 lr: 0.000036 loss: 3.1316 (3.1554) grad: 0.1639 (0.1699) time: 0.4328 data: 0.0048 max mem: 22448 +train: [0] [260/400] eta: 0:01:04 lr: 0.000039 loss: 3.1196 (3.1525) grad: 0.1601 (0.1690) time: 0.4596 data: 0.0049 max mem: 22448 +train: [0] [280/400] eta: 0:00:55 lr: 0.000042 loss: 3.0968 (3.1483) grad: 0.1583 (0.1685) time: 0.4584 data: 0.0050 max mem: 22448 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 3.0760 (3.1425) grad: 0.1613 (0.1682) time: 0.4466 data: 0.0050 max mem: 22448 +train: [0] [320/400] eta: 0:00:36 lr: 0.000048 loss: 3.0616 (3.1385) grad: 0.1700 (0.1688) time: 0.4513 data: 0.0048 max mem: 22448 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 3.0707 (3.1349) grad: 0.1717 (0.1690) time: 0.4746 data: 0.0054 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.0616 (3.1304) grad: 0.1717 (0.1697) time: 0.4507 data: 0.0050 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0527 (3.1265) grad: 0.1858 (0.1704) time: 0.4695 data: 0.0050 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0484 (3.1233) grad: 0.1860 (0.1710) time: 0.4558 data: 0.0048 max mem: 22448 +train: [0] Total time: 0:03:04 (0.4606 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0484 (3.1233) grad: 0.1860 (0.1710) +eval (validation): [0] [ 0/85] eta: 0:05:35 time: 3.9480 data: 3.6305 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:37 time: 0.4150 data: 0.0047 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:21 time: 0.3580 data: 0.0042 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:10 time: 0.3478 data: 0.0038 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:02 time: 0.3310 data: 0.0042 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3196 data: 0.0040 max mem: 22448 +eval (validation): [0] Total time: 0:00:34 (0.4056 s / it) +cv: [0] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 2.589 acc: 0.225 f1: 0.151 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:52 lr: nan time: 3.4300 data: 3.0773 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:43 lr: 0.000063 loss: 2.9996 (3.0046) grad: 0.1796 (0.1771) time: 0.4456 data: 0.0045 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:08 lr: 0.000066 loss: 3.0096 (3.0107) grad: 0.1719 (0.1736) time: 0.4536 data: 0.0045 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:49 lr: 0.000069 loss: 2.9983 (2.9962) grad: 0.1719 (0.1742) time: 0.4512 data: 0.0047 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:35 lr: 0.000072 loss: 2.9770 (2.9964) grad: 0.1748 (0.1765) time: 0.4475 data: 0.0046 max mem: 22448 +train: [1] [100/400] eta: 0:02:24 lr: 0.000075 loss: 2.9759 (2.9889) grad: 0.1750 (0.1776) time: 0.4596 data: 0.0048 max mem: 22448 +train: [1] [120/400] eta: 0:02:13 lr: 0.000078 loss: 2.9624 (2.9863) grad: 0.1860 (0.1791) time: 0.4513 data: 0.0049 max mem: 22448 +train: [1] [140/400] eta: 0:02:03 lr: 0.000081 loss: 2.9684 (2.9834) grad: 0.1860 (0.1807) time: 0.4654 data: 0.0048 max mem: 22448 +train: [1] [160/400] eta: 0:01:52 lr: 0.000084 loss: 2.9707 (2.9837) grad: 0.1858 (0.1813) time: 0.4429 data: 0.0046 max mem: 22448 +train: [1] [180/400] eta: 0:01:43 lr: 0.000087 loss: 2.9864 (2.9836) grad: 0.1849 (0.1820) time: 0.4501 data: 0.0048 max mem: 22448 +train: [1] [200/400] eta: 0:01:33 lr: 0.000090 loss: 2.9720 (2.9809) grad: 0.1850 (0.1829) time: 0.4489 data: 0.0046 max mem: 22448 +train: [1] [220/400] eta: 0:01:23 lr: 0.000093 loss: 2.9133 (2.9745) grad: 0.1954 (0.1849) time: 0.4529 data: 0.0047 max mem: 22448 +train: [1] [240/400] eta: 0:01:14 lr: 0.000096 loss: 2.9033 (2.9708) grad: 0.1957 (0.1852) time: 0.4613 data: 0.0049 max mem: 22448 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 2.9244 (2.9687) grad: 0.1857 (0.1860) time: 0.4586 data: 0.0048 max mem: 22448 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 2.9143 (2.9651) grad: 0.1856 (0.1861) time: 0.4465 data: 0.0048 max mem: 22448 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 2.9062 (2.9625) grad: 0.1893 (0.1867) time: 0.4318 data: 0.0047 max mem: 22448 +train: [1] [320/400] eta: 0:00:36 lr: 0.000108 loss: 2.9003 (2.9587) grad: 0.1992 (0.1876) time: 0.4469 data: 0.0048 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.8780 (2.9536) grad: 0.1962 (0.1880) time: 0.4417 data: 0.0047 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.8985 (2.9519) grad: 0.1931 (0.1883) time: 0.4402 data: 0.0047 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.8985 (2.9486) grad: 0.1931 (0.1890) time: 0.4456 data: 0.0047 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.8838 (2.9467) grad: 0.2055 (0.1901) time: 0.4575 data: 0.0048 max mem: 22448 +train: [1] Total time: 0:03:03 (0.4579 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.8838 (2.9467) grad: 0.2055 (0.1901) +eval (validation): [1] [ 0/85] eta: 0:04:16 time: 3.0171 data: 2.7833 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:29 time: 0.3303 data: 0.0041 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:17 time: 0.3278 data: 0.0039 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3310 data: 0.0036 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3287 data: 0.0038 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3261 data: 0.0039 max mem: 22448 +eval (validation): [1] Total time: 0:00:31 (0.3648 s / it) +cv: [1] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 2.508 acc: 0.243 f1: 0.170 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:22:00 lr: nan time: 3.3005 data: 2.9118 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:36 lr: 0.000123 loss: 2.8776 (2.8744) grad: 0.2191 (0.2246) time: 0.4340 data: 0.0036 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:02 lr: 0.000126 loss: 2.8848 (2.8794) grad: 0.2180 (0.2209) time: 0.4428 data: 0.0045 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:46 lr: 0.000129 loss: 2.8834 (2.8752) grad: 0.2117 (0.2159) time: 0.4532 data: 0.0048 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:34 lr: 0.000132 loss: 2.8684 (2.8699) grad: 0.2023 (0.2157) time: 0.4555 data: 0.0049 max mem: 22448 +train: [2] [100/400] eta: 0:02:22 lr: 0.000135 loss: 2.8385 (2.8633) grad: 0.2126 (0.2160) time: 0.4484 data: 0.0049 max mem: 22448 +train: [2] [120/400] eta: 0:02:11 lr: 0.000138 loss: 2.8265 (2.8557) grad: 0.2188 (0.2168) time: 0.4501 data: 0.0051 max mem: 22448 +train: [2] [140/400] eta: 0:02:01 lr: 0.000141 loss: 2.8332 (2.8569) grad: 0.2201 (0.2182) time: 0.4467 data: 0.0048 max mem: 22448 +train: [2] [160/400] eta: 0:01:51 lr: 0.000144 loss: 2.8271 (2.8515) grad: 0.2235 (0.2195) time: 0.4508 data: 0.0050 max mem: 22448 +train: [2] [180/400] eta: 0:01:41 lr: 0.000147 loss: 2.8043 (2.8455) grad: 0.2207 (0.2196) time: 0.4491 data: 0.0051 max mem: 22448 +train: [2] [200/400] eta: 0:01:32 lr: 0.000150 loss: 2.8043 (2.8418) grad: 0.2235 (0.2209) time: 0.4410 data: 0.0047 max mem: 22448 +train: [2] [220/400] eta: 0:01:23 lr: 0.000153 loss: 2.8318 (2.8429) grad: 0.2400 (0.2249) time: 0.4632 data: 0.0048 max mem: 22448 +train: [2] [240/400] eta: 0:01:13 lr: 0.000156 loss: 2.9222 (2.8690) grad: 0.3131 (0.2714) time: 0.4531 data: 0.0051 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=96.71 > 63.56) at step 524. Freezing. +train: [2] [260/400] eta: 0:01:04 lr: 0.000159 loss: 2.9378 (2.8968) grad: 0.5387 (0.3075) time: 0.4488 data: 0.0047 max mem: 22448 +train: [2] [280/400] eta: 0:00:55 lr: 0.000162 loss: 2.8132 (2.8907) grad: 0.2317 (0.3026) time: 0.4429 data: 0.0046 max mem: 22448 +train: [2] [300/400] eta: 0:00:45 lr: 0.000165 loss: 2.7971 (2.8842) grad: 0.2318 (0.2979) time: 0.4577 data: 0.0049 max mem: 22448 +train: [2] [320/400] eta: 0:00:36 lr: 0.000168 loss: 2.7953 (2.8786) grad: 0.2285 (0.2932) time: 0.4508 data: 0.0049 max mem: 22448 +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 2.7861 (2.8743) grad: 0.2230 (0.2895) time: 0.4461 data: 0.0048 max mem: 22448 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 2.7806 (2.8715) grad: 0.2266 (0.2862) time: 0.4447 data: 0.0049 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 2.8250 (2.8700) grad: 0.2425 (0.2842) time: 0.4726 data: 0.0052 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.8488 (2.8662) grad: 0.2459 (0.2817) time: 0.4746 data: 0.0052 max mem: 22448 +train: [2] Total time: 0:03:03 (0.4589 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.8488 (2.8662) grad: 0.2459 (0.2817) +eval (validation): [2] [ 0/85] eta: 0:04:28 time: 3.1538 data: 2.8819 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:31 time: 0.3508 data: 0.0162 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:18 time: 0.3280 data: 0.0033 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3503 data: 0.0040 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3308 data: 0.0037 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3175 data: 0.0037 max mem: 22448 +eval (validation): [2] Total time: 0:00:31 (0.3742 s / it) +cv: [2] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 2.426 acc: 0.269 f1: 0.192 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:20:19 lr: nan time: 3.0489 data: 2.6761 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:28 lr: 0.000183 loss: 2.7121 (2.7546) grad: 0.2356 (0.2464) time: 0.4235 data: 0.0033 max mem: 22448 +train: [3] [ 40/400] eta: 0:02:56 lr: 0.000186 loss: 2.8201 (2.8429) grad: 0.2805 (0.3646) time: 0.4317 data: 0.0042 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=69.06 > 63.56) at step 627. Freezing. +train: [3] [ 60/400] eta: 0:02:40 lr: 0.000189 loss: 2.8856 (3.0187) grad: 0.4626 (0.5799) time: 0.4357 data: 0.0045 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:28 lr: 0.000192 loss: 2.8063 (2.9573) grad: 0.2326 (0.4905) time: 0.4332 data: 0.0046 max mem: 22448 +train: [3] [100/400] eta: 0:02:17 lr: 0.000195 loss: 2.7426 (2.9163) grad: 0.2192 (0.4379) time: 0.4428 data: 0.0048 max mem: 22448 +train: [3] [120/400] eta: 0:02:07 lr: 0.000198 loss: 2.7420 (2.8890) grad: 0.2282 (0.4043) time: 0.4398 data: 0.0048 max mem: 22448 +train: [3] [140/400] eta: 0:01:57 lr: 0.000201 loss: 2.7503 (2.8728) grad: 0.2531 (0.3832) time: 0.4397 data: 0.0047 max mem: 22448 +train: [3] [160/400] eta: 0:01:48 lr: 0.000204 loss: 2.7934 (2.8644) grad: 0.2556 (0.3664) time: 0.4364 data: 0.0047 max mem: 22448 +train: [3] [180/400] eta: 0:01:39 lr: 0.000207 loss: 2.7679 (2.8514) grad: 0.2399 (0.3519) time: 0.4466 data: 0.0049 max mem: 22448 +train: [3] [200/400] eta: 0:01:30 lr: 0.000210 loss: 2.7585 (2.8439) grad: 0.2398 (0.3410) time: 0.4405 data: 0.0048 max mem: 22448 +train: [3] [220/400] eta: 0:01:20 lr: 0.000213 loss: 2.7672 (2.8375) grad: 0.2468 (0.3324) time: 0.4391 data: 0.0048 max mem: 22448 +train: [3] [240/400] eta: 0:01:11 lr: 0.000216 loss: 2.7500 (2.8302) grad: 0.2493 (0.3260) time: 0.4378 data: 0.0047 max mem: 22448 +train: [3] [260/400] eta: 0:01:02 lr: 0.000219 loss: 2.7447 (2.8245) grad: 0.2519 (0.3206) time: 0.4362 data: 0.0050 max mem: 22448 +train: [3] [280/400] eta: 0:00:53 lr: 0.000222 loss: 2.7426 (2.8182) grad: 0.2466 (0.3159) time: 0.4394 data: 0.0049 max mem: 22448 +train: [3] [300/400] eta: 0:00:44 lr: 0.000225 loss: 2.8179 (2.8213) grad: 0.2786 (0.3224) time: 0.4455 data: 0.0047 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=80.14 > 63.56) at step 759. Freezing. +train: [3] [320/400] eta: 0:00:35 lr: 0.000228 loss: 2.9803 (2.8536) grad: 0.5460 (0.3745) time: 0.4416 data: 0.0048 max mem: 22448 +train: [3] [340/400] eta: 0:00:26 lr: 0.000231 loss: 2.8070 (2.8494) grad: 0.2724 (0.3671) time: 0.4427 data: 0.0050 max mem: 22448 +train: [3] [360/400] eta: 0:00:17 lr: 0.000234 loss: 2.7943 (2.8477) grad: 0.2528 (0.3609) time: 0.4492 data: 0.0049 max mem: 22448 +train: [3] [380/400] eta: 0:00:08 lr: 0.000237 loss: 2.7834 (2.8432) grad: 0.2603 (0.3556) time: 0.4722 data: 0.0049 max mem: 22448 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 2.7351 (2.8381) grad: 0.2533 (0.3500) time: 0.4545 data: 0.0050 max mem: 22448 +train: [3] Total time: 0:02:59 (0.4484 s / it) +train: [3] Summary: lr: 0.000240 loss: 2.7351 (2.8381) grad: 0.2533 (0.3500) +eval (validation): [3] [ 0/85] eta: 0:04:32 time: 3.2020 data: 2.9605 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:30 time: 0.3325 data: 0.0047 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3399 data: 0.0035 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3374 data: 0.0043 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3303 data: 0.0041 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3187 data: 0.0040 max mem: 22448 +eval (validation): [3] Total time: 0:00:31 (0.3704 s / it) +cv: [3] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.433 acc: 0.267 f1: 0.197 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:21:37 lr: nan time: 3.2443 data: 2.8683 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:35 lr: 0.000243 loss: 2.6687 (2.6921) grad: 0.2634 (0.2632) time: 0.4337 data: 0.0039 max mem: 22448 +train: [4] [ 40/400] eta: 0:03:03 lr: 0.000246 loss: 2.6992 (2.7144) grad: 0.2798 (0.2969) time: 0.4508 data: 0.0049 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:45 lr: 0.000249 loss: 2.8284 (2.8595) grad: 0.4322 (0.5396) time: 0.4408 data: 0.0049 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=106.04 > 63.56) at step 833. Freezing. +train: [4] [ 80/400] eta: 0:02:32 lr: 0.000252 loss: 2.8935 (2.9191) grad: 0.7091 (0.5799) time: 0.4421 data: 0.0048 max mem: 22448 +train: [4] [100/400] eta: 0:02:20 lr: 0.000255 loss: 2.7288 (2.8832) grad: 0.2369 (0.5116) time: 0.4385 data: 0.0051 max mem: 22448 +train: [4] [120/400] eta: 0:02:10 lr: 0.000258 loss: 2.6861 (2.8500) grad: 0.2369 (0.4668) time: 0.4502 data: 0.0050 max mem: 22448 +train: [4] [140/400] eta: 0:02:00 lr: 0.000261 loss: 2.7057 (2.8290) grad: 0.2467 (0.4371) time: 0.4449 data: 0.0049 max mem: 22448 +train: [4] [160/400] eta: 0:01:50 lr: 0.000264 loss: 2.7103 (2.8158) grad: 0.2581 (0.4150) time: 0.4370 data: 0.0050 max mem: 22448 +train: [4] [180/400] eta: 0:01:40 lr: 0.000267 loss: 2.7202 (2.8088) grad: 0.2605 (0.3980) time: 0.4368 data: 0.0049 max mem: 22448 +train: [4] [200/400] eta: 0:01:31 lr: 0.000270 loss: 2.7040 (2.7969) grad: 0.2534 (0.3828) time: 0.4520 data: 0.0051 max mem: 22448 +train: [4] [220/400] eta: 0:01:21 lr: 0.000273 loss: 2.7144 (2.7939) grad: 0.2534 (0.3719) time: 0.4377 data: 0.0049 max mem: 22448 +train: [4] [240/400] eta: 0:01:12 lr: 0.000276 loss: 2.7497 (2.7880) grad: 0.2572 (0.3624) time: 0.4378 data: 0.0048 max mem: 22448 +train: [4] [260/400] eta: 0:01:03 lr: 0.000279 loss: 2.7410 (2.7833) grad: 0.2572 (0.3545) time: 0.4401 data: 0.0049 max mem: 22448 +train: [4] [280/400] eta: 0:00:54 lr: 0.000282 loss: 2.7432 (2.7794) grad: 0.2673 (0.3502) time: 0.4411 data: 0.0050 max mem: 22448 +train: [4] [300/400] eta: 0:00:45 lr: 0.000285 loss: 2.7884 (2.7925) grad: 0.3252 (0.3780) time: 0.4499 data: 0.0049 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=74.89 > 63.56) at step 954. Freezing. +train: [4] [320/400] eta: 0:00:36 lr: 0.000288 loss: 2.8622 (2.8110) grad: 0.5872 (0.4058) time: 0.4411 data: 0.0048 max mem: 22448 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 2.6996 (2.8043) grad: 0.2199 (0.3951) time: 0.4389 data: 0.0049 max mem: 22448 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 2.6987 (2.8004) grad: 0.2269 (0.3862) time: 0.4485 data: 0.0050 max mem: 22448 +train: [4] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.7153 (2.7957) grad: 0.2354 (0.3782) time: 0.4428 data: 0.0049 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.6949 (2.7900) grad: 0.2365 (0.3711) time: 0.4613 data: 0.0050 max mem: 22448 +train: [4] Total time: 0:03:00 (0.4509 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.6949 (2.7900) grad: 0.2365 (0.3711) +eval (validation): [4] [ 0/85] eta: 0:04:15 time: 3.0084 data: 2.7376 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:32 time: 0.3706 data: 0.0044 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:19 time: 0.3540 data: 0.0042 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3336 data: 0.0039 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3314 data: 0.0040 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3306 data: 0.0041 max mem: 22448 +eval (validation): [4] Total time: 0:00:32 (0.3816 s / it) +cv: [4] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.436 acc: 0.263 f1: 0.194 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:21:57 lr: nan time: 3.2934 data: 2.9125 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:38 lr: 0.000300 loss: 2.6006 (2.5989) grad: 0.2409 (0.2432) time: 0.4402 data: 0.0031 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:02 lr: 0.000300 loss: 2.6222 (2.6491) grad: 0.2436 (0.2474) time: 0.4330 data: 0.0049 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:44 lr: 0.000300 loss: 2.6544 (2.6555) grad: 0.2533 (0.2505) time: 0.4359 data: 0.0049 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:30 lr: 0.000300 loss: 2.6612 (2.6536) grad: 0.2473 (0.2502) time: 0.4345 data: 0.0049 max mem: 22448 +train: [5] [100/400] eta: 0:02:19 lr: 0.000300 loss: 2.6631 (2.6555) grad: 0.2470 (0.2521) time: 0.4358 data: 0.0047 max mem: 22448 +train: [5] [120/400] eta: 0:02:08 lr: 0.000300 loss: 2.6091 (2.6482) grad: 0.2472 (0.2512) time: 0.4375 data: 0.0047 max mem: 22448 +train: [5] [140/400] eta: 0:01:59 lr: 0.000300 loss: 2.6011 (2.6383) grad: 0.2404 (0.2490) time: 0.4573 data: 0.0050 max mem: 22448 +train: [5] [160/400] eta: 0:01:49 lr: 0.000299 loss: 2.6106 (2.6379) grad: 0.2364 (0.2484) time: 0.4383 data: 0.0050 max mem: 22448 +train: [5] [180/400] eta: 0:01:40 lr: 0.000299 loss: 2.6571 (2.6423) grad: 0.2419 (0.2481) time: 0.4378 data: 0.0049 max mem: 22448 +train: [5] [200/400] eta: 0:01:30 lr: 0.000299 loss: 2.6571 (2.6409) grad: 0.2527 (0.2485) time: 0.4326 data: 0.0050 max mem: 22448 +train: [5] [220/400] eta: 0:01:21 lr: 0.000299 loss: 2.5830 (2.6407) grad: 0.2527 (0.2480) time: 0.4515 data: 0.0049 max mem: 22448 +train: [5] [240/400] eta: 0:01:12 lr: 0.000299 loss: 2.6052 (2.6409) grad: 0.2485 (0.2484) time: 0.4387 data: 0.0047 max mem: 22448 +train: [5] [260/400] eta: 0:01:02 lr: 0.000299 loss: 2.6141 (2.6374) grad: 0.2480 (0.2480) time: 0.4292 data: 0.0047 max mem: 22448 +train: [5] [280/400] eta: 0:00:53 lr: 0.000298 loss: 2.6191 (2.6391) grad: 0.2481 (0.2488) time: 0.4399 data: 0.0048 max mem: 22448 +train: [5] [300/400] eta: 0:00:44 lr: 0.000298 loss: 2.6191 (2.6359) grad: 0.2566 (0.2492) time: 0.4349 data: 0.0048 max mem: 22448 +train: [5] [320/400] eta: 0:00:35 lr: 0.000298 loss: 2.6067 (2.6379) grad: 0.2570 (0.2499) time: 0.4329 data: 0.0049 max mem: 22448 +train: [5] [340/400] eta: 0:00:26 lr: 0.000298 loss: 2.6491 (2.6365) grad: 0.2548 (0.2499) time: 0.4390 data: 0.0050 max mem: 22448 +train: [5] [360/400] eta: 0:00:17 lr: 0.000297 loss: 2.6181 (2.6352) grad: 0.2571 (0.2504) time: 0.4488 data: 0.0050 max mem: 22448 +train: [5] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.5821 (2.6352) grad: 0.2578 (0.2510) time: 0.4489 data: 0.0049 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.5683 (2.6318) grad: 0.2510 (0.2506) time: 0.4476 data: 0.0049 max mem: 22448 +train: [5] Total time: 0:02:58 (0.4474 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.5683 (2.6318) grad: 0.2510 (0.2506) +eval (validation): [5] [ 0/85] eta: 0:04:18 time: 3.0387 data: 2.8027 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:30 time: 0.3347 data: 0.0030 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:18 time: 0.3578 data: 0.0035 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3313 data: 0.0040 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3217 data: 0.0036 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3186 data: 0.0036 max mem: 22448 +eval (validation): [5] Total time: 0:00:31 (0.3698 s / it) +cv: [5] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.386 acc: 0.276 f1: 0.206 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:21:07 lr: nan time: 3.1680 data: 2.8003 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:43 lr: 0.000296 loss: 2.5579 (2.5481) grad: 0.2468 (0.2477) time: 0.4602 data: 0.0039 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:04 lr: 0.000296 loss: 2.5618 (2.5676) grad: 0.2447 (0.2484) time: 0.4331 data: 0.0049 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:45 lr: 0.000296 loss: 2.5551 (2.5657) grad: 0.2427 (0.2492) time: 0.4288 data: 0.0049 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:31 lr: 0.000295 loss: 2.5221 (2.5518) grad: 0.2536 (0.2502) time: 0.4405 data: 0.0050 max mem: 22448 +train: [6] [100/400] eta: 0:02:19 lr: 0.000295 loss: 2.5518 (2.5525) grad: 0.2562 (0.2518) time: 0.4339 data: 0.0049 max mem: 22448 +train: [6] [120/400] eta: 0:02:09 lr: 0.000295 loss: 2.5527 (2.5559) grad: 0.2566 (0.2536) time: 0.4373 data: 0.0048 max mem: 22448 +train: [6] [140/400] eta: 0:01:59 lr: 0.000294 loss: 2.5970 (2.5645) grad: 0.2568 (0.2547) time: 0.4401 data: 0.0048 max mem: 22448 +train: [6] [160/400] eta: 0:01:49 lr: 0.000294 loss: 2.6202 (2.5716) grad: 0.2585 (0.2548) time: 0.4520 data: 0.0051 max mem: 22448 +train: [6] [180/400] eta: 0:01:40 lr: 0.000293 loss: 2.5789 (2.5696) grad: 0.2589 (0.2557) time: 0.4441 data: 0.0050 max mem: 22448 +train: [6] [200/400] eta: 0:01:30 lr: 0.000293 loss: 2.5758 (2.5717) grad: 0.2627 (0.2566) time: 0.4400 data: 0.0049 max mem: 22448 +train: [6] [220/400] eta: 0:01:21 lr: 0.000292 loss: 2.5831 (2.5704) grad: 0.2657 (0.2579) time: 0.4338 data: 0.0047 max mem: 22448 +train: [6] [240/400] eta: 0:01:12 lr: 0.000292 loss: 2.5930 (2.5715) grad: 0.2624 (0.2581) time: 0.4528 data: 0.0051 max mem: 22448 +train: [6] [260/400] eta: 0:01:03 lr: 0.000291 loss: 2.5588 (2.5681) grad: 0.2539 (0.2577) time: 0.4356 data: 0.0049 max mem: 22448 +train: [6] [280/400] eta: 0:00:54 lr: 0.000291 loss: 2.5701 (2.5691) grad: 0.2500 (0.2577) time: 0.4351 data: 0.0048 max mem: 22448 +train: [6] [300/400] eta: 0:00:44 lr: 0.000290 loss: 2.5914 (2.5700) grad: 0.2570 (0.2578) time: 0.4341 data: 0.0048 max mem: 22448 +train: [6] [320/400] eta: 0:00:35 lr: 0.000290 loss: 2.5899 (2.5704) grad: 0.2577 (0.2585) time: 0.4329 data: 0.0049 max mem: 22448 +train: [6] [340/400] eta: 0:00:26 lr: 0.000289 loss: 2.5807 (2.5716) grad: 0.2608 (0.2584) time: 0.4309 data: 0.0047 max mem: 22448 +train: [6] [360/400] eta: 0:00:17 lr: 0.000288 loss: 2.5439 (2.5688) grad: 0.2580 (0.2586) time: 0.4502 data: 0.0047 max mem: 22448 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 2.5142 (2.5702) grad: 0.2611 (0.2587) time: 0.4458 data: 0.0049 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.5671 (2.5697) grad: 0.2524 (0.2582) time: 0.4473 data: 0.0050 max mem: 22448 +train: [6] Total time: 0:02:59 (0.4477 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.5671 (2.5697) grad: 0.2524 (0.2582) +eval (validation): [6] [ 0/85] eta: 0:04:18 time: 3.0385 data: 2.7919 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:30 time: 0.3459 data: 0.0036 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:19 time: 0.3683 data: 0.0043 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3336 data: 0.0041 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3240 data: 0.0038 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3197 data: 0.0038 max mem: 22448 +eval (validation): [6] Total time: 0:00:31 (0.3762 s / it) +cv: [6] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.404 acc: 0.273 f1: 0.216 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:21:01 lr: nan time: 3.1526 data: 2.8314 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:35 lr: 0.000286 loss: 2.4271 (2.4561) grad: 0.2414 (0.2549) time: 0.4373 data: 0.0039 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:00 lr: 0.000286 loss: 2.4430 (2.4596) grad: 0.2530 (0.2565) time: 0.4352 data: 0.0047 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:43 lr: 0.000285 loss: 2.4622 (2.4539) grad: 0.2655 (0.2623) time: 0.4398 data: 0.0048 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:30 lr: 0.000284 loss: 2.4693 (2.4719) grad: 0.2640 (0.2608) time: 0.4319 data: 0.0046 max mem: 22448 +train: [7] [100/400] eta: 0:02:18 lr: 0.000284 loss: 2.4693 (2.4678) grad: 0.2605 (0.2611) time: 0.4322 data: 0.0050 max mem: 22448 +train: [7] [120/400] eta: 0:02:08 lr: 0.000283 loss: 2.4484 (2.4688) grad: 0.2670 (0.2623) time: 0.4434 data: 0.0051 max mem: 22448 +train: [7] [140/400] eta: 0:01:58 lr: 0.000282 loss: 2.4591 (2.4736) grad: 0.2591 (0.2617) time: 0.4316 data: 0.0051 max mem: 22448 +train: [7] [160/400] eta: 0:01:49 lr: 0.000282 loss: 2.4815 (2.4740) grad: 0.2578 (0.2614) time: 0.4587 data: 0.0048 max mem: 22448 +train: [7] [180/400] eta: 0:01:39 lr: 0.000281 loss: 2.5295 (2.4829) grad: 0.2668 (0.2628) time: 0.4453 data: 0.0050 max mem: 22448 +train: [7] [200/400] eta: 0:01:30 lr: 0.000280 loss: 2.5272 (2.4825) grad: 0.2668 (0.2628) time: 0.4510 data: 0.0050 max mem: 22448 +train: [7] [220/400] eta: 0:01:21 lr: 0.000279 loss: 2.4707 (2.4810) grad: 0.2631 (0.2629) time: 0.4277 data: 0.0047 max mem: 22448 +train: [7] [240/400] eta: 0:01:12 lr: 0.000278 loss: 2.4890 (2.4849) grad: 0.2618 (0.2631) time: 0.4482 data: 0.0049 max mem: 22448 +train: [7] [260/400] eta: 0:01:03 lr: 0.000278 loss: 2.5051 (2.4837) grad: 0.2560 (0.2623) time: 0.4425 data: 0.0049 max mem: 22448 +train: [7] [280/400] eta: 0:00:53 lr: 0.000277 loss: 2.4723 (2.4821) grad: 0.2543 (0.2622) time: 0.4311 data: 0.0046 max mem: 22448 +train: [7] [300/400] eta: 0:00:44 lr: 0.000276 loss: 2.4171 (2.4803) grad: 0.2563 (0.2624) time: 0.4318 data: 0.0050 max mem: 22448 +train: [7] [320/400] eta: 0:00:35 lr: 0.000275 loss: 2.4499 (2.4799) grad: 0.2595 (0.2621) time: 0.4296 data: 0.0049 max mem: 22448 +train: [7] [340/400] eta: 0:00:26 lr: 0.000274 loss: 2.4552 (2.4783) grad: 0.2553 (0.2617) time: 0.4369 data: 0.0050 max mem: 22448 +train: [7] [360/400] eta: 0:00:17 lr: 0.000273 loss: 2.4580 (2.4794) grad: 0.2615 (0.2621) time: 0.4490 data: 0.0050 max mem: 22448 +train: [7] [380/400] eta: 0:00:08 lr: 0.000272 loss: 2.4838 (2.4803) grad: 0.2659 (0.2626) time: 0.4435 data: 0.0048 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5126 (2.4819) grad: 0.2683 (0.2630) time: 0.4378 data: 0.0049 max mem: 22448 +train: [7] Total time: 0:02:58 (0.4466 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5126 (2.4819) grad: 0.2683 (0.2630) +eval (validation): [7] [ 0/85] eta: 0:04:17 time: 3.0308 data: 2.7923 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:31 time: 0.3510 data: 0.0049 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:19 time: 0.3743 data: 0.0040 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:10 time: 0.3561 data: 0.0049 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3192 data: 0.0043 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3147 data: 0.0041 max mem: 22448 +eval (validation): [7] Total time: 0:00:32 (0.3829 s / it) +cv: [7] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.403 acc: 0.272 f1: 0.215 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:22:00 lr: nan time: 3.3019 data: 2.9128 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:40 lr: 0.000270 loss: 2.3179 (2.3559) grad: 0.2456 (0.2490) time: 0.4438 data: 0.0041 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:05 lr: 0.000270 loss: 2.3870 (2.3784) grad: 0.2503 (0.2534) time: 0.4485 data: 0.0045 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:47 lr: 0.000269 loss: 2.4014 (2.3946) grad: 0.2549 (0.2543) time: 0.4422 data: 0.0046 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:32 lr: 0.000268 loss: 2.4159 (2.4018) grad: 0.2597 (0.2597) time: 0.4327 data: 0.0048 max mem: 22448 +train: [8] [100/400] eta: 0:02:20 lr: 0.000267 loss: 2.4269 (2.4028) grad: 0.2741 (0.2637) time: 0.4364 data: 0.0049 max mem: 22448 +train: [8] [120/400] eta: 0:02:09 lr: 0.000266 loss: 2.4311 (2.4039) grad: 0.2802 (0.2670) time: 0.4312 data: 0.0050 max mem: 22448 +train: [8] [140/400] eta: 0:01:59 lr: 0.000265 loss: 2.4029 (2.4080) grad: 0.2738 (0.2682) time: 0.4394 data: 0.0050 max mem: 22448 +train: [8] [160/400] eta: 0:01:49 lr: 0.000264 loss: 2.4224 (2.4097) grad: 0.2775 (0.2709) time: 0.4466 data: 0.0050 max mem: 22448 +train: [8] [180/400] eta: 0:01:40 lr: 0.000263 loss: 2.4134 (2.4055) grad: 0.2715 (0.2701) time: 0.4524 data: 0.0051 max mem: 22448 +train: [8] [200/400] eta: 0:01:31 lr: 0.000262 loss: 2.4092 (2.4081) grad: 0.2693 (0.2703) time: 0.4378 data: 0.0049 max mem: 22448 +train: [8] [220/400] eta: 0:01:21 lr: 0.000260 loss: 2.4294 (2.4096) grad: 0.2742 (0.2704) time: 0.4342 data: 0.0047 max mem: 22448 +train: [8] [240/400] eta: 0:01:12 lr: 0.000259 loss: 2.4237 (2.4101) grad: 0.2707 (0.2708) time: 0.4345 data: 0.0048 max mem: 22448 +train: [8] [260/400] eta: 0:01:03 lr: 0.000258 loss: 2.4287 (2.4124) grad: 0.2693 (0.2711) time: 0.4546 data: 0.0050 max mem: 22448 +train: [8] [280/400] eta: 0:00:54 lr: 0.000257 loss: 2.4287 (2.4114) grad: 0.2651 (0.2715) time: 0.4336 data: 0.0050 max mem: 22448 +train: [8] [300/400] eta: 0:00:44 lr: 0.000256 loss: 2.4074 (2.4118) grad: 0.2643 (0.2715) time: 0.4325 data: 0.0049 max mem: 22448 +train: [8] [320/400] eta: 0:00:35 lr: 0.000255 loss: 2.4108 (2.4125) grad: 0.2631 (0.2710) time: 0.4348 data: 0.0048 max mem: 22448 +train: [8] [340/400] eta: 0:00:26 lr: 0.000254 loss: 2.4125 (2.4133) grad: 0.2683 (0.2715) time: 0.4377 data: 0.0046 max mem: 22448 +train: [8] [360/400] eta: 0:00:17 lr: 0.000253 loss: 2.4258 (2.4142) grad: 0.2683 (0.2711) time: 0.4528 data: 0.0052 max mem: 22448 +train: [8] [380/400] eta: 0:00:08 lr: 0.000252 loss: 2.4237 (2.4149) grad: 0.2651 (0.2708) time: 0.4460 data: 0.0049 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.4103 (2.4155) grad: 0.2659 (0.2714) time: 0.4344 data: 0.0047 max mem: 22448 +train: [8] Total time: 0:02:59 (0.4480 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.4103 (2.4155) grad: 0.2659 (0.2714) +eval (validation): [8] [ 0/85] eta: 0:04:22 time: 3.0901 data: 2.8057 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:32 time: 0.3647 data: 0.0052 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:19 time: 0.3603 data: 0.0042 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:10 time: 0.3487 data: 0.0044 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3213 data: 0.0039 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3108 data: 0.0038 max mem: 22448 +eval (validation): [8] Total time: 0:00:32 (0.3824 s / it) +cv: [8] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.422 acc: 0.268 f1: 0.209 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:21:49 lr: nan time: 3.2736 data: 2.8890 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:46 lr: 0.000249 loss: 2.3444 (2.3466) grad: 0.2667 (0.2748) time: 0.4615 data: 0.0042 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:05 lr: 0.000248 loss: 2.3572 (2.3735) grad: 0.2667 (0.2701) time: 0.4309 data: 0.0047 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:45 lr: 0.000247 loss: 2.3737 (2.3635) grad: 0.2640 (0.2672) time: 0.4302 data: 0.0048 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:32 lr: 0.000246 loss: 2.3585 (2.3714) grad: 0.2717 (0.2697) time: 0.4392 data: 0.0048 max mem: 22448 +train: [9] [100/400] eta: 0:02:20 lr: 0.000244 loss: 2.3585 (2.3664) grad: 0.2721 (0.2699) time: 0.4354 data: 0.0047 max mem: 22448 +train: [9] [120/400] eta: 0:02:09 lr: 0.000243 loss: 2.3480 (2.3645) grad: 0.2674 (0.2698) time: 0.4315 data: 0.0047 max mem: 22448 +train: [9] [140/400] eta: 0:01:59 lr: 0.000242 loss: 2.3480 (2.3652) grad: 0.2674 (0.2703) time: 0.4362 data: 0.0050 max mem: 22448 +train: [9] [160/400] eta: 0:01:49 lr: 0.000241 loss: 2.3403 (2.3600) grad: 0.2649 (0.2705) time: 0.4368 data: 0.0046 max mem: 22448 +train: [9] [180/400] eta: 0:01:40 lr: 0.000240 loss: 2.3317 (2.3622) grad: 0.2681 (0.2718) time: 0.4610 data: 0.0052 max mem: 22448 +train: [9] [200/400] eta: 0:01:31 lr: 0.000238 loss: 2.3298 (2.3616) grad: 0.2825 (0.2735) time: 0.4485 data: 0.0052 max mem: 22448 +train: [9] [220/400] eta: 0:01:21 lr: 0.000237 loss: 2.3300 (2.3601) grad: 0.2798 (0.2744) time: 0.4447 data: 0.0049 max mem: 22448 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 2.3655 (2.3634) grad: 0.2795 (0.2741) time: 0.4258 data: 0.0048 max mem: 22448 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 2.3988 (2.3620) grad: 0.2687 (0.2732) time: 0.4423 data: 0.0050 max mem: 22448 +train: [9] [280/400] eta: 0:00:53 lr: 0.000233 loss: 2.3909 (2.3634) grad: 0.2693 (0.2737) time: 0.4338 data: 0.0048 max mem: 22448 +train: [9] [300/400] eta: 0:00:44 lr: 0.000232 loss: 2.3844 (2.3651) grad: 0.2772 (0.2744) time: 0.4342 data: 0.0051 max mem: 22448 +train: [9] [320/400] eta: 0:00:35 lr: 0.000230 loss: 2.3341 (2.3664) grad: 0.2748 (0.2740) time: 0.4369 data: 0.0049 max mem: 22448 +train: [9] [340/400] eta: 0:00:26 lr: 0.000229 loss: 2.3309 (2.3640) grad: 0.2715 (0.2743) time: 0.4418 data: 0.0049 max mem: 22448 +train: [9] [360/400] eta: 0:00:17 lr: 0.000228 loss: 2.3253 (2.3638) grad: 0.2741 (0.2748) time: 0.4482 data: 0.0052 max mem: 22448 +train: [9] [380/400] eta: 0:00:08 lr: 0.000226 loss: 2.3256 (2.3630) grad: 0.2716 (0.2748) time: 0.4500 data: 0.0049 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.3569 (2.3638) grad: 0.2714 (0.2752) time: 0.4281 data: 0.0047 max mem: 22448 +train: [9] Total time: 0:02:58 (0.4474 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.3569 (2.3638) grad: 0.2714 (0.2752) +eval (validation): [9] [ 0/85] eta: 0:04:19 time: 3.0482 data: 2.8088 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:31 time: 0.3516 data: 0.0039 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3331 data: 0.0040 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3437 data: 0.0040 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3260 data: 0.0040 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3192 data: 0.0038 max mem: 22448 +eval (validation): [9] Total time: 0:00:31 (0.3723 s / it) +cv: [9] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.406 acc: 0.269 f1: 0.206 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:21:49 lr: nan time: 3.2740 data: 2.8836 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:40 lr: 0.000224 loss: 2.2392 (2.2700) grad: 0.2676 (0.2772) time: 0.4456 data: 0.0039 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:03 lr: 0.000222 loss: 2.2827 (2.2696) grad: 0.2682 (0.2733) time: 0.4337 data: 0.0041 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:46 lr: 0.000221 loss: 2.2895 (2.2879) grad: 0.2668 (0.2703) time: 0.4545 data: 0.0051 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:33 lr: 0.000220 loss: 2.2900 (2.2790) grad: 0.2651 (0.2699) time: 0.4473 data: 0.0048 max mem: 22448 +train: [10] [100/400] eta: 0:02:21 lr: 0.000218 loss: 2.2879 (2.2818) grad: 0.2724 (0.2710) time: 0.4335 data: 0.0049 max mem: 22448 +train: [10] [120/400] eta: 0:02:10 lr: 0.000217 loss: 2.2931 (2.2852) grad: 0.2714 (0.2722) time: 0.4432 data: 0.0049 max mem: 22448 +train: [10] [140/400] eta: 0:02:00 lr: 0.000215 loss: 2.3151 (2.2902) grad: 0.2714 (0.2725) time: 0.4432 data: 0.0050 max mem: 22448 +train: [10] [160/400] eta: 0:01:50 lr: 0.000214 loss: 2.3157 (2.2933) grad: 0.2711 (0.2729) time: 0.4355 data: 0.0050 max mem: 22448 +train: [10] [180/400] eta: 0:01:40 lr: 0.000213 loss: 2.2938 (2.2959) grad: 0.2704 (0.2732) time: 0.4495 data: 0.0047 max mem: 22448 +train: [10] [200/400] eta: 0:01:31 lr: 0.000211 loss: 2.2938 (2.2980) grad: 0.2745 (0.2739) time: 0.4557 data: 0.0052 max mem: 22448 +train: [10] [220/400] eta: 0:01:22 lr: 0.000210 loss: 2.3438 (2.2989) grad: 0.2777 (0.2742) time: 0.4417 data: 0.0047 max mem: 22448 +train: [10] [240/400] eta: 0:01:12 lr: 0.000208 loss: 2.2846 (2.2988) grad: 0.2712 (0.2738) time: 0.4458 data: 0.0050 max mem: 22448 +train: [10] [260/400] eta: 0:01:03 lr: 0.000207 loss: 2.2971 (2.2988) grad: 0.2682 (0.2739) time: 0.4431 data: 0.0049 max mem: 22448 +train: [10] [280/400] eta: 0:00:54 lr: 0.000205 loss: 2.3215 (2.2996) grad: 0.2692 (0.2742) time: 0.4612 data: 0.0051 max mem: 22448 +train: [10] [300/400] eta: 0:00:45 lr: 0.000204 loss: 2.2468 (2.2966) grad: 0.2701 (0.2739) time: 0.4553 data: 0.0051 max mem: 22448 +train: [10] [320/400] eta: 0:00:36 lr: 0.000202 loss: 2.2446 (2.2948) grad: 0.2737 (0.2744) time: 0.4340 data: 0.0047 max mem: 22448 +train: [10] [340/400] eta: 0:00:27 lr: 0.000201 loss: 2.2695 (2.2944) grad: 0.2762 (0.2747) time: 0.4365 data: 0.0047 max mem: 22448 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 2.2728 (2.2936) grad: 0.2714 (0.2745) time: 0.4534 data: 0.0051 max mem: 22448 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 2.2511 (2.2928) grad: 0.2706 (0.2744) time: 0.4402 data: 0.0051 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.2735 (2.2929) grad: 0.2751 (0.2744) time: 0.4383 data: 0.0051 max mem: 22448 +train: [10] Total time: 0:03:00 (0.4523 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.2735 (2.2929) grad: 0.2751 (0.2744) +eval (validation): [10] [ 0/85] eta: 0:04:25 time: 3.1211 data: 2.8230 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:33 time: 0.3803 data: 0.0059 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:19 time: 0.3505 data: 0.0039 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:10 time: 0.3584 data: 0.0047 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3245 data: 0.0040 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3118 data: 0.0040 max mem: 22448 +eval (validation): [10] Total time: 0:00:32 (0.3856 s / it) +cv: [10] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.407 acc: 0.269 f1: 0.210 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:03 lr: nan time: 3.3089 data: 2.9617 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:48 lr: 0.000195 loss: 2.2191 (2.2002) grad: 0.2641 (0.2681) time: 0.4657 data: 0.0047 max mem: 22448 +train: [11] [ 40/400] eta: 0:03:07 lr: 0.000193 loss: 2.2346 (2.2175) grad: 0.2667 (0.2681) time: 0.4360 data: 0.0047 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:46 lr: 0.000192 loss: 2.2207 (2.2084) grad: 0.2704 (0.2708) time: 0.4248 data: 0.0049 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:32 lr: 0.000190 loss: 2.2314 (2.2233) grad: 0.2723 (0.2730) time: 0.4387 data: 0.0049 max mem: 22448 +train: [11] [100/400] eta: 0:02:20 lr: 0.000189 loss: 2.2454 (2.2222) grad: 0.2712 (0.2733) time: 0.4380 data: 0.0050 max mem: 22448 +train: [11] [120/400] eta: 0:02:09 lr: 0.000187 loss: 2.1711 (2.2108) grad: 0.2699 (0.2742) time: 0.4353 data: 0.0049 max mem: 22448 +train: [11] [140/400] eta: 0:01:59 lr: 0.000186 loss: 2.2103 (2.2171) grad: 0.2756 (0.2752) time: 0.4446 data: 0.0048 max mem: 22448 +train: [11] [160/400] eta: 0:01:50 lr: 0.000184 loss: 2.2350 (2.2241) grad: 0.2770 (0.2762) time: 0.4440 data: 0.0049 max mem: 22448 +train: [11] [180/400] eta: 0:01:40 lr: 0.000183 loss: 2.2314 (2.2243) grad: 0.2887 (0.2778) time: 0.4422 data: 0.0050 max mem: 22448 +train: [11] [200/400] eta: 0:01:31 lr: 0.000181 loss: 2.2570 (2.2275) grad: 0.2851 (0.2781) time: 0.4607 data: 0.0051 max mem: 22448 +train: [11] [220/400] eta: 0:01:22 lr: 0.000180 loss: 2.2820 (2.2349) grad: 0.2755 (0.2781) time: 0.4469 data: 0.0051 max mem: 22448 +train: [11] [240/400] eta: 0:01:12 lr: 0.000178 loss: 2.2934 (2.2363) grad: 0.2699 (0.2779) time: 0.4431 data: 0.0051 max mem: 22448 +train: [11] [260/400] eta: 0:01:03 lr: 0.000177 loss: 2.2590 (2.2374) grad: 0.2735 (0.2783) time: 0.4317 data: 0.0055 max mem: 22448 +train: [11] [280/400] eta: 0:00:54 lr: 0.000175 loss: 2.2539 (2.2392) grad: 0.2854 (0.2788) time: 0.4485 data: 0.0044 max mem: 22448 +train: [11] [300/400] eta: 0:00:45 lr: 0.000174 loss: 2.2570 (2.2428) grad: 0.2892 (0.2794) time: 0.4420 data: 0.0049 max mem: 22448 +train: [11] [320/400] eta: 0:00:36 lr: 0.000172 loss: 2.2630 (2.2448) grad: 0.2834 (0.2799) time: 0.4474 data: 0.0049 max mem: 22448 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 2.2130 (2.2456) grad: 0.2830 (0.2805) time: 0.4432 data: 0.0048 max mem: 22448 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 2.2286 (2.2454) grad: 0.2831 (0.2810) time: 0.4473 data: 0.0049 max mem: 22448 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 2.2087 (2.2431) grad: 0.2772 (0.2808) time: 0.4431 data: 0.0050 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.2610 (2.2469) grad: 0.2707 (0.2806) time: 0.4421 data: 0.0048 max mem: 22448 +train: [11] Total time: 0:03:00 (0.4510 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.2610 (2.2469) grad: 0.2707 (0.2806) +eval (validation): [11] [ 0/85] eta: 0:04:43 time: 3.3309 data: 3.0284 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:34 time: 0.3930 data: 0.0036 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:20 time: 0.3574 data: 0.0041 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:10 time: 0.3404 data: 0.0041 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3168 data: 0.0038 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3120 data: 0.0038 max mem: 22448 +eval (validation): [11] Total time: 0:00:32 (0.3878 s / it) +cv: [11] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.420 acc: 0.274 f1: 0.218 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:23:31 lr: nan time: 3.5279 data: 3.1221 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:44 lr: 0.000164 loss: 2.1168 (2.1431) grad: 0.2573 (0.2578) time: 0.4443 data: 0.0044 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:08 lr: 0.000163 loss: 2.1661 (2.1539) grad: 0.2631 (0.2665) time: 0.4517 data: 0.0040 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:47 lr: 0.000161 loss: 2.1481 (2.1541) grad: 0.2714 (0.2672) time: 0.4337 data: 0.0046 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:34 lr: 0.000160 loss: 2.1901 (2.1603) grad: 0.2656 (0.2666) time: 0.4530 data: 0.0047 max mem: 22448 +train: [12] [100/400] eta: 0:02:22 lr: 0.000158 loss: 2.1901 (2.1628) grad: 0.2658 (0.2678) time: 0.4464 data: 0.0050 max mem: 22448 +train: [12] [120/400] eta: 0:02:11 lr: 0.000156 loss: 2.1636 (2.1676) grad: 0.2658 (0.2665) time: 0.4356 data: 0.0050 max mem: 22448 +train: [12] [140/400] eta: 0:02:00 lr: 0.000155 loss: 2.1708 (2.1683) grad: 0.2671 (0.2683) time: 0.4307 data: 0.0048 max mem: 22448 +train: [12] [160/400] eta: 0:01:52 lr: 0.000153 loss: 2.1824 (2.1713) grad: 0.2826 (0.2709) time: 0.4898 data: 0.0053 max mem: 22448 +train: [12] [180/400] eta: 0:01:42 lr: 0.000152 loss: 2.1872 (2.1714) grad: 0.2823 (0.2715) time: 0.4472 data: 0.0047 max mem: 22448 +train: [12] [200/400] eta: 0:01:32 lr: 0.000150 loss: 2.2198 (2.1779) grad: 0.2741 (0.2718) time: 0.4534 data: 0.0048 max mem: 22448 +train: [12] [220/400] eta: 0:01:22 lr: 0.000149 loss: 2.2276 (2.1816) grad: 0.2696 (0.2717) time: 0.4284 data: 0.0049 max mem: 22448 +train: [12] [240/400] eta: 0:01:13 lr: 0.000147 loss: 2.1629 (2.1789) grad: 0.2732 (0.2731) time: 0.4328 data: 0.0048 max mem: 22448 +train: [12] [260/400] eta: 0:01:03 lr: 0.000145 loss: 2.1471 (2.1795) grad: 0.2819 (0.2734) time: 0.4301 data: 0.0045 max mem: 22448 +train: [12] [280/400] eta: 0:00:54 lr: 0.000144 loss: 2.1566 (2.1765) grad: 0.2752 (0.2734) time: 0.4347 data: 0.0048 max mem: 22448 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 2.1710 (2.1798) grad: 0.2775 (0.2747) time: 0.4343 data: 0.0047 max mem: 22448 +train: [12] [320/400] eta: 0:00:36 lr: 0.000141 loss: 2.2002 (2.1805) grad: 0.2783 (0.2749) time: 0.4324 data: 0.0047 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.1817 (2.1798) grad: 0.2715 (0.2747) time: 0.4312 data: 0.0048 max mem: 22448 +train: [12] [360/400] eta: 0:00:17 lr: 0.000138 loss: 2.1784 (2.1804) grad: 0.2714 (0.2746) time: 0.4323 data: 0.0047 max mem: 22448 +train: [12] [380/400] eta: 0:00:08 lr: 0.000136 loss: 2.1826 (2.1816) grad: 0.2745 (0.2747) time: 0.4397 data: 0.0049 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.1634 (2.1805) grad: 0.2798 (0.2751) time: 0.4297 data: 0.0048 max mem: 22448 +train: [12] Total time: 0:02:59 (0.4488 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.1634 (2.1805) grad: 0.2798 (0.2751) +eval (validation): [12] [ 0/85] eta: 0:04:12 time: 2.9718 data: 2.7343 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:30 time: 0.3454 data: 0.0037 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:17 time: 0.3186 data: 0.0034 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3301 data: 0.0038 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3144 data: 0.0039 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3061 data: 0.0039 max mem: 22448 +eval (validation): [12] Total time: 0:00:30 (0.3594 s / it) +cv: [12] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.445 acc: 0.264 f1: 0.205 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:21:02 lr: nan time: 3.1551 data: 2.7447 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:45 lr: 0.000133 loss: 2.1346 (2.1317) grad: 0.2673 (0.2729) time: 0.4651 data: 0.0043 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:05 lr: 0.000131 loss: 2.1415 (2.1277) grad: 0.2673 (0.2709) time: 0.4335 data: 0.0051 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:45 lr: 0.000130 loss: 2.1291 (2.1210) grad: 0.2716 (0.2711) time: 0.4317 data: 0.0046 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:31 lr: 0.000128 loss: 2.1003 (2.1191) grad: 0.2716 (0.2725) time: 0.4291 data: 0.0048 max mem: 22448 +train: [13] [100/400] eta: 0:02:19 lr: 0.000127 loss: 2.0937 (2.1187) grad: 0.2680 (0.2722) time: 0.4269 data: 0.0048 max mem: 22448 +train: [13] [120/400] eta: 0:02:08 lr: 0.000125 loss: 2.1101 (2.1169) grad: 0.2731 (0.2733) time: 0.4271 data: 0.0049 max mem: 22448 +train: [13] [140/400] eta: 0:01:58 lr: 0.000124 loss: 2.1407 (2.1283) grad: 0.2810 (0.2750) time: 0.4287 data: 0.0050 max mem: 22448 +train: [13] [160/400] eta: 0:01:48 lr: 0.000122 loss: 2.1497 (2.1280) grad: 0.2797 (0.2753) time: 0.4272 data: 0.0049 max mem: 22448 +train: [13] [180/400] eta: 0:01:38 lr: 0.000120 loss: 2.1278 (2.1342) grad: 0.2794 (0.2759) time: 0.4300 data: 0.0050 max mem: 22448 +train: [13] [200/400] eta: 0:01:29 lr: 0.000119 loss: 2.1071 (2.1302) grad: 0.2794 (0.2760) time: 0.4448 data: 0.0050 max mem: 22448 +train: [13] [220/400] eta: 0:01:20 lr: 0.000117 loss: 2.1043 (2.1298) grad: 0.2744 (0.2767) time: 0.4375 data: 0.0049 max mem: 22448 +train: [13] [240/400] eta: 0:01:11 lr: 0.000116 loss: 2.1391 (2.1302) grad: 0.2765 (0.2766) time: 0.4402 data: 0.0048 max mem: 22448 +train: [13] [260/400] eta: 0:01:02 lr: 0.000114 loss: 2.1396 (2.1332) grad: 0.2709 (0.2761) time: 0.4306 data: 0.0048 max mem: 22448 +train: [13] [280/400] eta: 0:00:53 lr: 0.000113 loss: 2.1007 (2.1301) grad: 0.2615 (0.2751) time: 0.4491 data: 0.0049 max mem: 22448 +train: [13] [300/400] eta: 0:00:44 lr: 0.000111 loss: 2.1007 (2.1306) grad: 0.2590 (0.2743) time: 0.4379 data: 0.0050 max mem: 22448 +train: [13] [320/400] eta: 0:00:35 lr: 0.000110 loss: 2.1613 (2.1330) grad: 0.2708 (0.2748) time: 0.4274 data: 0.0048 max mem: 22448 +train: [13] [340/400] eta: 0:00:26 lr: 0.000108 loss: 2.1197 (2.1329) grad: 0.2772 (0.2745) time: 0.4350 data: 0.0048 max mem: 22448 +train: [13] [360/400] eta: 0:00:17 lr: 0.000107 loss: 2.1137 (2.1337) grad: 0.2828 (0.2753) time: 0.4346 data: 0.0050 max mem: 22448 +train: [13] [380/400] eta: 0:00:08 lr: 0.000105 loss: 2.1150 (2.1329) grad: 0.2855 (0.2756) time: 0.4286 data: 0.0048 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.1101 (2.1328) grad: 0.2800 (0.2756) time: 0.4304 data: 0.0047 max mem: 22448 +train: [13] Total time: 0:02:56 (0.4422 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.1101 (2.1328) grad: 0.2800 (0.2756) +eval (validation): [13] [ 0/85] eta: 0:04:23 time: 3.1002 data: 2.8426 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:33 time: 0.3808 data: 0.0188 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:20 time: 0.3774 data: 0.0044 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:10 time: 0.3473 data: 0.0043 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3407 data: 0.0043 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3295 data: 0.0042 max mem: 22448 +eval (validation): [13] Total time: 0:00:33 (0.3944 s / it) +cv: [13] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.417 acc: 0.268 f1: 0.206 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:21:19 lr: nan time: 3.1986 data: 2.8025 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:49 lr: 0.000102 loss: 2.0469 (2.0456) grad: 0.2593 (0.2639) time: 0.4731 data: 0.0049 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:11 lr: 0.000101 loss: 2.0524 (2.0583) grad: 0.2562 (0.2643) time: 0.4599 data: 0.0050 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:49 lr: 0.000099 loss: 2.0403 (2.0605) grad: 0.2635 (0.2661) time: 0.4282 data: 0.0048 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:33 lr: 0.000098 loss: 2.0786 (2.0684) grad: 0.2623 (0.2649) time: 0.4259 data: 0.0049 max mem: 22448 +train: [14] [100/400] eta: 0:02:21 lr: 0.000096 loss: 2.0871 (2.0771) grad: 0.2627 (0.2662) time: 0.4302 data: 0.0048 max mem: 22448 +train: [14] [120/400] eta: 0:02:09 lr: 0.000095 loss: 2.0505 (2.0698) grad: 0.2720 (0.2669) time: 0.4249 data: 0.0048 max mem: 22448 +train: [14] [140/400] eta: 0:01:58 lr: 0.000093 loss: 2.0147 (2.0719) grad: 0.2756 (0.2682) time: 0.4211 data: 0.0048 max mem: 22448 +train: [14] [160/400] eta: 0:01:48 lr: 0.000092 loss: 2.0597 (2.0708) grad: 0.2704 (0.2684) time: 0.4227 data: 0.0050 max mem: 22448 +train: [14] [180/400] eta: 0:01:39 lr: 0.000090 loss: 2.0648 (2.0707) grad: 0.2662 (0.2686) time: 0.4269 data: 0.0049 max mem: 22448 +train: [14] [200/400] eta: 0:01:30 lr: 0.000089 loss: 2.0648 (2.0716) grad: 0.2699 (0.2695) time: 0.4501 data: 0.0050 max mem: 22448 +train: [14] [220/400] eta: 0:01:20 lr: 0.000088 loss: 2.0504 (2.0706) grad: 0.2717 (0.2700) time: 0.4388 data: 0.0051 max mem: 22448 +train: [14] [240/400] eta: 0:01:11 lr: 0.000086 loss: 2.0514 (2.0741) grad: 0.2694 (0.2705) time: 0.4383 data: 0.0050 max mem: 22448 +train: [14] [260/400] eta: 0:01:02 lr: 0.000085 loss: 2.0771 (2.0744) grad: 0.2761 (0.2709) time: 0.4234 data: 0.0048 max mem: 22448 +train: [14] [280/400] eta: 0:00:53 lr: 0.000083 loss: 2.0759 (2.0735) grad: 0.2711 (0.2704) time: 0.4478 data: 0.0050 max mem: 22448 +train: [14] [300/400] eta: 0:00:44 lr: 0.000082 loss: 2.0955 (2.0796) grad: 0.2711 (0.2710) time: 0.4288 data: 0.0048 max mem: 22448 +train: [14] [320/400] eta: 0:00:35 lr: 0.000081 loss: 2.1209 (2.0808) grad: 0.2794 (0.2718) time: 0.4209 data: 0.0046 max mem: 22448 +train: [14] [340/400] eta: 0:00:26 lr: 0.000079 loss: 2.1099 (2.0813) grad: 0.2847 (0.2727) time: 0.4293 data: 0.0050 max mem: 22448 +train: [14] [360/400] eta: 0:00:17 lr: 0.000078 loss: 2.0864 (2.0818) grad: 0.2794 (0.2726) time: 0.4258 data: 0.0049 max mem: 22448 +train: [14] [380/400] eta: 0:00:08 lr: 0.000076 loss: 2.0676 (2.0800) grad: 0.2750 (0.2727) time: 0.4242 data: 0.0047 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.0603 (2.0808) grad: 0.2782 (0.2733) time: 0.4269 data: 0.0047 max mem: 22448 +train: [14] Total time: 0:02:56 (0.4409 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.0603 (2.0808) grad: 0.2782 (0.2733) +eval (validation): [14] [ 0/85] eta: 0:04:35 time: 3.2438 data: 2.9466 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:32 time: 0.3566 data: 0.0048 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3380 data: 0.0037 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3615 data: 0.0045 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3183 data: 0.0041 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3051 data: 0.0039 max mem: 22448 +eval (validation): [14] Total time: 0:00:32 (0.3789 s / it) +cv: [14] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.422 acc: 0.268 f1: 0.210 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:22:36 lr: nan time: 3.3919 data: 3.0004 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:38 lr: 0.000074 loss: 2.0024 (2.0253) grad: 0.2572 (0.2602) time: 0.4352 data: 0.0041 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:02 lr: 0.000072 loss: 2.0086 (2.0286) grad: 0.2572 (0.2605) time: 0.4325 data: 0.0048 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:44 lr: 0.000071 loss: 1.9867 (2.0150) grad: 0.2688 (0.2628) time: 0.4376 data: 0.0048 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:30 lr: 0.000070 loss: 1.9787 (2.0078) grad: 0.2697 (0.2625) time: 0.4292 data: 0.0049 max mem: 22448 +train: [15] [100/400] eta: 0:02:18 lr: 0.000068 loss: 1.9713 (2.0080) grad: 0.2639 (0.2640) time: 0.4293 data: 0.0047 max mem: 22448 +train: [15] [120/400] eta: 0:02:07 lr: 0.000067 loss: 2.0030 (2.0111) grad: 0.2676 (0.2654) time: 0.4217 data: 0.0045 max mem: 22448 +train: [15] [140/400] eta: 0:01:57 lr: 0.000066 loss: 2.0329 (2.0181) grad: 0.2744 (0.2678) time: 0.4389 data: 0.0050 max mem: 22448 +train: [15] [160/400] eta: 0:01:48 lr: 0.000064 loss: 2.0179 (2.0183) grad: 0.2741 (0.2682) time: 0.4290 data: 0.0049 max mem: 22448 +train: [15] [180/400] eta: 0:01:38 lr: 0.000063 loss: 2.0551 (2.0275) grad: 0.2733 (0.2694) time: 0.4292 data: 0.0048 max mem: 22448 +train: [15] [200/400] eta: 0:01:29 lr: 0.000062 loss: 2.0614 (2.0280) grad: 0.2708 (0.2690) time: 0.4499 data: 0.0048 max mem: 22448 +train: [15] [220/400] eta: 0:01:20 lr: 0.000061 loss: 2.0257 (2.0289) grad: 0.2639 (0.2690) time: 0.4401 data: 0.0050 max mem: 22448 +train: [15] [240/400] eta: 0:01:11 lr: 0.000059 loss: 2.0237 (2.0289) grad: 0.2681 (0.2691) time: 0.4411 data: 0.0049 max mem: 22448 +train: [15] [260/400] eta: 0:01:02 lr: 0.000058 loss: 2.0629 (2.0334) grad: 0.2696 (0.2690) time: 0.4203 data: 0.0046 max mem: 22448 +train: [15] [280/400] eta: 0:00:53 lr: 0.000057 loss: 2.0471 (2.0330) grad: 0.2582 (0.2683) time: 0.4400 data: 0.0051 max mem: 22448 +train: [15] [300/400] eta: 0:00:44 lr: 0.000056 loss: 2.0211 (2.0333) grad: 0.2563 (0.2685) time: 0.4281 data: 0.0049 max mem: 22448 +train: [15] [320/400] eta: 0:00:35 lr: 0.000054 loss: 2.0497 (2.0337) grad: 0.2651 (0.2684) time: 0.4236 data: 0.0050 max mem: 22448 +train: [15] [340/400] eta: 0:00:26 lr: 0.000053 loss: 2.0497 (2.0343) grad: 0.2692 (0.2685) time: 0.4366 data: 0.0050 max mem: 22448 +train: [15] [360/400] eta: 0:00:17 lr: 0.000052 loss: 2.0427 (2.0358) grad: 0.2731 (0.2690) time: 0.4293 data: 0.0053 max mem: 22448 +train: [15] [380/400] eta: 0:00:08 lr: 0.000051 loss: 2.0180 (2.0339) grad: 0.2656 (0.2686) time: 0.4376 data: 0.0048 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.0202 (2.0349) grad: 0.2617 (0.2684) time: 0.4394 data: 0.0048 max mem: 22448 +train: [15] Total time: 0:02:56 (0.4412 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.0202 (2.0349) grad: 0.2617 (0.2684) +eval (validation): [15] [ 0/85] eta: 0:04:27 time: 3.1495 data: 2.9155 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:30 time: 0.3423 data: 0.0057 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:18 time: 0.3356 data: 0.0036 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:09 time: 0.3276 data: 0.0041 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3412 data: 0.0043 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3287 data: 0.0043 max mem: 22448 +eval (validation): [15] Total time: 0:00:31 (0.3719 s / it) +cv: [15] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.427 acc: 0.267 f1: 0.209 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:27:54 lr: nan time: 4.1861 data: 3.7956 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:55 lr: 0.000048 loss: 1.9586 (1.9752) grad: 0.2406 (0.2464) time: 0.4403 data: 0.0033 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:07 lr: 0.000047 loss: 1.9870 (1.9810) grad: 0.2439 (0.2476) time: 0.4198 data: 0.0048 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:48 lr: 0.000046 loss: 1.9870 (1.9785) grad: 0.2483 (0.2504) time: 0.4399 data: 0.0049 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:34 lr: 0.000045 loss: 1.9929 (1.9928) grad: 0.2560 (0.2541) time: 0.4509 data: 0.0048 max mem: 22448 +train: [16] [100/400] eta: 0:02:22 lr: 0.000044 loss: 1.9939 (1.9918) grad: 0.2637 (0.2559) time: 0.4307 data: 0.0049 max mem: 22448 +train: [16] [120/400] eta: 0:02:10 lr: 0.000043 loss: 1.9880 (1.9931) grad: 0.2633 (0.2565) time: 0.4272 data: 0.0049 max mem: 22448 +train: [16] [140/400] eta: 0:01:59 lr: 0.000042 loss: 1.9912 (1.9916) grad: 0.2541 (0.2564) time: 0.4247 data: 0.0050 max mem: 22448 +train: [16] [160/400] eta: 0:01:49 lr: 0.000041 loss: 1.9914 (1.9958) grad: 0.2591 (0.2577) time: 0.4285 data: 0.0051 max mem: 22448 +train: [16] [180/400] eta: 0:01:39 lr: 0.000040 loss: 1.9905 (1.9955) grad: 0.2626 (0.2581) time: 0.4333 data: 0.0050 max mem: 22448 +train: [16] [200/400] eta: 0:01:30 lr: 0.000039 loss: 1.9805 (1.9922) grad: 0.2568 (0.2572) time: 0.4491 data: 0.0051 max mem: 22448 +train: [16] [220/400] eta: 0:01:21 lr: 0.000038 loss: 1.9645 (1.9928) grad: 0.2468 (0.2570) time: 0.4324 data: 0.0047 max mem: 22448 +train: [16] [240/400] eta: 0:01:12 lr: 0.000036 loss: 1.9866 (1.9935) grad: 0.2537 (0.2576) time: 0.4389 data: 0.0048 max mem: 22448 +train: [16] [260/400] eta: 0:01:02 lr: 0.000035 loss: 1.9897 (1.9963) grad: 0.2701 (0.2586) time: 0.4235 data: 0.0045 max mem: 22448 +train: [16] [280/400] eta: 0:00:53 lr: 0.000034 loss: 2.0060 (1.9973) grad: 0.2675 (0.2593) time: 0.4425 data: 0.0049 max mem: 22448 +train: [16] [300/400] eta: 0:00:44 lr: 0.000033 loss: 2.0218 (1.9990) grad: 0.2646 (0.2597) time: 0.4387 data: 0.0049 max mem: 22448 +train: [16] [320/400] eta: 0:00:35 lr: 0.000032 loss: 2.0314 (2.0019) grad: 0.2643 (0.2600) time: 0.4264 data: 0.0049 max mem: 22448 +train: [16] [340/400] eta: 0:00:26 lr: 0.000031 loss: 1.9874 (2.0009) grad: 0.2605 (0.2603) time: 0.4289 data: 0.0049 max mem: 22448 +train: [16] [360/400] eta: 0:00:17 lr: 0.000031 loss: 1.9717 (2.0017) grad: 0.2577 (0.2605) time: 0.4331 data: 0.0051 max mem: 22448 +train: [16] [380/400] eta: 0:00:08 lr: 0.000030 loss: 2.0059 (2.0021) grad: 0.2638 (0.2610) time: 0.4266 data: 0.0050 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.0059 (2.0028) grad: 0.2692 (0.2617) time: 0.4253 data: 0.0049 max mem: 22448 +train: [16] Total time: 0:02:57 (0.4431 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.0059 (2.0028) grad: 0.2692 (0.2617) +eval (validation): [16] [ 0/85] eta: 0:04:22 time: 3.0857 data: 2.8421 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:32 time: 0.3718 data: 0.0050 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:19 time: 0.3418 data: 0.0040 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:09 time: 0.3411 data: 0.0039 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3272 data: 0.0041 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3201 data: 0.0040 max mem: 22448 +eval (validation): [16] Total time: 0:00:32 (0.3797 s / it) +cv: [16] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.427 acc: 0.268 f1: 0.212 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:20:55 lr: nan time: 3.1399 data: 2.8109 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:35 lr: 0.000028 loss: 1.9744 (1.9514) grad: 0.2434 (0.2507) time: 0.4391 data: 0.0042 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:02 lr: 0.000027 loss: 1.9755 (1.9613) grad: 0.2562 (0.2538) time: 0.4450 data: 0.0049 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:43 lr: 0.000026 loss: 1.9811 (1.9723) grad: 0.2491 (0.2518) time: 0.4254 data: 0.0048 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:31 lr: 0.000025 loss: 1.9585 (1.9596) grad: 0.2435 (0.2516) time: 0.4560 data: 0.0048 max mem: 22448 +train: [17] [100/400] eta: 0:02:19 lr: 0.000024 loss: 1.9585 (1.9664) grad: 0.2472 (0.2515) time: 0.4297 data: 0.0049 max mem: 22448 +train: [17] [120/400] eta: 0:02:08 lr: 0.000023 loss: 1.9852 (1.9687) grad: 0.2508 (0.2523) time: 0.4285 data: 0.0050 max mem: 22448 +train: [17] [140/400] eta: 0:01:58 lr: 0.000023 loss: 1.9834 (1.9702) grad: 0.2508 (0.2524) time: 0.4386 data: 0.0049 max mem: 22448 +train: [17] [160/400] eta: 0:01:48 lr: 0.000022 loss: 1.9756 (1.9680) grad: 0.2537 (0.2532) time: 0.4267 data: 0.0049 max mem: 22448 +train: [17] [180/400] eta: 0:01:39 lr: 0.000021 loss: 1.9334 (1.9657) grad: 0.2505 (0.2526) time: 0.4304 data: 0.0047 max mem: 22448 +train: [17] [200/400] eta: 0:01:29 lr: 0.000020 loss: 1.9334 (1.9640) grad: 0.2505 (0.2523) time: 0.4416 data: 0.0050 max mem: 22448 +train: [17] [220/400] eta: 0:01:20 lr: 0.000019 loss: 1.9749 (1.9661) grad: 0.2550 (0.2534) time: 0.4362 data: 0.0047 max mem: 22448 +train: [17] [240/400] eta: 0:01:11 lr: 0.000019 loss: 1.9841 (1.9657) grad: 0.2550 (0.2539) time: 0.4372 data: 0.0051 max mem: 22448 +train: [17] [260/400] eta: 0:01:02 lr: 0.000018 loss: 1.9301 (1.9657) grad: 0.2599 (0.2543) time: 0.4306 data: 0.0048 max mem: 22448 +train: [17] [280/400] eta: 0:00:53 lr: 0.000017 loss: 1.9509 (1.9665) grad: 0.2561 (0.2542) time: 0.4458 data: 0.0050 max mem: 22448 +train: [17] [300/400] eta: 0:00:44 lr: 0.000016 loss: 1.9695 (1.9684) grad: 0.2533 (0.2542) time: 0.4358 data: 0.0050 max mem: 22448 +train: [17] [320/400] eta: 0:00:35 lr: 0.000016 loss: 1.9641 (1.9675) grad: 0.2467 (0.2534) time: 0.4277 data: 0.0048 max mem: 22448 +train: [17] [340/400] eta: 0:00:26 lr: 0.000015 loss: 1.9469 (1.9676) grad: 0.2431 (0.2536) time: 0.4358 data: 0.0050 max mem: 22448 +train: [17] [360/400] eta: 0:00:17 lr: 0.000014 loss: 1.9768 (1.9684) grad: 0.2523 (0.2537) time: 0.4299 data: 0.0051 max mem: 22448 +train: [17] [380/400] eta: 0:00:08 lr: 0.000014 loss: 1.9605 (1.9682) grad: 0.2552 (0.2545) time: 0.4266 data: 0.0051 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 1.9591 (1.9674) grad: 0.2532 (0.2543) time: 0.4305 data: 0.0049 max mem: 22448 +train: [17] Total time: 0:02:56 (0.4422 s / it) +train: [17] Summary: lr: 0.000013 loss: 1.9591 (1.9674) grad: 0.2532 (0.2543) +eval (validation): [17] [ 0/85] eta: 0:04:32 time: 3.2004 data: 2.9211 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:30 time: 0.3405 data: 0.0036 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:18 time: 0.3475 data: 0.0043 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:09 time: 0.3501 data: 0.0033 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3302 data: 0.0041 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3210 data: 0.0038 max mem: 22448 +eval (validation): [17] Total time: 0:00:32 (0.3768 s / it) +cv: [17] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.421 acc: 0.269 f1: 0.213 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:21:09 lr: nan time: 3.1748 data: 2.8438 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:42 lr: 0.000012 loss: 1.9885 (1.9912) grad: 0.2503 (0.2546) time: 0.4547 data: 0.0044 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:04 lr: 0.000012 loss: 1.9594 (1.9567) grad: 0.2458 (0.2505) time: 0.4359 data: 0.0045 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:44 lr: 0.000011 loss: 1.9228 (1.9496) grad: 0.2559 (0.2531) time: 0.4297 data: 0.0050 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:31 lr: 0.000011 loss: 1.9334 (1.9497) grad: 0.2510 (0.2519) time: 0.4439 data: 0.0050 max mem: 22448 +train: [18] [100/400] eta: 0:02:19 lr: 0.000010 loss: 1.9536 (1.9518) grad: 0.2440 (0.2515) time: 0.4321 data: 0.0049 max mem: 22448 +train: [18] [120/400] eta: 0:02:08 lr: 0.000009 loss: 1.9410 (1.9466) grad: 0.2495 (0.2512) time: 0.4266 data: 0.0050 max mem: 22448 +train: [18] [140/400] eta: 0:01:58 lr: 0.000009 loss: 1.9410 (1.9532) grad: 0.2495 (0.2516) time: 0.4281 data: 0.0050 max mem: 22448 +train: [18] [160/400] eta: 0:01:48 lr: 0.000008 loss: 1.9430 (1.9525) grad: 0.2465 (0.2509) time: 0.4313 data: 0.0049 max mem: 22448 +train: [18] [180/400] eta: 0:01:38 lr: 0.000008 loss: 1.9423 (1.9504) grad: 0.2499 (0.2516) time: 0.4297 data: 0.0048 max mem: 22448 +train: [18] [200/400] eta: 0:01:30 lr: 0.000007 loss: 1.9535 (1.9524) grad: 0.2514 (0.2519) time: 0.4532 data: 0.0049 max mem: 22448 +train: [18] [220/400] eta: 0:01:20 lr: 0.000007 loss: 1.9713 (1.9526) grad: 0.2496 (0.2517) time: 0.4427 data: 0.0051 max mem: 22448 +train: [18] [240/400] eta: 0:01:11 lr: 0.000006 loss: 1.9613 (1.9556) grad: 0.2496 (0.2517) time: 0.4390 data: 0.0050 max mem: 22448 +train: [18] [260/400] eta: 0:01:02 lr: 0.000006 loss: 1.9468 (1.9546) grad: 0.2536 (0.2516) time: 0.4251 data: 0.0047 max mem: 22448 +train: [18] [280/400] eta: 0:00:53 lr: 0.000006 loss: 1.9504 (1.9546) grad: 0.2531 (0.2517) time: 0.4405 data: 0.0048 max mem: 22448 +train: [18] [300/400] eta: 0:00:44 lr: 0.000005 loss: 1.9430 (1.9528) grad: 0.2483 (0.2513) time: 0.4443 data: 0.0048 max mem: 22448 +train: [18] [320/400] eta: 0:00:35 lr: 0.000005 loss: 1.9472 (1.9546) grad: 0.2465 (0.2513) time: 0.4287 data: 0.0051 max mem: 22448 +train: [18] [340/400] eta: 0:00:26 lr: 0.000004 loss: 1.9569 (1.9519) grad: 0.2495 (0.2510) time: 0.4342 data: 0.0050 max mem: 22448 +train: [18] [360/400] eta: 0:00:17 lr: 0.000004 loss: 1.9140 (1.9523) grad: 0.2495 (0.2508) time: 0.4356 data: 0.0050 max mem: 22448 +train: [18] [380/400] eta: 0:00:08 lr: 0.000004 loss: 1.9558 (1.9517) grad: 0.2446 (0.2506) time: 0.4352 data: 0.0049 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 1.9195 (1.9498) grad: 0.2468 (0.2508) time: 0.4390 data: 0.0049 max mem: 22448 +train: [18] Total time: 0:02:57 (0.4439 s / it) +train: [18] Summary: lr: 0.000003 loss: 1.9195 (1.9498) grad: 0.2468 (0.2508) +eval (validation): [18] [ 0/85] eta: 0:04:27 time: 3.1435 data: 2.8590 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:32 time: 0.3647 data: 0.0045 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:18 time: 0.3376 data: 0.0043 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:09 time: 0.3438 data: 0.0044 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3270 data: 0.0039 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3138 data: 0.0037 max mem: 22448 +eval (validation): [18] Total time: 0:00:32 (0.3772 s / it) +cv: [18] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.427 acc: 0.269 f1: 0.213 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:21:24 lr: nan time: 3.2111 data: 2.8342 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:37 lr: 0.000003 loss: 1.9521 (1.9621) grad: 0.2390 (0.2447) time: 0.4412 data: 0.0032 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:01 lr: 0.000003 loss: 1.9458 (1.9302) grad: 0.2442 (0.2467) time: 0.4290 data: 0.0046 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:41 lr: 0.000002 loss: 1.9474 (1.9474) grad: 0.2455 (0.2464) time: 0.4189 data: 0.0048 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:29 lr: 0.000002 loss: 1.9741 (1.9484) grad: 0.2408 (0.2452) time: 0.4388 data: 0.0049 max mem: 22448 +train: [19] [100/400] eta: 0:02:18 lr: 0.000002 loss: 1.9455 (1.9441) grad: 0.2416 (0.2463) time: 0.4455 data: 0.0051 max mem: 22448 +train: [19] [120/400] eta: 0:02:08 lr: 0.000002 loss: 1.9455 (1.9496) grad: 0.2490 (0.2477) time: 0.4333 data: 0.0050 max mem: 22448 +train: [19] [140/400] eta: 0:01:58 lr: 0.000001 loss: 1.9682 (1.9478) grad: 0.2444 (0.2467) time: 0.4342 data: 0.0047 max mem: 22448 +train: [19] [160/400] eta: 0:01:48 lr: 0.000001 loss: 1.9453 (1.9426) grad: 0.2387 (0.2458) time: 0.4284 data: 0.0048 max mem: 22448 +train: [19] [180/400] eta: 0:01:38 lr: 0.000001 loss: 1.9092 (1.9403) grad: 0.2445 (0.2471) time: 0.4292 data: 0.0049 max mem: 22448 +train: [19] [200/400] eta: 0:01:29 lr: 0.000001 loss: 1.9296 (1.9403) grad: 0.2503 (0.2468) time: 0.4381 data: 0.0051 max mem: 22448 +train: [19] [220/400] eta: 0:01:20 lr: 0.000001 loss: 1.9318 (1.9397) grad: 0.2451 (0.2465) time: 0.4573 data: 0.0052 max mem: 22448 +train: [19] [240/400] eta: 0:01:11 lr: 0.000001 loss: 1.9282 (1.9375) grad: 0.2459 (0.2466) time: 0.4449 data: 0.0050 max mem: 22448 +train: [19] [260/400] eta: 0:01:02 lr: 0.000000 loss: 1.9013 (1.9359) grad: 0.2455 (0.2466) time: 0.4321 data: 0.0048 max mem: 22448 +train: [19] [280/400] eta: 0:00:53 lr: 0.000000 loss: 1.9291 (1.9376) grad: 0.2447 (0.2466) time: 0.4329 data: 0.0048 max mem: 22448 +train: [19] [300/400] eta: 0:00:44 lr: 0.000000 loss: 1.9407 (1.9372) grad: 0.2481 (0.2465) time: 0.4473 data: 0.0051 max mem: 22448 +train: [19] [320/400] eta: 0:00:35 lr: 0.000000 loss: 1.9500 (1.9398) grad: 0.2483 (0.2469) time: 0.4368 data: 0.0049 max mem: 22448 +train: [19] [340/400] eta: 0:00:26 lr: 0.000000 loss: 1.9930 (1.9433) grad: 0.2454 (0.2467) time: 0.4314 data: 0.0050 max mem: 22448 +train: [19] [360/400] eta: 0:00:17 lr: 0.000000 loss: 1.9676 (1.9438) grad: 0.2433 (0.2465) time: 0.4312 data: 0.0050 max mem: 22448 +train: [19] [380/400] eta: 0:00:08 lr: 0.000000 loss: 1.9368 (1.9444) grad: 0.2435 (0.2464) time: 0.4280 data: 0.0050 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 1.9541 (1.9469) grad: 0.2468 (0.2466) time: 0.4274 data: 0.0050 max mem: 22448 +train: [19] Total time: 0:02:57 (0.4428 s / it) +train: [19] Summary: lr: 0.000000 loss: 1.9541 (1.9469) grad: 0.2468 (0.2466) +eval (validation): [19] [ 0/85] eta: 0:04:53 time: 3.4475 data: 3.1624 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:35 time: 0.3951 data: 0.0035 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:19 time: 0.3319 data: 0.0041 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3416 data: 0.0042 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3229 data: 0.0042 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3161 data: 0.0041 max mem: 22448 +eval (validation): [19] Total time: 0:00:32 (0.3857 s / it) +cv: [19] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.426 acc: 0.269 f1: 0.213 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.26891842008121075, "hparam": [0.44, 1.0], "hparam_id": 19, "epoch": 19, "is_best": false, "best_score": 0.27593207825765964} +eval (train): [20] [ 0/509] eta: 0:25:55 time: 3.0559 data: 2.7559 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:03:54 time: 0.3511 data: 0.0038 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:14 time: 0.3459 data: 0.0042 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:02:54 time: 0.3334 data: 0.0042 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:40 time: 0.3298 data: 0.0042 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:29 time: 0.3366 data: 0.0040 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:21 time: 0.3469 data: 0.0042 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:12 time: 0.3273 data: 0.0039 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:04 time: 0.3401 data: 0.0044 max mem: 22448 +eval (train): [20] [180/509] eta: 0:01:55 time: 0.3248 data: 0.0039 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:48 time: 0.3303 data: 0.0040 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:40 time: 0.3337 data: 0.0040 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:33 time: 0.3291 data: 0.0040 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:26 time: 0.3361 data: 0.0039 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:19 time: 0.3551 data: 0.0040 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:12 time: 0.3491 data: 0.0043 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:05 time: 0.3465 data: 0.0042 max mem: 22448 +eval (train): [20] [340/509] eta: 0:00:58 time: 0.3358 data: 0.0041 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:51 time: 0.3209 data: 0.0040 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:44 time: 0.3465 data: 0.0041 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:37 time: 0.3381 data: 0.0039 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:30 time: 0.3353 data: 0.0040 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:23 time: 0.3346 data: 0.0040 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:16 time: 0.3440 data: 0.0041 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:09 time: 0.3370 data: 0.0041 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3214 data: 0.0037 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3116 data: 0.0037 max mem: 22448 +eval (train): [20] Total time: 0:02:54 (0.3435 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:05 time: 2.8839 data: 2.6023 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:32 time: 0.3778 data: 0.0043 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3433 data: 0.0042 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3443 data: 0.0045 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3363 data: 0.0041 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3225 data: 0.0039 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3813 s / it) +eval (test): [20] [ 0/85] eta: 0:04:03 time: 2.8681 data: 2.5820 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3488 data: 0.0061 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:18 time: 0.3481 data: 0.0037 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3379 data: 0.0044 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3268 data: 0.0041 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3078 data: 0.0038 max mem: 22448 +eval (test): [20] Total time: 0:00:31 (0.3705 s / it) +eval (testid): [20] [ 0/82] eta: 0:03:55 time: 2.8780 data: 2.6308 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:30 time: 0.3785 data: 0.0056 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:17 time: 0.3409 data: 0.0038 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3311 data: 0.0041 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3194 data: 0.0045 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3070 data: 0.0043 max mem: 22448 +eval (testid): [20] Total time: 0:00:30 (0.3742 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.27593207825765964, "hparam": [1.2, 1.0], "hparam_id": 25, "epoch": 5, "is_best": true, "best_score": 0.27593207825765964} +eval (train): [20] [ 0/509] eta: 0:24:27 time: 2.8829 data: 2.6076 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:03:36 time: 0.3199 data: 0.0041 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:07 time: 0.3533 data: 0.0039 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:02:50 time: 0.3388 data: 0.0040 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:37 time: 0.3346 data: 0.0040 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:30 time: 0.3614 data: 0.0045 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:22 time: 0.3636 data: 0.0040 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:14 time: 0.3478 data: 0.0043 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:05 time: 0.3374 data: 0.0042 max mem: 22448 +eval (train): [20] [180/509] eta: 0:01:57 time: 0.3357 data: 0.0038 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:51 time: 0.3797 data: 0.0045 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:43 time: 0.3514 data: 0.0043 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:36 time: 0.3405 data: 0.0042 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:28 time: 0.3309 data: 0.0039 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:21 time: 0.3479 data: 0.0041 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:13 time: 0.3356 data: 0.0039 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:06 time: 0.3403 data: 0.0041 max mem: 22448 +eval (train): [20] [340/509] eta: 0:00:59 time: 0.3350 data: 0.0041 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:52 time: 0.3574 data: 0.0043 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:45 time: 0.3488 data: 0.0042 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.3595 data: 0.0042 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:31 time: 0.3550 data: 0.0042 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3504 data: 0.0039 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3659 data: 0.0043 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3611 data: 0.0046 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3493 data: 0.0041 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3269 data: 0.0040 max mem: 22448 +eval (train): [20] Total time: 0:03:00 (0.3540 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:20 time: 3.0655 data: 2.7646 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:33 time: 0.3859 data: 0.0039 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3524 data: 0.0041 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3441 data: 0.0043 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3343 data: 0.0044 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3299 data: 0.0043 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3878 s / it) +eval (test): [20] [ 0/85] eta: 0:04:16 time: 3.0215 data: 2.7859 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:32 time: 0.3714 data: 0.0041 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:19 time: 0.3583 data: 0.0036 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3543 data: 0.0039 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3334 data: 0.0044 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3256 data: 0.0041 max mem: 22448 +eval (test): [20] Total time: 0:00:32 (0.3877 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:09 time: 3.0447 data: 2.7476 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:32 time: 0.4034 data: 0.0063 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3416 data: 0.0035 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3420 data: 0.0042 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3271 data: 0.0044 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3174 data: 0.0042 max mem: 22448 +eval (testid): [20] Total time: 0:00:31 (0.3871 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00036 | 0.05 | 25 | [1.2, 1.0] | train | 2.0279 | 0.38606 | 0.0024391 | 0.32683 | 0.0026072 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00036 | 0.05 | 25 | [1.2, 1.0] | validation | 2.3861 | 0.27593 | 0.0054005 | 0.20649 | 0.0047691 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00036 | 0.05 | 25 | [1.2, 1.0] | test | 2.3202 | 0.29944 | 0.0052928 | 0.23452 | 0.0053526 | +| flat_mae | patch | attn | nsd_cococlip | best | 5 | 0.00036 | 0.05 | 25 | [1.2, 1.0] | testid | 2.2755 | 0.30364 | 0.0059794 | 0.24622 | 0.0057295 | + + +done! total time: 1:22:23 diff --git a/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..2b8a00b01857f577c863037076cadac581c6a202 --- /dev/null +++ b/data_scaling/n400_1/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.123342648744583, "train/grad": 0.1710019488632679, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.18283203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.182244873046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1811474609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.180162353515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.179195556640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.177833251953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.176422119140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.174898681640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.17303955078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.17119873046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.169521484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.167244873046875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1652490234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.16275146484375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1604931640625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.158787841796875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.156898193359375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.15489990234375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.153106689453125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.151510009765625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.149873046875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.148497314453125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.147060546875, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.14577880859375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.14462158203125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.143583984375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.1427001953125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1420166015625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.141234130859375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.14057861328125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.140052490234375, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.139573974609375, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1390399169921874, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.1385906982421874, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.13773681640625, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.13647216796875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.1336761474609376, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.127962646484375, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.114981842041016, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.091178131103516, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0720277404785157, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.0532711791992186, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0294498443603515, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.006611328125, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.985635070800781, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9655013275146485, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.950437831878662, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.93481502532959, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.9213967895507813, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.025920698647387327, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02586443074513227, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02577345394063741, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02568634639494121, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02560089704114944, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02548724859021604, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02536451236344874, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.025234719505533576, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02507401667535305, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02491464099381119, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02476929691620171, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.024571943506598474, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02440048654563725, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024184262054041027, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024009163295850157, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02386487470008433, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023710787063464522, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.023561871917918323, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.023427766147069633, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.023325208155438305, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023221684438176453, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.023135722763836384, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023058921503834425, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02299290947150439, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022939643072895706, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.022889528004452587, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022856529694981875, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022836797409690916, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022818398489616813, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02280630880501121, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022799308439716698, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0227929809410125, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022772915894165635, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02274020872078836, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02268014468252659, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.022595306942239404, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.022491457723081112, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.022425392381846904, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.022492041951045393, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.023002623515203595, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.023673158520832658, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02415319108404219, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.024779928363859654, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.025140168997459113, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.025717438557185234, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.026600840990431605, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.027411807738244535, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.028560921428725124, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02940302461385727, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.175870656967163, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.173877477645874, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.170696496963501, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.167745351791382, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1649115085601807, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.161226272583008, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.15736722946167, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1535089015960693, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1489639282226562, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.144731044769287, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1412558555603027, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1368777751922607, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1335179805755615, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1298413276672363, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1273996829986572, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.125734567642212, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.124270439147949, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1231276988983154, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.122354030609131, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.121835708618164, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1214542388916016, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1211860179901123, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.121037483215332, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.121150016784668, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1214730739593506, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1220343112945557, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1225154399871826, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1229076385498047, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.123082160949707, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1223151683807373, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.120300769805908, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1172587871551514, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.111478090286255, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1052725315093994, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.095825433731079, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0803170204162598, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.043736696243286, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9645302295684814, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.847186803817749, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7526676654815674, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7132179737091064, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.670238971710205, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6200437545776367, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.617102861404419, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.601146697998047, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.5885801315307617, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.6085598468780518, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.6274986267089844, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.646848201751709, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06127722406792174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06183093392395718, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.062200073827980804, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06201550387596899, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06164636397194537, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.060538944259874494, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.073827980804725, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.073827980804725, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.073827980804725, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06921373200442968, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06736803248431156, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.0664451827242525, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06349206349206349, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.059431524547803614, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.05832410483573274, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.05832410483573274, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.06552233296419344, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07327427094868956, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07973421926910298, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.0828719084533038, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08453303802141011, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.10409745293466224, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.12901439645625692, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.1493170911775563, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.17663344407530454, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18899963086009597, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20081210778885197, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21207087486157253, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2146548541897379, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.22517534145441123, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.22517534145441123, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.21705426356589147, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.2069029162052418, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011327405844478979, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010926654113082806, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010802462463692137, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010647169428775959, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010254559107231045, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010130180401378214, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009432183845660696, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.008942981006060613, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.008793347128582596, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.008693178715289265, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.008955347784565375, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.00980112184747114, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010563290206463209, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01263505683516584, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01528572747722356, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.016677801903742653, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.017879221805680568, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.018101101370637692, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.01976846029105239, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.020697558431949415, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.02113787074703788, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.022535082565542045, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.023089772565289504, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.024162316408101653, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.022218542794680825, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.01823787484264198, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.016550247011779082, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.014560265430107723, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.01192420106733099, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.01263190505339716, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.013069306868064018, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.017580171733010814, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.020811283029733573, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.022621305710843573, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.023790061367961055, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.02846416329337829, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.0452043844576473, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.06527561280999356, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.08048476656947023, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.10063573791707081, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.10770692487164098, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.11733195507114813, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.12932887935484164, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.13691715560795067, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.15044695670028996, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15081115631503453, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.15817468953233496, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1541546005614385, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.1461702356216559, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 2.9655013275146485, "validation/loss_best": 2.5885801315307617, "validation/acc_best": 0.22517534145441123, "validation/f1_best": 0.15081115631503453} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.946658617258072, "train/grad": 0.19013701632618904, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.158702392578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1559130859375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1519384765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.148616943359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.145733642578125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.14243896484375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1394873046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13695556640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.134422607421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1323681640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.130916748046875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.129483642578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12846435546875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.127454833984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.12672119140625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.126246337890625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.125701904296875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12503662109375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.124239501953125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.123524169921875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.122684326171875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.1216015625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1202587890625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.118812255859375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.117174072265625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.114708251953125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.111690673828125, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1070037841796876, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.0938031005859377, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.057764129638672, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.011435699462891, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.964225616455078, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.8995372009277345, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.8427454376220704, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.772792434692383, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.711840896606445, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.6497464561462403, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.604188232421875, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.5664842987060545, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.5319059562683104, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.5202342891693115, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.5138786697387694, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.512818956375122, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.519906530380249, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5339016914367676, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5529743528366087, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.582830991744995, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.624963402748108, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.6699970364570618, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023647453016601504, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023407027777284383, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02305794379208237, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022763442271389066, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022515934999100862, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022233553025871516, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021986298188567163, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02178038431331515, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021586717935279012, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02145112691447139, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021364906053058803, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021286729834973813, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021251395563594996, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021235862579196692, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0212406473280862, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021255177706480027, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02128043357282877, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021309556076303124, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02134290718473494, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021373071521520615, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02140535259153694, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.021435195598751306, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02146239036694169, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021484251571819185, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0214902852056548, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021468535335734486, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02141666446812451, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02135607291944325, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.021335247876122593, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.021715338262729346, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022451325096189977, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.023230823329649864, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.024379689358174802, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02537712418474257, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.026640951409935953, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.027991711972281338, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02955371215939522, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.030831588823348285, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.032014601482078436, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03319158644415438, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03363993542268872, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03446135555393994, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03597404709085822, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.037179231764748695, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03824969091452658, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03901212107390165, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04100857647135854, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0438978795427829, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.04656620618887246, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1467232704162598, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1431610584259033, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1384544372558594, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.134784460067749, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.132023334503174, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.129223585128784, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.127073049545288, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1255202293395996, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.124260902404785, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.123457193374634, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1229355335235596, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.122330904006958, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.121823787689209, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1210505962371826, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1201961040496826, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1193251609802246, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1181282997131348, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.116672992706299, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1150894165039062, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1136996746063232, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1121349334716797, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.110734701156616, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.109285593032837, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.107656240463257, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1055989265441895, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1010780334472656, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0907623767852783, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0668540000915527, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.979498863220215, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.801704168319702, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7152507305145264, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.662388324737549, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5959925651550293, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5409622192382812, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5186052322387695, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5171804428100586, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.504554510116577, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.492570638656616, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.511418342590332, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.545532464981079, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5347366333007812, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5077316761016846, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.5509490966796875, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.557642698287964, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.639636278152466, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.685333728790283, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.7800724506378174, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.922929525375366, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.8292925357818604, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.062384643779992616, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06256921373200443, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06349206349206349, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07456626061277224, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.0769656699889258, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.08490217792543374, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09653008490217793, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.1227390180878553, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.16168327796234774, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.18014027316352899, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.19287559985234404, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22905131044665927, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2249907715023994, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23883351790328536, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23126614987080105, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23864894795127353, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.24326319675156885, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22739018087855298, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22831303063861202, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2249907715023994, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21410114433370248, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.1969361387966039, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.20358065706902917, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.20321151716500555, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008153872553512678, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008528647450290637, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008424892228918483, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008778305651587628, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009742291311306541, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010632319038435027, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010198600213827158, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011886873271146417, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01287199017036394, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01329491998606147, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013425423454215995, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013611958221008224, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014384187759446715, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.014753354017413354, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.015440473216640568, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.014752089998241192, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.014997803894406749, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01628048324730055, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.017104126759720104, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.01797261699472024, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.019018764006178697, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.019556534324330983, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.020026482021871184, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.01979704322835198, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.018923516992677702, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.018357919526156872, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.024333829292336226, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03362208136133905, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.051152211018453964, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.07924272760376867, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.0969802333052006, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.11279735610414066, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1362535397926683, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15860227683863318, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.15904330968584734, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15799013606436066, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.15957607545792338, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1727764328353334, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17448575399801072, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16347899514913108, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16714036551663922, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1703503695167767, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1581901308478381, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15529090542548601, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.1603417305735065, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15424527904466112, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.14073941799415032, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1335504898464349, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.1484717548279585, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 2.5138786697387694, "validation/loss_best": 2.5077316761016846, "validation/acc_best": 0.24326319675156885, "validation/f1_best": 0.1703503695167767} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.866150605678558, "train/grad": 0.28165971919894217, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.14041259765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.138289794921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1358154296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13435546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.133326416015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.132408447265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.131727294921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13137939453125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.130831298828125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1302685546875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.129742431640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.128841552734375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.128017578125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.126783447265625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1255322265625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.124307861328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1227099609375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.120645751953125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.118316650390625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.115902099609375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.112464599609375, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.107298583984375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.096719970703125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0696307373046876, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.013442077636719, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.924826202392578, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.8391770935058593, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.7602378845214846, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.6592908096313477, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.5691307830810546, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.51379581451416, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.4719588470458986, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.42642653465271, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.394385676383972, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3592789554595948, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.3382136821746826, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.3236922073364257, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.325152134895325, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3292581486701964, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3495242261886595, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3726232373714446, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.3910784554481506, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4264448475837708, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.457427134513855, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.508488495349884, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5765394556522367, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.6501018977165223, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.7549638533592224, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.74019630074501, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021976207187399267, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021789814536459742, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02158454698510468, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02146725834812969, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021401899848133326, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021361794359982013, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021351395454257727, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021357490373775365, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021373111815191807, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021392265572212635, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021410736544057726, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021434826529584824, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021456438954919575, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021484817219898103, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021508385613560676, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021528343744575976, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021548971417360008, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02156659253872931, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02157688030041754, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021578369289636613, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021561208963394165, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02152473168913275, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021503442726098002, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02169032198842615, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.022542739184573293, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.024243580400943755, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.025797958355396985, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.027301627770066263, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.029344528866931795, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03134282673709095, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03231172692961991, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.032971405070275066, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.034025036348029974, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03498585279099643, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03621107178740204, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.037077702740207316, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03793023999780416, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03862446169368923, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03948589104227722, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.041085867388173936, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04172693371772766, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04209947736933827, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04372770507819951, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04560368260368705, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.048401317372918126, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0517374062910676, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05483052555471659, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06138048915192485, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10394881879910826, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.129415512084961, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.127547264099121, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1256141662597656, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1244683265686035, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1237447261810303, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.123100996017456, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.122572898864746, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.122144937515259, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1215832233428955, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1210310459136963, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1205673217773438, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.119901657104492, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1192610263824463, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1184303760528564, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.117671251296997, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.116891860961914, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1157281398773193, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1138663291931152, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1109073162078857, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1067323684692383, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.0982911586761475, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0809288024902344, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0337417125701904, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.905559778213501, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.7464163303375244, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.652115821838379, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5930044651031494, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5275473594665527, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4661107063293457, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4362246990203857, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.425877094268799, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4281558990478516, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4327409267425537, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4445033073425293, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.4699862003326416, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.4773406982421875, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.4805309772491455, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.4749197959899902, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.513247489929199, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.55900239944458, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5910933017730713, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.613309383392334, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6860718727111816, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6977717876434326, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.666463851928711, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.7796881198883057, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.8905818462371826, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.0551445484161377, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.062200073827980804, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0902547065337763, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.11018826135105204, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.14544112218530822, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.1864156515319306, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2056109265411591, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26578073089701, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2692875599852344, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.268733850129199, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.260797342192691, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25802879291251385, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2574750830564784, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2571059431524548, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23864894795127353, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23181985972683647, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23015873015873015, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22277593207825766, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2233296419342931, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.1998892580287929, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.19195275009228496, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.20062753783684018, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009379757749155407, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010191687968876815, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011176059223529223, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012787916424152307, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013966031706460894, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014202711003672772, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014240026999245245, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014213743492389594, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014060178313203239, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01410926831274386, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013953957864940067, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013980330271162024, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01405621428331568, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013999256344991518, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01379319845451952, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.013285629470609087, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01364433775102729, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.014444770498471142, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.015189929318896058, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.016724725766338207, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.020676167694166303, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.02924427799406701, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.04055499904104624, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.06499078804826648, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.09570135635558291, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.12071424251429907, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1419663082443591, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.16815584666314076, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18902710325431427, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1909200662687264, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19213071686370753, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19348823480476485, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19005655961728107, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19228336280436298, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18556169680257173, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18994809448500072, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1918752580417542, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19406473491686901, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19022235200238055, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18446293320691307, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18163528916679694, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1815899288945002, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17092982359577, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15413152503706604, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.17278185150700728, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15433996920795448, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.13419424767287355, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1253610411559834, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 2.51379581451416, "validation/loss_best": 2.425877094268799, "validation/acc_best": 0.2692875599852344, "validation/f1_best": 0.19213071686370753} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.8381284928321837, "train/grad": 0.34995969586074355, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13097900390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.130123291015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12912109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1285302734375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.127855224609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.127186279296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.126298828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12547607421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.124388427734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.123065185546875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12196044921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12020263671875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.118446044921875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.115950927734375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1135205078125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.110968017578125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.107374267578125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.101763916015625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0908837890625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.0690631103515624, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.007563171386719, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.9106985473632814, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.795961456298828, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.6895797729492186, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.602805633544922, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.5214799880981444, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4634579467773436, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.4187390327453615, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.370834970474243, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.3261591577529908, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.2923601484298706, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.2665982747077944, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.2456901454925537, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.235358669757843, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.2236819005012514, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2220629954338076, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.235080564022064, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2548601770401, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2818596041202546, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3201871621608734, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3771340823173523, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4180496346950533, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.482453099489212, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5443133664131166, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.642891476154327, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.7539980030059814, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.695566356182098, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.953841288089752, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021287659062072636, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02126257906202227, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02125790110323578, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02126784370280802, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021281424448825418, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02129765006247908, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021313503687269985, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021328404471278192, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021345540722832082, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02136288119945675, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021377066099084914, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021395405228249727, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0214092566864565, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02142335121985525, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02143068610690534, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021428775233216582, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021420511165633798, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021399669484235346, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021397790634073318, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021534309331327675, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02230334878433496, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.024083752832375467, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0264500389713794, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.028732082610949873, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.030726778088137507, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03230598475784063, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03344496540725231, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03450122728012502, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03551288825459778, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03659673598594963, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0375339041929692, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03834373406134546, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.039492811346426605, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04020711054094136, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04114183582365513, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.041994359828531745, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04327755521982908, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.043693410586565735, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.044269111901521684, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04550317021086812, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04735719913616777, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04829578245058656, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05113094285130501, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05460346773266792, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.058568937424570325, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0636639211513102, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.12500861538574098, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07210125539451838, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123857259750366, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1231679916381836, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122366189956665, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1217427253723145, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1211369037628174, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1203722953796387, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1194822788238525, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1185123920440674, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1171910762786865, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1157639026641846, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1143577098846436, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1123087406158447, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1102662086486816, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.107349157333374, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1044089794158936, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1012001037597656, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.0958917140960693, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.0845882892608643, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.05275559425354, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.973757266998291, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.8007121086120605, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.695648193359375, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6262917518615723, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.561436891555786, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5076334476470947, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4732277393341064, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4582722187042236, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4656152725219727, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.484806776046753, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.492922067642212, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.461792469024658, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4440882205963135, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4326064586639404, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.462664842605591, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.4846432209014893, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5046193599700928, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.586778163909912, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.594766139984131, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6314995288848877, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6890501976013184, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6826047897338867, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6693787574768066, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.855895757675171, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.8160672187805176, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.079294443130493, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.124366283416748, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07438169066076043, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.08028792912513842, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.08065706902916205, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.08767072720561092, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.10132890365448505, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.1286452565522333, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.1672203765227021, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.18383167220376523, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2011812476928756, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22111480251015134, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24658545588778147, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2530454042081949, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2502768549280177, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2652270210409745, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.26744186046511625, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26153562200073827, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25987449243263194, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.25839793281653745, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22905131044665927, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22591362126245848, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20524178663713546, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20579549649317092, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.1925064599483204, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.1701734957548911, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01513260838381066, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01615153177760165, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.017450097697361996, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.016643134457752465, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01674046647715108, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016304575432788543, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01647274633746731, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.016883081630029412, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01728167758562761, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01777611469556373, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.017912635563923585, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.018699490860196528, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.020600739612257935, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.022016661394993985, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.024380880359973, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.028177691564755192, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.0298484076104058, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.036256261841330885, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.04621595947406552, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.06808353058707935, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.09543408144567157, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.11341883135687862, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1286439879206374, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.14722363732975707, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.16070880009054325, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1800979291129906, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18993425003541, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19144564692097918, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18880782949793531, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1891105306998151, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19620167843251526, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19727717484615, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.196681651805549, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.186632905061119, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18433461073226567, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18599757284943408, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1748383694898977, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17730607672096874, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18220214397322784, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1762205989099601, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17142009952307224, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16609466984830712, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14926326438123436, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16045046506270602, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.13406230310012687, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.12975111041049572, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.2456901454925537, "validation/loss_best": 2.4326064586639404, "validation/acc_best": 0.26744186046511625, "validation/f1_best": 0.196681651805549} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.7899738430976866, "train/grad": 0.37113520480692386, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.131400146484375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13085693359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1298583984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12900390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1282177734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.126976318359375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.125765380859375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.124169921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.122352294921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12043212890625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.118509521484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1155224609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.112613525390625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.107828369140625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1018389892578124, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0929962158203126, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.070457763671875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0061822509765626, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.886574401855469, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7731077575683596, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.661629486083984, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.5774459838867188, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.4992816734313963, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.431375846862793, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.3768377780914305, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3209435176849365, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.281701946258545, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2486205768585203, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2137946033477784, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1844142055511475, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.156934676170349, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.142359185218811, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.1289257621765136, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.1289835166931153, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.1297344851493833, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.148287204504013, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.177446119785309, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.215318796634674, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.265843427181244, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3337011432647703, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3850280725955963, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4634427332878115, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.545950825214386, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.648766918182373, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.580609434843064, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.291054127216339, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02106131854467094, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02107343379408121, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021089329491369425, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021099966503679752, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02110912122763693, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02111896657384932, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021128813647665085, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021138674141839148, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021150608835741876, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021163541045971214, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02117452916223556, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02118570596911013, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021190963853150606, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021190130184404552, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021181670296937228, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021184484013356267, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021289033959619702, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0219436959316954, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02400778774637729, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02645833557471633, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.029137468067929148, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.031256414959207175, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.032965021384879944, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03410557951778173, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.034755781134590505, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03573904003947973, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.036502947509288786, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03727140905335546, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03815790778957307, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03914210695773363, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03996691517531872, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04063381863757968, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04153454462066293, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04260046986863017, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04406768186017871, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.045185419451445344, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0461607020162046, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04698491821065545, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04777356481179595, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.050377931874245406, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05107412667945027, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.054907256830483676, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05675494488328695, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06265582390129566, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.13133449777960776, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.09272368401288986, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1228151321411133, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122420072555542, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121819019317627, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121230125427246, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.120638132095337, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.11979603767395, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.118767261505127, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.117548704147339, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1159121990203857, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.114016532897949, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1120152473449707, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1087253093719482, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.105064630508423, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.098142147064209, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.0870766639709473, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.065943717956543, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.0014686584472656, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.8428783416748047, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.7075839042663574, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.6366212368011475, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5678486824035645, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.519529342651367, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.481466054916382, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4513161182403564, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4335970878601074, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4359240531921387, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4443912506103516, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4567370414733887, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.468435764312744, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.498223304748535, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.523545026779175, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5208981037139893, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5255424976348877, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.56600022315979, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6026179790496826, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.603776454925537, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6160788536071777, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6466224193573, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7422945499420166, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.853240728378296, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8590586185455322, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.947465419769287, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.016336679458618, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0527477264404297, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0858250276854928, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.09874492432631968, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.12366186784791436, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.1583610188261351, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.18530823181985973, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.20007382798080472, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2201919527500923, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2334809892949428, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24510889627168697, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.262827611664821, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2571059431524548, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25396825396825395, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2482465854558878, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24750830564784054, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2502768549280177, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23809523809523808, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23661867847914358, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20191952750092285, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2131782945736434, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20358065706902917, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.17829457364341086, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19748984865263935, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012171697062662983, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012181880368563533, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012645411438897492, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013054436130805534, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013492320721342098, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013094838675544757, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014442467261604364, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0154366084787229, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.016413785618793315, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.018744590692104154, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02030434076680013, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.021741416385028887, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02441211739086556, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.02827438412945978, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.034806883024555195, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.044059548806317704, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.05799568192529992, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.08467389970677862, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1113076830334075, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.12469722896370587, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.14852164574601565, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.16394306891048213, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17712357974988477, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18389252983105486, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19152501177458967, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19381157614694225, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19377282893852646, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19957531806406117, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2016418105881483, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20344573723769807, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2037103691337848, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20174968747208197, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19840886366239938, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18731198960555917, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1858899337648993, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18485125716900153, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17918121708234339, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1762405500921805, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.160228063049393, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.14792539772263516, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15417754494732613, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15209517126361807, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14068823175033837, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14386747205425082, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.3209435176849365, "validation/loss_best": 2.4359240531921387, "validation/acc_best": 0.262827611664821, "validation/f1_best": 0.19381157614694225} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.631793019771576, "train/grad": 0.25060474045574666, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122662353515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.121910400390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12061279296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.119383544921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1182470703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11661376953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.114847412109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11282470703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11017578125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.107540283203125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1046484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.099808349609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.09364501953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0779681396484375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0419970703125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.9657940673828125, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8284503173828126, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.692686767578125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.5938777923583984, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.516469078063965, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.4343907737731936, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.369597969055176, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.3122643661499023, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.263058376312256, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.2201388359069822, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.166206455230713, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.1268199133872985, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.095392072200775, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0629594492912293, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.041927101612091, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0275739288330077, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0245215582847593, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.0239735317230223, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.0353514337539673, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.0444592916965485, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.072494909763336, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.107371200323105, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1534283494949342, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.206374604701996, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2852105367183686, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.35375248670578, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.409552311897278, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.492317649126053, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.589185211658478, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021347949835471808, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021358763254247605, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021372666680254043, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02138513337355107, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02139781243633479, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021414192602969707, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021430090926587583, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02144664944615215, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021466436204500495, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02148638231214136, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02150181817356497, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021519774151965976, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021537998290732503, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021614982928149402, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02193804770708084, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.022901249933056533, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02522123698145151, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.028072568662464617, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03039023685269058, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03227401040494442, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03437798463739455, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03554935488849878, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.036346745835617186, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03693278829567134, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03757151704281569, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03874949196353555, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03970963659696281, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.040627979673445225, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04115741493180394, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04197708364576101, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0432696795463562, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.044186317790299655, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04532585410401225, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04631550470367074, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04806706424802542, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.049012190140783785, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04988947942852974, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05097125858068466, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05162729060277343, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05301201505586505, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05450345629826188, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.055009920988231896, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.058335961997509005, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06276298174634576, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.120410203933716, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1196277141571045, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1183388233184814, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1171088218688965, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1158854961395264, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.114246368408203, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.112474203109741, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1104564666748047, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.107787847518921, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1047351360321045, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1013870239257812, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.09490966796875, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0848326683044434, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.0521647930145264, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.9675567150115967, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.819995880126953, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.6886866092681885, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.606961965560913, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.522465229034424, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4593911170959473, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4202537536621094, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.405313730239868, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3993115425109863, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.393963575363159, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.387998580932617, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.3861210346221924, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4088056087493896, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4506149291992188, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5306882858276367, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5659751892089844, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5619614124298096, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5556764602661133, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6034562587738037, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.673164129257202, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6866517066955566, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7299423217773438, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.780841827392578, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.794893741607666, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8580965995788574, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.818695545196533, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.897674322128296, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.880673408508301, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9944443702697754, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.069614887237549, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.067921742340347, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0753045404208195, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0799187892211148, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0858250276854928, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.10188261351052048, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.13381321520856404, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.16611295681063123, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.19158361018826134, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20930232558139536, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.23311184939091917, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25193798449612403, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2672572905131045, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.27260981912144705, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2753783684016242, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.27593207825765964, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.26559616094499816, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24658545588778147, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.25064599483204136, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2364341085271318, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2336655592469546, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2100406053894426, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20284237726098192, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2009966777408638, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19416758951642674, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1877076411960133, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.17091177556293835, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.18235511258767073, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012246128071727138, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012695995338758745, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012577942291761415, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012209178787432474, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012267375054554846, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012269362250320215, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013468322976089462, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014202969015930711, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015375156027428475, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.016215855492294103, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016883821151386983, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.020545193183819862, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.026240433994709447, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.04097351251290263, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06336064738970394, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.08706451147238005, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.10971005529758694, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.12573225546486097, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.15044420613964296, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17079775169958797, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.18308755260734808, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19183835150540704, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19894804015256395, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20140183473724874, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20342785830708135, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20648864936047728, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20211614482783544, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1990019559485772, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19590739925067568, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19318357205682357, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18909463584814326, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19908769816143512, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1915643824270904, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18232357539361657, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1737401906016491, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16989401807103632, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17308280847651156, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1704767476086612, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1572334160074147, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16825091394269853, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1452445751036977, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14338493019778356, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1432140096843124, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1396310218255327, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.166206455230713, "validation/loss_best": 2.3861210346221924, "validation/acc_best": 0.27593207825765964, "validation/f1_best": 0.20648864936047728} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.569738973379135, "train/grad": 0.2582075060904026, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.126522216796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.125347900390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12366455078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.120406494140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11817626953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1157177734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.113018798828125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.109307861328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.105096435546875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.10011474609375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.089490966796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.06975341796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.995361328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.853876037597656, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.729124755859375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.6306442260742187, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.540178756713867, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.4466229248046876, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.383241958618164, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.3196896171569823, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.264929256439209, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.209012441635132, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.156540622711182, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.1123799419403078, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.059065821170807, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.019411826133728, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9899799823760986, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.964202756881714, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.946110051870346, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9328916108608245, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.929378490447998, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.928680192232132, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9436847066879273, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9606982511281967, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9950612163543702, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.051479493379593, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1017368829250334, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.1598678010702135, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.251973630189896, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.33640744805336, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.402700092792511, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4659907281398774, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5382662665843965, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021340453810989857, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021344746882095932, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021352975754998626, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021358585408888756, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021366437352262437, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021373176141642036, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021381272613070906, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0213892322499305, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02139542356133461, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021400986788794398, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021403867839835583, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02142631393391639, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021520530306734145, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022230762955732643, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024399224231019617, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.027130287196487187, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02948363502509892, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.031655549267306925, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03414806626737118, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03538256081752479, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03637618819251656, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03692043096758425, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.037640921985730526, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03816749952733517, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0387395740300417, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.040013322662562134, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.041011769268661735, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04197623724117875, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04290729191154242, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.043593047633767125, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.044689081758260724, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04570791540667415, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.046876362059265375, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04777834640815854, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04891096776351333, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.049675407484173775, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05117605563253164, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.051861908175051215, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052692537307739255, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05522651068866253, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.056072107367217544, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0565891239605844, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05938174134120345, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06128688821569085, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1190598011016846, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1181211471557617, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1166539192199707, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.115264654159546, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1139185428619385, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1120855808258057, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.109957456588745, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1074953079223633, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.10400652885437, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.099487543106079, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.093278408050537, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0764527320861816, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.038440704345703, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.892246723175049, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.718632698059082, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.633559465408325, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.561410427093506, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4806325435638428, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4328863620758057, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4131948947906494, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.397260904312134, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.391862154006958, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3899288177490234, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.398484706878662, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4040613174438477, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4270873069763184, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4680888652801514, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5185630321502686, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5545191764831543, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.583564043045044, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.629295825958252, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6742074489593506, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.761662244796753, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7742741107940674, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.816041946411133, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7934374809265137, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7837018966674805, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8338212966918945, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.765834093093872, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.842156171798706, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8033337593078613, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8982391357421875, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8900465965270996, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0451626777648926, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07770394979697305, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0812107788851975, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09283868586194167, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11184939091915837, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.15282392026578073, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.18512366186784793, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2087486157253599, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23034330011074197, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2484311554078996, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26596530084902176, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26707272056109266, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27205610926541157, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.27297895902547065, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2702104097452935, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2593207825765965, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24344776670358065, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24381690660760427, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23606496862310816, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2187153931339978, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2279438907345884, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20801033591731266, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21539313399778517, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19121447028423771, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013302966551258255, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013001620353167256, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013008827835489253, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013699696584344665, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015003969664055284, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015062752341935956, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.0159256514310716, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017103441886067154, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021059832178642043, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.025863469152533184, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.029669509907402705, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.039918317381151, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05161094069446456, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.0746203498033637, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10424826889774434, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1295463721536194, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1539829755361861, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1761492558048866, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1956720085484799, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1979553337852381, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20559831174824128, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2075584660947071, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.212223250334993, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21207318927097776, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21550634108938058, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21590540704714054, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20501364358784877, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1941696270467624, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19705733867500763, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19112332034225857, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19054203742583323, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1880700745680908, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18018273945863364, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18262030625571404, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18066871523619943, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1796966954231928, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18267805390661904, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17875172434898245, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1840812917270743, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17077077563703078, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17999928260810183, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17561551557066282, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17722748205489972, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15597736187646286, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.1123799419403078, "validation/loss_best": 2.4040613174438477, "validation/acc_best": 0.27297895902547065, "validation/f1_best": 0.21550634108938058} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.481852380037308, "train/grad": 0.2630133792757988, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123568115234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122276611328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.120142822265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1179931640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.115975341796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1131103515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.109705810546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.105987548828125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.100567626953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0933056640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0829742431640623, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0522186279296877, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9804681396484374, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7917640686035154, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.659739990234375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.581142807006836, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.492175750732422, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.399557685852051, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.324053936004639, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.266574821472168, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.2022691631317137, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.148627634048462, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.088648352622986, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.033124966621399, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9841957259178162, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9277263033390044, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8880044758319854, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.8590687876939773, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8324656200408935, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8169824159145356, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8025557774305343, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8027769726514817, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8082130575180053, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8203252863883972, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.8372793209552765, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.872865647673607, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9209088963270187, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9733630907535553, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0415737974643706, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.107785544991493, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.181809151172638, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2505233573913572, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.3525876557826995, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.444410101175308, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020840930528938772, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02084661351516843, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020851506870239973, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02085823201574385, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020864703920669855, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02087094415910542, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020877953530289234, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020884631825610997, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02089139336720109, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02090436321683228, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020933057563379407, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021111960504204035, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021778629473410548, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02456467249430716, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027700473265722395, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029614518424496056, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.031816782960668206, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03410753342323005, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03547525481320918, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03622189941816032, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0370473281852901, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.037556941471993927, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03883116129785776, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03978975165635348, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04042958047240972, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.041688724253326655, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04261280870065093, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04360393214970827, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04468468680977821, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.045966427065432074, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04746132859960198, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04843915639445186, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04964840531349182, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05001441575586796, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.050737780556082725, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051470129285007715, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.052139725852757696, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05264806509017944, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05327353723347187, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05295480219647288, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.053485516663640736, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.054547952357679605, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05774988489225507, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06107997773215175, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1176908016204834, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1166346073150635, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.11497163772583, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1133310794830322, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1117045879364014, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.109471321105957, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1068193912506104, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.103628158569336, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0984954833984375, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.090380907058716, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0765187740325928, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.027923822402954, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.9118289947509766, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7151026725769043, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6228854656219482, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.556736469268799, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.478193998336792, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4285166263580322, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4078705310821533, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.40092396736145, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3952977657318115, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.396650552749634, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4026875495910645, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4242496490478516, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4470458030700684, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.487273931503296, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.52119779586792, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5578768253326416, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.59901762008667, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6690785884857178, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6992146968841553, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7205677032470703, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.799375057220459, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.811022996902466, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8046059608459473, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8334298133850098, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8704206943511963, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.806546449661255, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7888190746307373, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8344147205352783, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9403276443481445, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.030458450317383, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.924898862838745, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0167696475982666, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.067921742340347, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07604282022886674, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07751937984496124, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08674787744555186, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09431524547803617, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1184939091915836, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.15153193060169803, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.18715393133997785, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20985603543743078, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2233296419342931, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2633813215208564, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.271686969361388, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2713178294573643, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2722406792174234, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2681801402731635, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26411960132890366, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25212255444813586, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2482465854558878, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2456626061277224, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22388335179032853, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2279438907345884, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21483942414174972, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21391657438169065, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21188630490956073, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20893318567737174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014357983161705133, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014467070187992953, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.015163782537698673, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015976038859543185, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017439425778356655, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.018368378973226008, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019579723468799777, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020593129000366993, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.023753575428654222, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.026836872672429377, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.032252195863539765, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04671030109041512, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0685048667318629, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10472727020155082, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13183266830514306, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14496815431774676, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1739535639733567, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19149172501895376, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19934694553545504, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20190696546889234, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20716698963819546, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21147126986434203, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2149917523072593, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2126230524304679, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21316470302909196, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2099288445066492, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20748620572531962, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20337742257281377, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20286842312644113, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19654421432268324, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1924184835562451, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18999993922572078, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18613474515365383, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18235556361927255, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18160129748619525, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18036697537489488, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16685757946787075, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1786270921823946, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17907759930971387, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17718332204980056, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15876280610978724, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1460971426998756, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15116965238475494, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1401871661891171, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 2.088648352622986, "validation/loss_best": 2.4026875495910645, "validation/acc_best": 0.2722406792174234, "validation/f1_best": 0.2149917523072593} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.4154651880264284, "train/grad": 0.2713599864393473, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1194091796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.117977294921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11552490234375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.113232421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.110814208984375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.107542724609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1037255859375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.098975830078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.091314697265625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.078797607421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0564990234375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9773065185546876, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.8294381713867187, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6608668518066407, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.569997024536133, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4903433990478514, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.39908576965332, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3240629196166993, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.255979690551758, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.199633502960205, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1324875450134275, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.075602550506592, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0075687265396116, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9491948080062866, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9006479501724243, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8440174818038941, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8025525033473968, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7694351756572724, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.738979114294052, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7220565444231033, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6995094764232634, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6895296734571457, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6926079070568085, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7085764962434768, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.72288678586483, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7627493512630463, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8205697184801102, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.864614634513855, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.928243891596794, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.0130936777591706, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.0888220661878587, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.174664362668991, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.2433766317367554, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.3261067283153536, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02149258355144411, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02149857728276402, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02150811052881181, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02151742667891085, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02152586627751589, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021536415657028556, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021546261091716586, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02155737376306206, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021578268059529365, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021628777696751058, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021764050009660422, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022474075835198164, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0244712551869452, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028467366360127925, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030954426899552346, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033102948935702446, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03543449276126921, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03687247047200799, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037809954546391966, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.038519109236076474, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.039397595888003706, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03986276471056044, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04082146955654025, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041624873466789725, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04230122871696949, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0435938255302608, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0449332788400352, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04607915952801704, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04711095644161105, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.048282978031784295, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049276507385075094, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05007404949516058, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.051110752522945405, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051305497642606494, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05184786692261696, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05226362546905875, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.052926009502261874, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05289409801363945, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.053234534561634066, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05448438990861178, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.054367674477398396, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05490439653396607, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05607507398352027, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05845522141084075, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.116741895675659, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1155316829681396, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1135060787200928, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.111504316329956, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.109497308731079, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.106625556945801, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.103029727935791, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.098341941833496, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.089583158493042, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0731041431427, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0402581691741943, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9218575954437256, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.755882978439331, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6296913623809814, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5560500621795654, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4883718490600586, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.441283702850342, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.421968698501587, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.420912027359009, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4230313301086426, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.421902656555176, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.427278995513916, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4433934688568115, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.48241925239563, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5110549926757812, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5542001724243164, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5965421199798584, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6495020389556885, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6869423389434814, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.747152328491211, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8240513801574707, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.870407819747925, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.932370185852051, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.928205966949463, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.989328384399414, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.013489007949829, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.945096969604492, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8867626190185547, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9955615997314453, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.930605173110962, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9748475551605225, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0756847858428955, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0598394870758057, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.188476085662842, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07659653008490218, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07862679955703211, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08471760797342193, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08859357696567, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.10003691399040236, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.11258767072720562, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.14377999261720192, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.17478774455518642, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.20653377630121816, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22757475083056478, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24529346622369877, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2561830933923957, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26245847176079734, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2676264304171281, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26098191214470284, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2482465854558878, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22886674049464747, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22572905131044665, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2277593207825766, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2294204503506829, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21834625322997417, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2041343669250646, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19139904023624954, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19232188999630861, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.1760797342192691, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01551080236498635, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01573688600471426, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.017447032121427713, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.019130390573729974, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.020849118634615364, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.023296169167876992, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02694130282201229, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0292274334176189, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03393143235088383, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.04225730527895465, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.049031635760688995, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06935076209819781, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09559221389550852, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12979420302728692, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1536325784659834, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17225342721157044, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18544752942781395, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1957532088723887, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20400330636669964, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2008786282955314, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20885544291157945, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20955018608651965, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2106158601962472, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2011127701630102, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19886623425183933, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19848917858380424, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1958022488493584, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18943160841014536, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18987639461609654, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18653211684204285, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17838216732956078, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17301510804302986, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16225009582079816, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16528381187551658, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16380064395903393, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16432167431972855, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16920672124366867, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17570667950910868, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16476312362685927, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17032103123117948, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1493224959661896, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.13522427410287868, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15163997379440583, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1333824731991974, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.1324875450134275, "validation/loss_best": 2.421902656555176, "validation/acc_best": 0.2676264304171281, "validation/f1_best": 0.20885544291157945} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.3637833392620085, "train/grad": 0.2751653129607439, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.116407470703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.114569091796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.111715087890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.10879638671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1059765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.101943359375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0969970703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.09075927734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.078944091796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0563043212890624, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0102313232421873, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.863663635253906, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.713606414794922, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5934376525878906, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.501753845214844, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.417984924316406, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.343617362976074, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2749883270263673, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2058492851257325, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.144844093322754, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.071949391365051, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.014318108558655, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9467076659202576, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8869079792499541, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8344285023212432, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7725465679168702, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7265199160575866, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6944796919822693, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.659259126186371, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.637606202363968, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6110418444871903, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.597159960269928, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5924559324979781, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6141582995653152, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6413085222244264, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.679330922961235, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7267241549491883, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.794591824412346, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8654012644290925, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.948597310781479, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.0473794400691987, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.1133158195018766, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.186643650531769, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.250789008140564, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021392735661938788, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021398679814301432, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021406121351756156, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021413861569017173, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021422923211939632, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021432270938530566, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021443577399477363, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021462017884477973, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02150829298887402, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021642686706036328, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022024794090539215, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023809815645217895, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026884418334811925, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030170742785558105, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03260417787358165, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.034757036995142696, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03632623789831996, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037289518686011436, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03813780297525227, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.038933917358517645, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.039971022699028254, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04039200734347105, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.041559839528054, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04245922034606338, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0432126927934587, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044498119615018365, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045544690247625114, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04665182799100876, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04773607179522514, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.048655426539480684, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04979838607832789, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.050732349194586274, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.051434526965022084, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.052063701171427966, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.053429172076284885, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.053857537787407636, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05432851806282997, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.054481627847999335, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05429703766480088, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05459407728165388, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05499723389744759, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.054740040954202415, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0561677223071456, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05738048465922475, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1154842376708984, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.114114761352539, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1118216514587402, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1095657348632812, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.107313632965088, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.10392165184021, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.099562168121338, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.093087673187256, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.079256296157837, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0488831996917725, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.983703136444092, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.8041980266571045, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6732234954833984, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5766515731811523, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.49845814704895, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4436733722686768, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.412287712097168, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3959972858428955, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3919801712036133, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.393806219100952, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4056756496429443, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.429863452911377, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.461796522140503, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4971866607666016, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.524060010910034, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.571492910385132, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.613257884979248, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.671599864959717, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7195866107940674, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.753504991531372, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7913661003112793, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8604557514190674, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9336585998535156, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.975846290588379, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9944727420806885, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.041781187057495, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0416438579559326, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.056812286376953, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9108805656433105, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.903597831726074, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8144474029541016, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.856602907180786, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8410301208496094, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7967329025268555, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07678110003691399, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07973421926910298, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08397932816537468, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0932078257659653, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.11129568106312292, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1362126245847176, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.17349575489110372, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19453672942045036, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21853082318198597, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2440014765596161, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2593207825765965, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.262827611664821, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2631967515688446, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.268733850129199, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26245847176079734, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24307862679955702, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24067921742340348, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20930232558139536, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2074566260612772, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21262458471760798, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22056109265411591, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22148394241417496, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22905131044665927, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21040974529346623, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2129937246216316, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01634576749314824, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.016183690035792217, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.017537276244411263, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.018685726951150363, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.021250409162277367, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.024659456739070672, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.028261322433985717, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.031134487320920407, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0386239948609621, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05110968834048404, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06522062891227305, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.08999549101183102, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11248358866902353, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13869703638061467, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16476329935848835, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17977656838709122, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18711035980456606, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19045180187546296, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1974632674032943, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20204762688968794, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20615939541322703, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20682853958337277, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20593685835232658, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20555653710791214, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20068890372587558, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20356220437878111, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20379310768791614, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1951730794752371, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1966554078900399, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20212522521443022, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1915778156386074, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18263626840929503, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18479844754508856, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17614005477589123, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17288345301795302, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17211248768609932, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16852190125244926, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1665943157536823, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1761626994200521, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17490692477613, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18566189226462393, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17445120877341203, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16975286088280883, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18250638376753378, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.071949391365051, "validation/loss_best": 2.4056756496429443, "validation/acc_best": 0.268733850129199, "validation/f1_best": 0.20615939541322703} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.292891728878021, "train/grad": 0.27437703147530557, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.116114501953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1141748046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.111107177734375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.108001708984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.104940185546875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.100364990234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.09472412109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.086361083984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0688543701171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0301983642578123, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.9488446044921877, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.764429168701172, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.644032440185547, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5329666900634766, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4316476821899413, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.356860427856445, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.28867130279541, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.219644298553467, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.148563575744629, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.082572841644287, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0066996812820435, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9448726177215576, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8713038563728333, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8051577997207642, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7442277419567107, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.67874382853508, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6317405146360398, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5901287472248078, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5428076106309891, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.5148674088716507, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4822844463586806, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4608379244804381, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4586460810899735, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.470749812722206, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4953792303800584, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5372673320770263, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.579436816573143, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.650424253344536, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.7381241536140442, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8355544036626816, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.907634390592575, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9849297505617143, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0624875122308732, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.1449420994520185, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021094390861690043, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021098104906268417, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021107014110311864, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021116288420744242, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021125457058660688, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021136012920178474, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0211553577426821, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021187057946808637, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021278543560765684, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021561074876226483, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022319265296682714, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0250347341876477, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.028050304455682636, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.031041344571858646, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03374847398139536, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03554480645805597, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03675924432463944, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03763862949796021, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03856927355751395, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039510655980557204, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04040276007726788, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04068039666861296, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.041593776866793636, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04242126857861876, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04322219349443913, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04453886967152357, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045791368577629325, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04682161612436175, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04774714345112443, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04903389271348715, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05022490669041872, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05082575600594282, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05181390741840005, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05224749837070704, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05355627402663231, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05420459968969226, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0544905505143106, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0544748661108315, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05442887783050537, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05450577879324556, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05325621847063303, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05210539259016514, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.053410251922905445, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.054897009301930665, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1147620677948, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.113297939300537, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1108858585357666, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.108476400375366, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1059792041778564, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.102106809616089, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0966875553131104, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.087963342666626, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0669455528259277, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.016345977783203, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9130666255950928, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7281534671783447, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.632221221923828, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.542118549346924, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4667136669158936, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4301254749298096, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.409292221069336, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4008593559265137, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4023468494415283, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4030022621154785, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4068307876586914, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.423799991607666, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.450871706008911, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5024781227111816, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5410430431365967, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.60184907913208, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6638729572296143, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7308173179626465, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.799415111541748, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8740994930267334, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9382877349853516, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.989718198776245, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.06339693069458, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0972983837127686, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1332333087921143, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1326329708099365, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.163426160812378, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.094602584838867, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0007472038269043, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1011605262756348, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1839399337768555, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0731329917907715, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.060314655303955, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9802920818328857, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07604282022886674, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08010335917312661, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08748615725359911, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.10040605389442599, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.12089331856773718, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.14691768180140274, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1849390919158361, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21040974529346623, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2349575489110373, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26116648209671467, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2678110003691399, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2646733111849391, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2663344407530454, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2691029900332226, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26356589147286824, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26208933185677374, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25544481358434845, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24787744555186417, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2220376522702104, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20339608711701734, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2085640457733481, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21779254337393872, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20616463639719454, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19029162052417867, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19915097822074565, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19139904023624954, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20505721668512367, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015504220299370078, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.016525815442614836, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01897065236006795, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02091137474556737, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02255094755166451, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.027533909535454426, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02945885313801259, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03340286855472609, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04175580172862136, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.053092361906716325, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07076297321212947, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1042371752928092, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13138835975727117, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1577448510267692, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17710355346411485, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18723158458313802, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19428954805661633, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19902378064044912, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19830116253577432, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2049251538289114, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20954644513638065, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.209142796635473, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21228662025073333, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21084614321137674, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.204814290811685, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20222174458838396, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19574810556187452, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19057890135516772, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18721702044921487, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1881883283728083, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.187666877737527, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17455282537731354, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1686611653626664, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17192891160709456, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17042369569837304, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16480779130066248, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16329415130515232, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17425272965016902, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17711324067108944, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16308249370979713, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1525461664928037, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1562719927400784, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1539015202504163, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1684754061419418, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.0066996812820435, "validation/loss_best": 2.4068307876586914, "validation/acc_best": 0.2691029900332226, "validation/f1_best": 0.20954644513638065} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.246868360042572, "train/grad": 0.280558213070035, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11921630859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1172119140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11386962890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11057861328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1071923828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10204833984375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0950836181640624, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0844195556640623, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0590740966796877, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.998704833984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.8838821411132813, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.7087388610839844, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.611909713745117, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.498986587524414, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3967573928833006, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3271493911743164, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.257838554382324, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1852463340759276, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1069657230377197, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0360875415802, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.957737112045288, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8952049255371093, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8168259000778197, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7485365378856659, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6889793252944947, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.621743575334549, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5672889637947083, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5222369176149368, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4700242012739182, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4376955789327621, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4072509336471557, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3854226410388946, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3765553349256516, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.397108352780342, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4071057283878325, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.452891708612442, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.500919234752655, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5653391754627228, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.635315738916397, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.73498009622097, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8164427465200423, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8972301143407821, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9694429403543472, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.0439381402730943, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02120073606260121, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02120353947393596, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02121042415499687, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021220245785079896, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021232512020505964, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021250477535650136, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02127935549709946, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021332005113363264, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021492624953389168, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02199112856760621, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02323038781993091, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.026561376340687275, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029274603221565484, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03245870415121317, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.035209520598873494, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03670097486115992, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03773832933977246, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0385782842990011, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.039616794595494866, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04060171034187079, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.041481740064919, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041764192767441276, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042873176615685224, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04387293817475438, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04488764110952616, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04623816767707467, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.047458124086260794, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04861708324402571, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04964694356545806, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.050821650251746175, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.052260516490787265, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05320413660258055, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.053838632423430684, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05443721584975719, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05518036978319287, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05539040988311172, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.055230027325451375, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05493114098906517, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05506047263741493, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.054656920470297334, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.053285449594259265, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.052665814347565176, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.052372720930725335, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.052954604383558035, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.113981008529663, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.112354040145874, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.10965895652771, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1068661212921143, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1039140224456787, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.099226474761963, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.09237003326416, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0806195735931396, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.050079822540283, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9745609760284424, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.842897891998291, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6806626319885254, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.603194236755371, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5148842334747314, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4548938274383545, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4302186965942383, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.417799949645996, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4126076698303223, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4127745628356934, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4126899242401123, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4196181297302246, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4403865337371826, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.471021890640259, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5140929222106934, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5498733520507812, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6207704544067383, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.677912712097168, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.752819061279297, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8355906009674072, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.886538028717041, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.94484281539917, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0373311042785645, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.125699996948242, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1727545261383057, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1421713829040527, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.128166913986206, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1240179538726807, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.11578631401062, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9915761947631836, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.91664981842041, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.895517587661743, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8306827545166016, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.865133047103882, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.820061206817627, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07456626061277224, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0753045404208195, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08010335917312661, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08527131782945736, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.09265411590992986, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1111111111111111, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.13510520487264674, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.16094499815430047, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19453672942045036, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21483942414174972, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24086378737541528, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26448874123292726, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27353266888150607, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.260797342192691, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2547065337763012, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2547065337763012, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24215577703949798, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22868217054263565, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23015873015873015, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2292358803986711, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2188999630860096, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21114802510151348, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2172388335179033, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23311184939091917, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22462163159837578, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22462163159837578, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22535991140642303, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22425249169435216, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01614049484821266, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.017505012165864014, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.019472131152214817, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.020036155093809575, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02199090879834392, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.025957284243288054, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.029998283343292634, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.038451531367042775, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04977404295976546, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06181160443179561, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.08178111733362899, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11205379483469584, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13391663088559383, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1606931235852528, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17732297219349033, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18572907486241186, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19132460492691808, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1944733338225959, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20365495465937222, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2086805517524786, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2175929711471957, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2135070036751292, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21552739407406743, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21273689783362373, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21334419791418577, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2033384226420416, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19878791614371197, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19230586241748127, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1914125877407887, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18891548040465414, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18691224762671568, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17944533774981306, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17336311660523687, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17235795079251184, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17078049888822433, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1781298272022608, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1783654939076045, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18397101679374317, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18012267076459523, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18760975944354605, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1840812902176657, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1865998796448395, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18837973804155891, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1856460090766836, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 1.957737112045288, "validation/loss_best": 2.4196181297302246, "validation/acc_best": 0.27353266888150607, "validation/f1_best": 0.2175929711471957} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.180516156554222, "train/grad": 0.27511465556919573, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11100341796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.108568115234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10478515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.100921630859375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.096868896484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.09074951171875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.081939697265625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.067464599609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0311395263671876, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.943621826171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.805095520019531, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6483078002929688, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.560550537109375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.445100631713867, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3547606658935547, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.291438102722168, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.223932399749756, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1519916915893553, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0717297267913817, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9994758319854737, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9202585887908936, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8579983234405517, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.776726450920105, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7022348004579544, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6356693613529205, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5596602487564086, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4977501744031907, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4423095488548279, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3820136964321137, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3373876655101775, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2929191553592683, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2596284693479538, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.243226074874401, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.2529847857356071, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.250511843264103, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.2953515666723252, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3411908668279648, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4121911913156509, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4797610980272293, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5837694895267487, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.6771684724092484, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7603799664974213, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.8334628301858902, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.9019190680980682, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021162006198428573, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021165595049969853, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021177180330269038, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021191153810359537, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021205355171114205, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02122764928266406, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02126228907611221, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0213369374955073, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02157806442119181, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02232450738083571, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024055977771058678, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02769515478052199, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030255554877221584, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.033701128307729956, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.036176349148154256, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.037465652395039795, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03839092115871608, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03918287148699164, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04024536913260818, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04103332137688995, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04172293081879616, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041886977199465034, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04288330767303705, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043742746748030185, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.044524276331067085, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.045699137784540655, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.046792455669492486, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047628200259059665, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04845578173175454, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.049066528789699075, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049815012291073796, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05084390429779887, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.051108671985566616, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051934712715446946, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.052245763167738914, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05315342290326953, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05386960752308369, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.053822830207645896, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0532061406224966, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05298501323908567, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05191933449357748, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0506102504581213, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05067077372223139, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05097829479724169, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.113467216491699, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.111774444580078, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.108914375305176, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.105971336364746, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.102786064147949, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.097536563873291, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0893898010253906, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.074553966522217, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.033719062805176, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9345951080322266, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7921929359436035, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.653315305709839, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.582460880279541, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.495598077774048, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.449719190597534, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4329097270965576, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4268858432769775, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4275004863739014, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4353187084198, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4453742504119873, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4635097980499268, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.494570255279541, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.536449670791626, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5957348346710205, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.646040201187134, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7167418003082275, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.771400213241577, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.831376075744629, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9094371795654297, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.027984619140625, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1457972526550293, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.18100905418396, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2699129581451416, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2856767177581787, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.297650098800659, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.288792133331299, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.2919371128082275, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.19398832321167, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1480863094329834, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.111708164215088, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.096364974975586, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0275232791900635, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8974568843841553, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9133167266845703, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07622739018087855, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07918050941306755, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08490217792543374, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.09523809523809523, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.11609449981543005, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14433370247323735, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17183462532299743, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20155038759689922, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21926910299003322, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2440014765596161, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2547065337763012, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26208933185677374, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26393503137689184, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2572905131044666, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2469545957918051, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24732373569582872, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23606496862310816, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22462163159837578, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2069029162052418, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2102251753414544, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21354743447766703, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23163528977482467, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.23089700996677742, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014728551415425185, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.015765692150929093, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.018227948228906683, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.019021933512171605, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.021674552466967246, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.026164361026090183, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.031043483103230626, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.040308130253286036, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05342125338427301, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06843475833168618, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09057637547080026, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11967685961646872, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13748709106783275, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.16405885468819426, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1766816668912573, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18652832613071504, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19018892461628165, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19579640754877112, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20105835661356722, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2053845349785721, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2078873794939016, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20821108420002987, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19886456238590108, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1994108127678443, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19108718043403786, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18895612078805332, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1848461100334421, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18347286565642149, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1818450724798165, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17852304182131248, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17352078997861678, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17049913389960933, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.163021809402596, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16530309701964824, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17140833698028576, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17693656324199095, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17209193333967487, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17907802490189692, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1823993122165017, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1712887281731066, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1716730649088736, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17979757855579856, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18810085178324845, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18350989802520187, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.9994758319854737, "validation/loss_best": 2.4453742504119873, "validation/acc_best": 0.26393503137689184, "validation/f1_best": 0.2053845349785721} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.1327622824907304, "train/grad": 0.27560615427792073, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11492431640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11245361328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.108416748046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1045556640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.100174560546875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0936053466796873, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.08388916015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0669500732421877, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0217974853515623, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.9153533935546876, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.774002990722656, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.633725128173828, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5471319580078124, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.422552185058594, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3338151168823242, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2696917915344237, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2005029296875, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1247601318359375, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.040180368423462, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9663552093505858, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.88568359375, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8209751224517823, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.738022688627243, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.661248766183853, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.590995156764984, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5115368103981017, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4440657269954682, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.385748136639595, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3241929203271865, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.275498587489128, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2179863625764846, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1713009750843049, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1534194165468217, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.154563007056713, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1421688243746757, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1642337948083878, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2083112439513206, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2705443847179412, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.3456298106908797, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.4459117066860199, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.544997312426567, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6403797036409378, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7163455295562744, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.7973807615041733, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0215648618619889, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021570501537062227, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021580373006872832, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021591530893929303, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021603900627233087, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02162532219197601, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021664866949431597, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021755125895142554, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02206443921662867, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02300871961284429, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025027049724012613, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02858892399817705, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031038912693038583, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.034472676161676645, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03663047303445637, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0378141213953495, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03870284157805145, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03956297840923071, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.040729832649230954, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.041540269553661344, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.042253030315041545, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04248539904132485, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04360891783609986, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.044444950744509694, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.045235894173383716, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.046325447373092174, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.047184178847819565, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04798655236139893, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048913626968860625, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04964355977252126, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05025942219421267, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05063619926571846, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05101456990465522, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051297429483383894, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.052064211722463366, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051976031959056854, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.052410867996513844, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05219837982207537, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05267973667010665, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05211958758533001, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05149267300963402, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05103879347443581, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04983971059322357, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0505463232845068, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.112987518310547, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1111788749694824, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1081392765045166, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.104907751083374, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.101364850997925, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0954456329345703, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.085946798324585, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.068002462387085, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.017259120941162, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8997490406036377, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.75722336769104, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6343460083007812, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5659070014953613, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.478189468383789, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4371204376220703, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.420426368713379, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4136741161346436, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.41245436668396, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4173457622528076, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.421365737915039, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.433274269104004, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4652137756347656, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.496088743209839, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5494134426116943, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5979576110839844, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6718456745147705, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7342963218688965, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.820828914642334, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.904160499572754, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0227038860321045, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.122406005859375, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.23529052734375, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.320176124572754, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.336372137069702, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.398338794708252, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4012136459350586, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.407536506652832, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3295979499816895, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.246798515319824, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1904683113098145, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.198763132095337, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.094489097595215, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9663498401641846, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9678587913513184, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07862679955703211, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08397932816537468, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08933185677371724, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.09948320413436693, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12218530823181986, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14894795127353266, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17718715393134, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2069029162052418, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22720561092654115, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25046142488002954, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2587670727205611, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2633813215208564, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26356589147286824, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26375046142488, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2681801402731635, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2661498708010336, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2619047619047619, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.257844222960502, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25101513473606496, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2515688445921004, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24640088593576967, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23901808785529716, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22056109265411591, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2187153931339978, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2026578073089701, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20173495754891105, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2085640457733481, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2233296419342931, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22277593207825766, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21520856404577335, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21483942414174972, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22554448135843486, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2233296419342931, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016407653015039825, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.017783338070466295, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0192775418317821, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.021497137463940354, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.025967052064690323, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03071021607033927, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03549171901679704, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04291616260566156, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05434462421154904, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07034890783119112, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09513435865939086, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12637055423103036, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14721836154723753, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17135140984428762, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18337666613509576, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1907983366668218, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19417957881817025, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19932921963020891, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20562065566378132, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20853917573584205, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21109032124691032, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21031859979635573, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2120439321688744, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20640388911757965, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20727161758812573, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2039683588563029, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1988033183983132, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1916603738180972, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1913579419822695, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1833650351231333, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1829433898141949, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16919161322453483, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16750159570705095, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17059855620102404, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17988916012717585, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17548144755831183, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17212106207251812, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17893911221358452, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1877034603169098, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18144797250528658, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16561105272115223, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17098127546829675, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17886956825501765, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.171170473100325, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.040180368423462, "validation/loss_best": 2.4173457622528076, "validation/acc_best": 0.2681801402731635, "validation/f1_best": 0.20562065566378132} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.080797851681709, "train/grad": 0.27334166422486306, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11180419921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.109193115234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.104873046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.100552978515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.0959326171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0885662841796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.077425537109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.057335205078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0026123046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8797613525390626, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7380134582519533, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6044798278808594, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5160479736328125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.392454147338867, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3099570846557618, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.247847442626953, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.179094648361206, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.102057361602783, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.016976399421692, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9432252955436706, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.860724606513977, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7963072896003722, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.708243397474289, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6280436795949935, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5567096585035325, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4720760786533356, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3957578575611114, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.332760008573532, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2582407689094544, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2013796856999397, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1396808513998986, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0901071265339852, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0504181668162347, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0387796181440354, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0240944853425027, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0359337890148164, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0656626743078232, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1270621624588966, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.1974463102221489, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2985356760025024, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.388995805978775, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.499471527338028, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.590882601737976, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.66419759452343, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021114947069436313, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02111927012912929, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02113055524416268, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02114198395982385, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021158717544749378, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021188517035916447, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021240947162732483, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021361562898382546, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021774882823228835, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022973215905949473, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025244111185893415, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02872231842018664, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031177548766136168, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03470649876631796, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03672945742495358, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.037876358181238176, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03874634134583175, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03965317411348224, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.040828376449644564, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.041599141955375674, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.042269958946853876, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04243625408038497, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04348161080852151, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.044412374477833506, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0452204312197864, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04628748256713152, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04728783670812845, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.048174286354333164, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048932276144623754, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04958093516528606, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049897749349474906, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.050350258722901345, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.050353966802358624, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.050410306621342896, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0508165006712079, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05127358106896281, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.051687630042433735, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05160320675000549, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051350248828530315, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.051263414416462184, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.050878858305513856, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.049834827817976476, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04971335753798485, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04943641887977719, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.112302780151367, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1103756427764893, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.107130289077759, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1037232875823975, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0999414920806885, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.093527317047119, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0829453468322754, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.062415599822998, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.003352165222168, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.873549222946167, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7343192100524902, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6206629276275635, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5524985790252686, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4676172733306885, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4311673641204834, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.416132688522339, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.410144805908203, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4090676307678223, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.416916847229004, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4217588901519775, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.433305025100708, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4608490467071533, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4979562759399414, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.554572105407715, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6022675037384033, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.690683126449585, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7700185775756836, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.85638689994812, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9486939907073975, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.06087589263916, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.177295207977295, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.286743402481079, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.389483690261841, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4213168621063232, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.461200714111328, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5087833404541016, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5344457626342773, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4385457038879395, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.356388807296753, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3412675857543945, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3005170822143555, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1690826416015625, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.094928741455078, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.059307098388672, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07493540051679587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07807308970099668, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08213362864525656, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08914728682170543, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10077519379844961, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12846068660022147, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.15374677002583978, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.18309339239571798, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20837947582133629, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23052787006275377, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2511997046880768, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2619047619047619, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2652270210409745, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2663344407530454, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26541159099298633, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26448874123292726, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2676264304171281, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2591362126245847, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2576596530084902, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25230712440014763, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24234034699150978, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23126614987080105, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2087486157253599, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2009966777408638, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19896640826873385, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.19712070874861573, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2056109265411591, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19730527870062753, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1967515688445921, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20358065706902917, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20136581764488742, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20911775562938353, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2026578073089701, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20376522702104097, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015280735124532875, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.016520234651701913, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01843162828689211, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02075833667713832, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.024038958309987854, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.028625641453149886, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.0350037891492642, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.043583223916117776, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.058050938550443125, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07446900736521823, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09998779836406002, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12785326091993035, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14926654084147487, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17083020291612805, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18608071526770176, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19189434162923277, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19586949448913646, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20060416666409983, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20376606817747653, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21000718965093604, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2126977146884769, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20982952467812285, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21192396127626043, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20959888260787296, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20829791751200863, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2011735803007152, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1935290160954577, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18444069878346028, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18106406336739825, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17981420031129516, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1740792132230469, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1681967687098512, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1667333380874, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1649855398093555, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17534604331662187, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16869390924709138, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16969512314701174, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18062429196332905, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18660437154810805, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1741961822091089, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1712942073635584, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17740710811074714, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17308728919374836, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17402570968899844, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.9432252955436706, "validation/loss_best": 2.4217588901519775, "validation/acc_best": 0.2676264304171281, "validation/f1_best": 0.21000718965093604} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.034883133172989, "train/grad": 0.2684458404779434, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.106717529296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.104207763671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.100003662109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.095638427734375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.091134033203125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.083538818359375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.07196044921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.050421142578125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9899627685546877, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.859752197265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7228932189941406, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.598378143310547, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5098115539550783, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.385544738769531, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3040615844726564, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2403863143920897, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.168777275085449, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0879666996002197, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0002982997894287, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9233723783493042, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8387504243850707, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.770357120037079, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6793761467933654, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5962114536762237, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5205909502506256, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4337655687332154, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3528033202886582, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2837100213766097, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.20477244079113, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1342020839452744, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0648507025837899, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0054038324952126, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9677605053782463, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9481387040019036, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9253876104950904, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.9250597643852234, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9415257120132446, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.9920705896615982, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0461253836750983, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.151503022313118, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2491599577665329, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.352646428346634, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.451319699883461, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.5195821082592011, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021563385324552656, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021569439405575395, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021577661335468293, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02158705531619489, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021603211546316742, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02162707829847932, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021677522314712407, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02180405193939805, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022252513272687794, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023547665141522886, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025921511556953193, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02936475157737732, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03190972442738712, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03547099598683417, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.037366125043481586, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.038445130735635755, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03924415592104197, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04008992962539196, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04117150977253914, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.041826950646936895, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04238675639033317, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04252430016174912, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04347431603819132, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.044246215727180245, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.045026082266122106, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.046042630542069675, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.046747703794389965, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04723387684673071, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04766849111765623, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04799395002424717, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04831619150936604, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.047983077764511106, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0477685577608645, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04772798238322139, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04789267504587769, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04809838781133294, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04839949201792479, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04880098329856992, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.048952586837112906, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04964105838909745, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04972787790000439, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04942392045632005, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.049255363047122955, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04902671167626977, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.112057685852051, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1101176738739014, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1068220138549805, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1033401489257812, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.099477767944336, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0927035808563232, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.081366777420044, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.058943510055542, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9938063621520996, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8568644523620605, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.721304416656494, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6130142211914062, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5443224906921387, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4602832794189453, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4260125160217285, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.412397861480713, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.408460855484009, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.410144090652466, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4213061332702637, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.427335500717163, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.439347743988037, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4716219902038574, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5070154666900635, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5655605792999268, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6156249046325684, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6863911151885986, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7563998699188232, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8396477699279785, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.946512460708618, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.064462900161743, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.175995111465454, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2838902473449707, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.358058452606201, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.369213342666626, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.435765266418457, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4921982288360596, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5289976596832275, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4824576377868652, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4194788932800293, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3905179500579834, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.305100917816162, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.189180612564087, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.1321802139282227, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0277111530303955, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07899593946105574, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0843484680693983, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.09099298634182355, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10280546327057954, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12975267626430417, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.15725359911406422, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.18881506090808417, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21502399409376152, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2321889996308601, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26375046142488, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2652270210409745, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2672572905131045, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2593207825765965, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24787744555186417, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2277593207825766, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22000738279808046, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2187153931339978, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2159468438538206, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20081210778885197, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20431893687707642, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2059800664451827, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20136581764488742, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20173495754891105, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19712070874861573, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20155038759689922, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19582871908453303, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20173495754891105, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21040974529346623, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2172388335179033, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015922522387966046, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.017089129802432507, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.019225182597616807, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.021395809723779427, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.026101917804901564, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03118870344492349, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03748993421707567, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04533894829280788, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05911621676214215, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08032313667059789, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10668264647753274, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13498257528523844, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1523792063634169, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1748262225207149, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1877766481719879, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1926689002371488, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19709788569314504, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19984158047915543, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2031592683304639, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20896410855006284, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2124709601715674, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21094887217286726, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20836398353409594, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20923734267049313, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20433659001853688, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20321986302733788, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19438419942095697, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1883011552619022, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18619930197015153, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18443369444831292, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18260098586578186, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17579915843040297, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1725316091316409, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17386625993627547, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1790395197873111, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17886188041167253, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18128536252765726, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18202151941362052, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18504312894693375, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17913694274584646, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18265734723607166, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1858105537457512, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18799561640962317, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18951900136293642, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.9233723783493042, "validation/loss_best": 2.427335500717163, "validation/acc_best": 0.2672572905131045, "validation/f1_best": 0.20896410855006284} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.0027877748012544, "train/grad": 0.26168380089104176, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11548828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.112801513671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10859375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.10412109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.099281005859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0914947509765627, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.07902099609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0556982421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.990013427734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.854328308105469, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7213449096679687, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6035693359375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5155904388427732, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3888479232788087, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3070945358276367, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2424594497680665, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1704220581054687, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0891448879241943, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0011205196380617, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9237912750244142, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8389322662353516, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7669679188728333, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6751856803894043, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.587781199812889, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.503856800198555, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4129253756999969, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3324509423971176, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2621426397562028, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1754577615857125, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.104321027994156, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0224446147680282, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.9568417212367057, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9083315744996071, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.8813540652394295, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8412581145763397, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8345923253893852, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8443812772631645, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8852890184521676, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.9254831656813621, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0089376798272134, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0937596109509469, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1923494559526444, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2930570074915886, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.3549000543355942, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02151597255375236, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0215215278416872, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02153057783842087, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021541271097958087, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021556439148262142, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02158355970401317, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021641566269099712, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021787218204699456, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022300324263051154, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023758368929848075, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.026234900997951626, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02964074113406241, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03221475850790739, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03571306785568595, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03752392084337771, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03856181963346898, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0393281477317214, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04019399160519242, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.041225803550332785, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04182373091578483, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.042327705305069685, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04240453151986003, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04327504273504019, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04398405769839883, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04464442582800984, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04549223698675633, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.046142549440264705, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04656091129407287, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.046603006441146136, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046652753986418244, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04673512885347009, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04637808883562684, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04616360682994127, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.045941887442022565, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04546671364456415, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04552721250802279, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0459151085652411, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04621575586497784, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04617328591644764, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04661685260012746, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04687893943861127, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04689985243603587, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.046469755060970785, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.046257844250649216, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.111889362335205, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.109933376312256, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1066226959228516, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.103060722351074, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0990779399871826, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0921101570129395, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0802958011627197, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0565855503082275, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9876649379730225, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.846667528152466, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.713362216949463, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6078100204467773, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5387282371520996, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4557411670684814, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.422468423843384, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.409309148788452, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4056951999664307, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4073731899261475, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.419041872024536, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.427253246307373, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.442563533782959, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.47685170173645, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5197932720184326, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.580889940261841, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6293087005615234, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7051212787628174, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.788130283355713, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.875108242034912, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9775967597961426, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.095346689224243, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.2257189750671387, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.334009885787964, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.4309134483337402, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4645586013793945, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5386440753936768, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5908961296081543, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.6499667167663574, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.577740430831909, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.5398569107055664, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.4738357067108154, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.4449498653411865, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.395709991455078, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2667016983032227, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1936116218566895, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07567368032484312, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07954964931709117, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08342561830933924, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.09117755629383537, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.1035437430786268, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.13233665559246954, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.16057585825027684, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.18899963086009597, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21207087486157253, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23274270948689554, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2563676633444075, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26375046142488, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.266703580657069, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26448874123292726, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26411960132890366, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2678110003691399, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26541159099298633, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2511997046880768, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24935400516795866, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24326319675156885, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22905131044665927, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21520856404577335, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2087486157253599, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2041343669250646, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20081210778885197, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20671834625322996, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19952011812476927, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1937984496124031, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20339608711701734, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20155038759689922, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20468807678110004, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20136581764488742, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19952011812476927, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2024732373569583, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20468807678110004, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0169224593816691, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.017670807603819654, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01872991585240866, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.020940547611461847, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.025554319135016514, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02978324319156458, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03701650764733936, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.045460297806944244, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06038847529400174, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0821300686543289, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10750448024717296, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13266383769177473, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15312267241137106, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1786743796488435, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1902868927196726, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19581488678013503, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19678252378440467, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20102118588997864, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2054351560102187, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21201133958738252, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21189724509161856, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2123034896463419, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20634982917388325, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20548540971835352, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20705679596451687, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20428738021299, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19571347857329233, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18671501670258261, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18661578292201716, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18357196259309813, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1793984789575668, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1777885355034542, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17190572964040177, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1680380041872148, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17797297945549903, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1701394307156057, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16646898883667027, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17714416856530235, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17651084204279086, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18012237292938113, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17674589508939617, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1749132946987774, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17824263993342102, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1789818996538621, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.9237912750244142, "validation/loss_best": 2.427253246307373, "validation/acc_best": 0.2678110003691399, "validation/f1_best": 0.21201133958738252} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 1.9674499785900117, "train/grad": 0.25433536410331725, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.110740966796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.108172607421875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10379150390625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.099383544921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.094547119140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0866070556640626, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.073984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.050302734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.982467041015625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.842750244140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7074330139160154, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5871588134765626, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.497832565307617, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.3733964920043946, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.293623332977295, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2297964668273926, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.157359733581543, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0742027950286865, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9857142353057862, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9081092119216918, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8215450191497802, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7498558580875396, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6564591366052628, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.568434864282608, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4866219717264175, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.392816228866577, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3093053728342057, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2341445118188858, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1426192009449005, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.065124193429947, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.983819130063057, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.9173418083786964, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.8617350533604622, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.8289508864283561, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.7803932416439057, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.765770505964756, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.7619043165445327, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.7885691511631012, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8214133471250534, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9003398016095161, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9620063406229019, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.0486553838849069, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1351033180952073, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1953737053275109, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021283211950212717, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021285073570907115, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021293476941064, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0213019099785015, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021315665636211633, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021343871518038213, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021402199822477996, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021552917459048332, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02207286546938121, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023535949131473898, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02604561487212777, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029406659463420512, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0319512645713985, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03535386646166444, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03708746054209769, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.038111657295376064, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03889277132228017, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.039764223210513594, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04081471526995301, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.041437057852745054, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04192813279107213, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04198113987222314, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04278358031064272, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04353274082764983, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0442245877161622, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044989596251398326, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045563394147902725, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04595553029328585, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04596640249714255, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04577730344608426, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04546604944393039, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0450333908200264, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04446096921339631, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0440145156532526, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.043438660837709905, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04312356043606996, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.042997691482305526, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.042884902358055116, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04320132860913873, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04388031199574471, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04385985376313329, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0442818570882082, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04440349668264389, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04444240693002939, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1117970943450928, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.109825611114502, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.106473684310913, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.102879047393799, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0988383293151855, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0917413234710693, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0796358585357666, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.055262565612793, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.984285593032837, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.841515064239502, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7093725204467773, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.605153799057007, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.53581166267395, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4539644718170166, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4207537174224854, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4072282314300537, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.402517318725586, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.402681827545166, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4139609336853027, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.421355724334717, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.436466932296753, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4697368144989014, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5114083290100098, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.573359489440918, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.625649929046631, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.705751419067383, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7837843894958496, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8706531524658203, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.977752208709717, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.099829912185669, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.2218613624572754, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.339672088623047, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.435079574584961, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.479797840118408, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.565153121948242, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.630985975265503, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.6914563179016113, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.6335530281066895, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.5837557315826416, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.523690938949585, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.5017828941345215, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.4501938819885254, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.350440263748169, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2596988677978516, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07290513104466592, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.073827980804725, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0769656699889258, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.08010335917312661, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08545588778146918, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.09265411590992986, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.1050203026947213, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1330749354005168, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1640826873385013, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19047619047619047, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21391657438169065, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23274270948689554, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25692137320044295, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.266703580657069, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.266703580657069, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26891842008121075, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24640088593576967, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2499077150239941, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2292358803986711, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21631598375784422, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21391657438169065, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.19896640826873385, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20062753783684018, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19656699889258028, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1967515688445921, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.19952011812476927, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20081210778885197, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20228866740494647, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2056109265411591, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20431893687707642, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2056109265411591, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20893318567737174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016231852445939172, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.018207403942489302, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01942117117810206, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022474642721234145, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.026380729579287752, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03143082756102059, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03834232916356921, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04630841084996538, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06127514443008003, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08554079593180093, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10919108906038723, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13473321375453326, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15351663370208843, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17948920505262467, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1899126333255112, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1960279476288155, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19728187157986435, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20379001844105818, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20621441344895122, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21307477213618253, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21208706344706374, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21296636148224532, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20939821676537818, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2060071714141273, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2079504461901236, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20275676955033126, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19692787666194214, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18694129354643127, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18382360790207666, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18214071793956624, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17789754859004123, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17485775690897112, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17330192752774987, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16832939204475714, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1717837841918819, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17151274728278856, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17095499824091973, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17542013756823635, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1795589181520505, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1784464418028738, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17756956012611838, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17450902189398618, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17752820225113408, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18197826857497157, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.9081092119216918, "validation/loss_best": 2.421355724334717, "validation/acc_best": 0.26891842008121075, "validation/f1_best": 0.21307477213618253} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 1.9497993862628937, "train/grad": 0.2508364052325487, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.112586669921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1100048828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10568603515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.101121826171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.096407470703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.088369140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.075665283203125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0514111328125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9826239013671874, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.842654113769531, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7102252197265626, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5918407440185547, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5021810150146484, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.376305618286133, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.2945258903503416, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.228803653717041, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1544700241088868, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0683956003189086, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9777223443984986, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.898337290287018, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8104279565811157, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7395434165000916, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6458196246623993, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5567429208755492, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4735170537233353, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.380545410513878, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.295660541653633, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2204074108600615, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1263779866695405, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0468672981858254, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.96375393897295, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8885012847185135, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.834645374417305, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.8020161463320256, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.756526740193367, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.7321165047585965, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.7243951316177845, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.7485893678665161, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.7714991426467895, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8315397757291794, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.8793664208054542, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.954790658056736, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0387239095568657, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0890841215848923, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021406792434863747, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021409861403517427, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021417298633605242, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021429081107489766, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021443535261787473, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021473458255641162, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021537074032239617, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021694522737525403, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022237600767984988, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023730600830167533, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.026229928536340593, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029574578292667866, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03213250003755093, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03555049104616046, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.037257433552294966, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03828080295585096, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03903738905675709, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.039900778774172065, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.040927824880927804, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0415029557980597, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04199276397004723, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04200431795790791, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04283206213265658, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04350300328806043, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04409507106989622, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04476778540760279, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0452780344709754, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04558638764545322, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04559117030352354, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04526076465845108, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.044846826121211054, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04417922593653202, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.043614064268767835, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0430041753873229, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.042140550408512355, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.041798959989100694, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.041550108250230554, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.041641904171556235, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.041420024149119855, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.041887772772461175, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04184853496029973, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04232268096879124, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.042538043167442086, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04253024997189641, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.111772298812866, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1097817420959473, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.106417179107666, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.102825403213501, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0987448692321777, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0916194915771484, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.079390287399292, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0547897815704346, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.983076572418213, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.839728832244873, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.708080291748047, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.604580879211426, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.535552501678467, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.454376697540283, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.422088623046875, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.409417152404785, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.405653476715088, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4071505069732666, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.419358730316162, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4272632598876953, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.441791534423828, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.476165294647217, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.517449140548706, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.578730583190918, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6287965774536133, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7058966159820557, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.782461643218994, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.870851516723633, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.979121208190918, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.09942626953125, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.21653151512146, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.343989372253418, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.449587106704712, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4888978004455566, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5862345695495605, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.6532182693481445, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.7218921184539795, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.672999620437622, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.6153934001922607, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.570911169052124, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.557473659515381, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.4983792304992676, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.417724370956421, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3540585041046143, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07733480989294943, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.08028792912513842, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08545588778146918, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.09302325581395349, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10538944259874493, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.13381321520856404, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1640826873385013, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19139904023624954, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21391657438169065, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23255813953488372, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25544481358434845, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2622739018087855, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2663344407530454, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2633813215208564, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2663344407530454, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2593207825765965, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25212255444813586, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24584717607973422, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24806201550387597, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22886674049464747, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20228866740494647, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.19785898855666298, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19269102990033224, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1937984496124031, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1998892580287929, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20302694721299372, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20062753783684018, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20081210778885197, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19638242894056848, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.1997046880767811, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20210409745293467, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016254748571781472, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01831839285089086, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.019328560270236942, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02299237387164085, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02656259601123413, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03146201825091265, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03896625055144234, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0466915657857507, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06148518103402375, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08518923796234579, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10982008185054896, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13461886962756317, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15349263108828853, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1780058688483878, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18921826077761814, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19516207032098642, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19533009205536842, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20376323369391383, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20507182228204968, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21262432109377208, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21062099311340374, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21050674054318066, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20773790645493828, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2040806276688726, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20561264475403118, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20322915508162187, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19648664987394082, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18799763456245575, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18323011764064676, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18307131182973893, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18236244185818937, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17082635649583977, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17121336855569566, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16582949672673591, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17268241037608467, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16543120167368566, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1682820615644868, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17484734036826635, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18063027024106315, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17948679377736743, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17706845423950446, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1713306722531408, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17497506413062203, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17728751352573524, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.898337290287018, "validation/loss_best": 2.4272632598876953, "validation/acc_best": 0.26854928017718716, "validation/f1_best": 0.21262432109377208} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 1.946880704164505, "train/grad": 0.24664697952568532, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.112615966796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.110079345703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10586181640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.101302490234375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.096590576171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.08847412109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0756591796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.051231689453125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.98169921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8411044311523437, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.707530059814453, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.589010009765625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.500838165283203, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.378289833068848, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.298856430053711, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2351302337646484, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1632817459106444, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.081088910102844, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9917563152313233, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9138779997825623, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8262209010124206, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7555875205993652, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.660570741891861, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.569853339791298, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4867912739515305, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3901152038574218, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.3022876864671706, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2241925901174546, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1310067486763, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0537132766842843, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.963621087372303, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8942685240507126, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.8318131187558174, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.798781228363514, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.740233745276928, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.7161456114053726, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.7094677498936653, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.7238718977570534, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.7345670968294143, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.7940360610187054, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.8519768327474594, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9223187959194183, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.9938766968250274, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.038184658885002, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021015738411806525, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021017927005887033, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021024656621739267, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021033554477617143, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02104594562202692, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02107289039995521, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021129707461223005, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021278918297030033, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02180074310861528, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023256409810855985, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02571484936401248, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02901987570337951, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03157758464105427, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03501456503756344, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03675547688268125, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03779471949674189, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03859508377499878, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03950447982177138, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.040548628959804774, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.041167321298271414, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.041682386975735425, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041735225971788165, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04255538022145629, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043230034448206424, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04380073990672827, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044411044232547284, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04496166501194239, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04521778771653771, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045128067564219235, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.044676725640892985, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04427646154537797, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.043751665223389864, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04283533396199345, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0423944740742445, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04154827078804374, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04094732489436865, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04044631116092205, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.040257224272936584, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04011711031198502, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.040348755475133655, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04055867165327072, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04076333750039339, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.041145930774509905, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.040970981512218715, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1117491722106934, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1097757816314697, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.106410264968872, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1028106212615967, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.098729133605957, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0915772914886475, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0793471336364746, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0547034740448, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9829113483428955, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8394768238067627, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.707836866378784, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.604358196258545, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.53523325920105, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4539172649383545, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4215593338012695, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4088327884674072, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4049572944641113, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4062609672546387, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.418428659439087, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.42641019821167, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.440680503845215, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4749879837036133, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5167670249938965, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5786705017089844, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.629394054412842, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7068228721618652, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.784430742263794, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8737034797668457, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.982055902481079, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.101025342941284, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.222090244293213, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.3470427989959717, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.4555280208587646, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4974236488342285, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.589482069015503, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.6568686962127686, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.7254607677459717, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.6715497970581055, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.616159439086914, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.580739736557007, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.5637731552124023, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.504666805267334, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.4193007946014404, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3558435440063477, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0769656699889258, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.08010335917312661, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08508674787744555, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.09265411590992986, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10575858250276855, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.13344407530454042, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1640826873385013, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19158361018826134, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2146548541897379, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.23181985972683647, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2561830933923957, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.266703580657069, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26375046142488, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2661498708010336, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26891842008121075, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2652270210409745, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25950535252860835, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2517534145441122, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2454780361757106, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24750830564784054, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2294204503506829, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2172388335179033, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21391657438169065, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2039497969730528, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20228866740494647, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1967515688445921, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20284237726098192, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19435215946843853, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19416758951642674, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1997046880767811, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20321151716500555, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20044296788482835, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19878183831672203, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19509043927648578, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2011812476928756, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.1997046880767811, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01646649825294953, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.018299803551191145, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.019330148572858717, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022773443016055935, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.026346193241784305, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03140493660618308, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03896812358807471, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04690023692649655, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06152927238945701, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0851659483001335, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11010159433185025, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13534965398197296, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.15275157067437248, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17861727774077077, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18951698584753107, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19536043286637997, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19555840977150252, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20322754156636455, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2055310712953898, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21265296750433957, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2117119419313183, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2106642979066534, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20712923646128978, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20380752856432705, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2054917029528678, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20220733236684318, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19694308196107083, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1876425431190619, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1849416203899462, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18336228023424986, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18228747633358297, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17142737512770104, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17093494090457984, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16487642659312737, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1728986801256476, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16812206246759076, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16836423511765922, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17534492232784551, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18082861657173185, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17811912415329945, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17470533428876522, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17007087206321278, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17680722823858516, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17592605488963323, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.9138779997825623, "validation/loss_best": 2.42641019821167, "validation/acc_best": 0.26891842008121075, "validation/f1_best": 0.21265296750433957} diff --git a/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0d909ebcc8d0732d26737abdeed166e7ba6faf0b --- /dev/null +++ b/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..ba67224e608aad036f5441fa7f1af4ff6b7b0e6f --- /dev/null +++ b/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,0.3593813663804626,train,0.9430604982206405,0.009718841663704219,0.9394070080862533,0.010425699278079526,0.9356708742402993,0.011094847172679138 +flat_mae,patch,logistic,ppmi_dx,,0.3593813663804626,test,0.63,0.04524961436299762,0.5906626839252129,0.04844622125624765,0.5892320892320893,0.04684232537859296 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,train,0.8327402135231317,0.015080358703522843,0.8150072837292694,0.017395623217074076,0.80415061014772,0.017437234468122915 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,test,0.62,0.04371370036956378,0.5766488413547237,0.04965517929109973,0.5764006791171477,0.046718817119045165 +flat_mae,patch,logistic,ppmi_dx,2,0.3593813663804626,train,0.9501779359430605,0.009144294829924085,0.9466771487048808,0.009939576492254692,0.9412732819524727,0.010996322012108933 +flat_mae,patch,logistic,ppmi_dx,2,0.3593813663804626,test,0.59,0.04961217189359885,0.5626666666666666,0.05281989014396253,0.5623938879456706,0.05229097998959533 +flat_mae,patch,logistic,ppmi_dx,3,0.046415888336127774,train,0.8185053380782918,0.015093188743586058,0.7961798839458414,0.018017797429398127,0.7838926354099764,0.01776826758149463 +flat_mae,patch,logistic,ppmi_dx,3,0.046415888336127774,test,0.69,0.04351703574463683,0.6615351020853806,0.048463542592696605,0.6583191850594228,0.047252174901520914 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,train,0.7348754448398577,0.016959778826558202,0.6894209977783465,0.02213521818632936,0.6829238921001927,0.019835126964382666 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,test,0.7,0.04106226978626486,0.66078697421981,0.047734870379201884,0.6561969439728353,0.044782648390475896 +flat_mae,patch,logistic,ppmi_dx,5,0.046415888336127774,train,0.8291814946619217,0.015311398136063293,0.81197724991636,0.01746619835113448,0.8021301648469279,0.017490164245246728 +flat_mae,patch,logistic,ppmi_dx,5,0.046415888336127774,test,0.56,0.050015721528335465,0.5280995280995281,0.05238886009304962,0.5280135823429541,0.05173854732883097 +flat_mae,patch,logistic,ppmi_dx,6,0.046415888336127774,train,0.8327402135231317,0.01501652859356584,0.8145527051125434,0.017436666838212458,0.8032808820381074,0.01746765581275586 +flat_mae,patch,logistic,ppmi_dx,6,0.046415888336127774,test,0.58,0.04470615170197497,0.525101763907734,0.051937224805165036,0.5288624787775891,0.04761150502647497 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,train,0.8291814946619217,0.01576078240050652,0.8137016574585636,0.017659008075572125,0.8056090772853779,0.017861534345425395 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,test,0.55,0.051013896146050244,0.5366079703429101,0.05184591645257948,0.5403225806451613,0.05342905332313932 +flat_mae,patch,logistic,ppmi_dx,8,0.005994842503189409,train,0.7188612099644128,0.01666144094736625,0.6732365756005653,0.020869094192100066,0.6681786555341469,0.018831212355544823 +flat_mae,patch,logistic,ppmi_dx,8,0.005994842503189409,test,0.62,0.038295294750138695,0.5386109762020399,0.048785906087796614,0.5509337860780985,0.04092566604131759 +flat_mae,patch,logistic,ppmi_dx,9,0.005994842503189409,train,0.7277580071174378,0.015475975553370755,0.681083306443537,0.019941341366071848,0.6754040890601585,0.017925887506499892 +flat_mae,patch,logistic,ppmi_dx,9,0.005994842503189409,test,0.71,0.03829867360627519,0.6579785352046232,0.04956219876842803,0.6540747028862479,0.0439527155611839 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,train,0.7295373665480427,0.01697580181587017,0.6846659283868586,0.021828051478280285,0.6785886319845857,0.01969538551503112 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,test,0.6,0.04425717568937268,0.5604395604395604,0.04849572941282711,0.5602716468590832,0.04660202522938627 +flat_mae,patch,logistic,ppmi_dx,11,2.782559402207126,train,0.99644128113879,0.0024616330018922554,0.9962334964144495,0.0026125582400898266,0.9953703703703703,0.0032024021922764947 +flat_mae,patch,logistic,ppmi_dx,11,2.782559402207126,test,0.7,0.044095732219796506,0.6782496782496783,0.047327395321723906,0.6765704584040747,0.04682570102858115 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,train,0.9323843416370107,0.010628278765627554,0.9280458221024259,0.011423263255464645,0.9250829586812246,0.012079225946649973 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,test,0.65,0.044211034821637005,0.6072270227808326,0.050228756052448094,0.6056876061120543,0.04736082615996879 +flat_mae,patch,logistic,ppmi_dx,13,0.3593813663804626,train,0.9644128113879004,0.007974016992655047,0.9622676979267375,0.008485012194154571,0.9606615285806037,0.009025187819360538 +flat_mae,patch,logistic,ppmi_dx,13,0.3593813663804626,test,0.61,0.04650550074991129,0.568536342515765,0.05122102941892769,0.5683361629881154,0.04890203025188589 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,train,0.8291814946619217,0.014763273030379194,0.8110712684894665,0.017003237641987922,0.8003907086277029,0.01702512325496237 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,test,0.57,0.04531796994570696,0.5305164319248826,0.04858880543837976,0.5309847198641766,0.0469430625270563 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,train,0.7384341637010676,0.0156139498736111,0.6926047887481164,0.020080357747320064,0.6858140655105973,0.018013400814026113 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,test,0.66,0.04194617503420306,0.609375,0.0507321062318044,0.6086587436332768,0.04615700467813121 +flat_mae,patch,logistic,ppmi_dx,16,0.046415888336127774,train,0.8309608540925267,0.014421983812988896,0.810417620970176,0.017133157851359847,0.7974871547848427,0.0170838961895997 +flat_mae,patch,logistic,ppmi_dx,16,0.046415888336127774,test,0.65,0.045189817437117405,0.6224786970121885,0.0490666338031973,0.6209677419354839,0.04831710061727428 +flat_mae,patch,logistic,ppmi_dx,17,0.046415888336127774,train,0.8131672597864769,0.015307182513498819,0.793106349857478,0.017802407520403216,0.7830362877328195,0.017739950841352273 +flat_mae,patch,logistic,ppmi_dx,17,0.046415888336127774,test,0.59,0.04571877513669849,0.5464100011063171,0.050084490057855875,0.5471137521222411,0.047829283307919995 +flat_mae,patch,logistic,ppmi_dx,18,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,18,1291.5496650148827,test,0.57,0.049609978834907795,0.557203171661003,0.05066544682878853,0.5615449915110357,0.05224853446061583 +flat_mae,patch,logistic,ppmi_dx,19,0.046415888336127774,train,0.8167259786476868,0.015128670572605219,0.7960345732779428,0.017554172514366553,0.7850567330336116,0.01738250181476914 +flat_mae,patch,logistic,ppmi_dx,19,0.046415888336127774,test,0.71,0.044014888390179974,0.6938021328265231,0.04620432585274497,0.6948217317487266,0.046390377377879745 +flat_mae,patch,logistic,ppmi_dx,20,0.046415888336127774,train,0.8131672597864769,0.015680755179614324,0.7936099889827396,0.01814515283690319,0.783906015842432,0.018076972012768755 +flat_mae,patch,logistic,ppmi_dx,20,0.046415888336127774,test,0.67,0.041664848493664294,0.6176572818908586,0.050292728202088126,0.616723259762309,0.04547365133574492 +flat_mae,patch,logistic,ppmi_dx,21,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,21,1291.5496650148827,test,0.61,0.04743608752837865,0.5882166613873931,0.049491386541767775,0.5887096774193548,0.049504562984715186 +flat_mae,patch,logistic,ppmi_dx,22,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,22,21.54434690031882,test,0.55,0.04869856260712424,0.5248653785239151,0.050756742363550975,0.5250424448217317,0.051113161301739526 +flat_mae,patch,logistic,ppmi_dx,23,0.005994842503189409,train,0.7348754448398577,0.016159836316410485,0.6913715387195336,0.020499061851053477,0.6846633483194177,0.018597363317695573 +flat_mae,patch,logistic,ppmi_dx,23,0.005994842503189409,test,0.66,0.04496213518061614,0.609375,0.054040822047925936,0.6086587436332768,0.04955820404901081 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,train,0.7295373665480427,0.01610486179300103,0.683671051072402,0.020812465992023643,0.6777189038749732,0.018659874099308298 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,test,0.63,0.04088236294540716,0.5636277862955537,0.050203991210757234,0.5691850594227504,0.04412904201521217 +flat_mae,patch,logistic,ppmi_dx,25,0.3593813663804626,train,0.9430604982206405,0.009744572206034944,0.9392932852648492,0.01050075122046031,0.9354929351316634,0.011278970954382815 +flat_mae,patch,logistic,ppmi_dx,25,0.3593813663804626,test,0.64,0.03998951862676019,0.5792426367461431,0.05036360647994039,0.5823429541595926,0.044420179846896654 +flat_mae,patch,logistic,ppmi_dx,26,0.3593813663804626,train,0.9377224199288257,0.009933716699858937,0.9339087894254063,0.010572397379765579,0.932027403125669,0.01095488964532246 +flat_mae,patch,logistic,ppmi_dx,26,0.3593813663804626,test,0.64,0.04565597879796249,0.6179966044142615,0.048775255708017544,0.6179966044142615,0.04896193072503621 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,train,0.7206405693950177,0.017014390443075404,0.6706248623466214,0.022021648460014594,0.6661448298008992,0.019547318952198203 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,test,0.62,0.041886183879651775,0.5476190476190476,0.052077775167443024,0.5560271646859083,0.04543871301389427 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,train,0.9466192170818505,0.009420392644202937,0.9431940700808625,0.010110280696271377,0.9401225647612932,0.010838644573089671 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,test,0.62,0.04606073816169255,0.5924495924495925,0.04968790442111471,0.5916808149405772,0.04891071030375556 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,train,0.8256227758007118,0.01509909349376749,0.8076014811709635,0.017420886140189537,0.7975005352172982,0.01750987381034677 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,test,0.63,0.047497974693664576,0.5906626839252129,0.05315077608236612,0.5895585738539898,0.0506647830186329 +flat_mae,patch,logistic,ppmi_dx,30,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,30,21.54434690031882,test,0.61,0.04777488461524528,0.584,0.051627396178459475,0.583616298811545,0.05144063503619579 +flat_mae,patch,logistic,ppmi_dx,31,0.046415888336127774,train,0.8345195729537367,0.015635418323509544,0.8162979104792366,0.01830563009451841,0.8047259687433097,0.0183997827499793 +flat_mae,patch,logistic,ppmi_dx,31,0.046415888336127774,test,0.66,0.04606840131804012,0.6353496353496353,0.04849142693462424,0.634125636672326,0.04791073543150732 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,train,0.8167259786476868,0.015312667310758022,0.7980272362815463,0.017608889213476066,0.7885356454720616,0.01764679882154443 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,test,0.67,0.04428216345211692,0.6440513428972063,0.048367590788555063,0.6421901528013583,0.04776257510752226 +flat_mae,patch,logistic,ppmi_dx,33,0.3593813663804626,train,0.9483985765124555,0.008946409711688709,0.9448264188628785,0.009691096253634918,0.9398281952472705,0.010571857307610172 +flat_mae,patch,logistic,ppmi_dx,33,0.3593813663804626,test,0.66,0.04723928449923855,0.6353496353496353,0.05120044521088683,0.634125636672326,0.05076863277495403 +flat_mae,patch,logistic,ppmi_dx,34,0.046415888336127774,train,0.8202846975088968,0.015024385053951933,0.7989693825149191,0.01771803619553632,0.7870771783344037,0.01750322001006957 +flat_mae,patch,logistic,ppmi_dx,34,0.046415888336127774,test,0.67,0.044037125246773313,0.6296711929076422,0.0515727101953694,0.6269100169779287,0.04865137855127997 +flat_mae,patch,logistic,ppmi_dx,35,0.3593813663804626,train,0.9412811387900356,0.010075971217844312,0.9376854300296689,0.010750475389341107,0.9357873046456862,0.011284628334793772 +flat_mae,patch,logistic,ppmi_dx,35,0.3593813663804626,test,0.6,0.049603528100327705,0.5659722222222222,0.053085290658552796,0.565365025466893,0.05195229570282048 +flat_mae,patch,logistic,ppmi_dx,36,0.005994842503189409,train,0.7241992882562278,0.016256035720051614,0.6818701404942681,0.020013772081335056,0.6759928280882038,0.01829930510340109 +flat_mae,patch,logistic,ppmi_dx,36,0.005994842503189409,test,0.64,0.04114984811636612,0.5792426367461431,0.04936410027607919,0.5823429541595926,0.04417171930517304 +flat_mae,patch,logistic,ppmi_dx,37,0.3593813663804626,train,0.9448398576512456,0.009581040783662042,0.9410213443016977,0.010406432563138118,0.9360682937272533,0.011432763489264777 +flat_mae,patch,logistic,ppmi_dx,37,0.3593813663804626,test,0.65,0.04500335987456937,0.630450849963045,0.04682204692793159,0.6311544991511036,0.04707293583689922 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,train,0.802491103202847,0.016025263106609147,0.7807426673461857,0.01865338032541245,0.7708868550631557,0.018439236378183056 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,test,0.7,0.04530611879205721,0.6847414880201765,0.04735395224066736,0.6867572156196944,0.04779090071698159 +flat_mae,patch,logistic,ppmi_dx,39,0.046415888336127774,train,0.8238434163701067,0.014379240812025287,0.8034569366581532,0.017168263000953813,0.7917068079640335,0.01727227235581512 +flat_mae,patch,logistic,ppmi_dx,39,0.046415888336127774,test,0.67,0.0459393121411281,0.6396986570586308,0.05064134614606536,0.6370967741935484,0.04919403747059671 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,train,0.6690391459074733,0.014710404241846966,0.5771359223300971,0.02179916640790788,0.592927103403982,0.01678211201662067 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,test,0.66,0.03585707182690745,0.5687468290208015,0.052414192804968146,0.5831918505942275,0.04086209520637194 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,test,0.6,0.05132166793860075,0.586606035551881,0.05222995387608565,0.5908319185059423,0.05319657754243219 +flat_mae,patch,logistic,ppmi_dx,42,0.046415888336127774,train,0.8185053380782918,0.014901480731364722,0.7997484804024313,0.017089083330025904,0.789980732177264,0.01700703304554459 +flat_mae,patch,logistic,ppmi_dx,42,0.046415888336127774,test,0.68,0.039683119837028935,0.6259934548854604,0.04975199597504042,0.6247877758913413,0.04432757335355691 +flat_mae,patch,logistic,ppmi_dx,43,0.046415888336127774,train,0.8202846975088968,0.015517522535981388,0.8028789220537931,0.017493542997013663,0.7940350032113038,0.01746345309858463 +flat_mae,patch,logistic,ppmi_dx,43,0.046415888336127774,test,0.6,0.04425144065451429,0.554367201426025,0.049556954063591244,0.5551782682512734,0.04696458801086889 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7259786476868327,0.016160375101868373,0.6784849250338054,0.020865997041025323,0.6730892742453436,0.018618991872925438 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.68,0.04148917449166709,0.6259934548854604,0.05256835054930224,0.6247877758913413,0.046974610754166264 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,train,0.8291814946619217,0.015932318763144656,0.8124191641749531,0.01813821104826984,0.8029998929565403,0.01825036332610028 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,test,0.66,0.044687465804182716,0.6212121212121212,0.050608278902441156,0.6188455008488964,0.04811151937006594 +flat_mae,patch,logistic,ppmi_dx,46,0.046415888336127774,train,0.8309608540925267,0.01519882580738319,0.8145890851001025,0.017217367397728178,0.8053147077713552,0.017340847376839323 +flat_mae,patch,logistic,ppmi_dx,46,0.046415888336127774,test,0.63,0.0459838058451016,0.5783475783475784,0.0538732946615018,0.5793718166383701,0.04953516975815706 +flat_mae,patch,logistic,ppmi_dx,47,0.046415888336127774,train,0.8149466192170819,0.015369979045956847,0.79630868740939,0.017497945648126473,0.7870905587668593,0.017456238975380085 +flat_mae,patch,logistic,ppmi_dx,47,0.046415888336127774,test,0.7,0.04302354704112621,0.6744791666666667,0.04795109699290812,0.6714770797962648,0.047428645864378505 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,train,0.7295373665480427,0.015485233024994357,0.681633870005963,0.020212568440982546,0.6759794476557482,0.018024790095250504 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,test,0.64,0.042187291925412804,0.5989304812834224,0.048709334146665534,0.597623089983022,0.04596119541178976 +flat_mae,patch,logistic,ppmi_dx,49,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,49,2.782559402207126,test,0.65,0.04786762998102162,0.6224786970121885,0.05126797979686403,0.6209677419354839,0.050436386082368084 +flat_mae,patch,logistic,ppmi_dx,50,0.3593813663804626,train,0.9377224199288257,0.00949247081591274,0.9332810978939621,0.010292239671853408,0.9276787625776065,0.011039977367040573 +flat_mae,patch,logistic,ppmi_dx,50,0.3593813663804626,test,0.66,0.04265457068122946,0.6155585707824514,0.04908085042653531,0.6137521222410866,0.04579370911799187 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,train,0.9466192170818505,0.008857493253610566,0.9431940700808625,0.009498651655257224,0.9401225647612932,0.010140322276801693 +flat_mae,patch,logistic,ppmi_dx,51,0.3593813663804626,test,0.58,0.04964866564168668,0.5442708333333334,0.053437358865708384,0.5441426146010186,0.05207740706125955 +flat_mae,patch,logistic,ppmi_dx,52,0.046415888336127774,train,0.8185053380782918,0.015765727035554462,0.7987699566114832,0.01819632119014533,0.7882412759580391,0.01803627334891838 +flat_mae,patch,logistic,ppmi_dx,52,0.046415888336127774,test,0.62,0.046995046547482,0.5824175824175825,0.051826829456541615,0.5814940577249575,0.04979921677443064 +flat_mae,patch,logistic,ppmi_dx,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,53,2.782559402207126,test,0.55,0.04745463517929517,0.5396419437340154,0.04775294687403358,0.5454159592529711,0.049309415309893 +flat_mae,patch,logistic,ppmi_dx,54,0.005994842503189409,train,0.7330960854092526,0.01662678087686323,0.6925915661420424,0.020443681124661533,0.6858274459430529,0.018820138894408287 +flat_mae,patch,logistic,ppmi_dx,54,0.005994842503189409,test,0.55,0.04605473265583027,0.5021573182874212,0.04786812136845858,0.5046689303904923,0.04612370514877537 +flat_mae,patch,logistic,ppmi_dx,55,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,55,21.54434690031882,test,0.6,0.04482849094047222,0.5833333333333333,0.046418273462788096,0.5857385398981324,0.04725411490121178 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,train,0.8042704626334519,0.015458925223581173,0.7818969531900481,0.018230261851414086,0.7714622136587455,0.01800915203661604 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,test,0.64,0.046751337948768915,0.592944369063772,0.05434622524608595,0.5925297113752122,0.05029172201083732 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,train,0.797153024911032,0.01582791733091751,0.7727988425039363,0.01871929624511709,0.7622029543994862,0.018278060787021662 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,test,0.7,0.040116829386181545,0.6493688639551192,0.0507632949920872,0.6460101867572157,0.04553371611500591 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,train,0.8238434163701067,0.014439234160343994,0.8034569366581532,0.016928909779322685,0.7917068079640335,0.016781187381561622 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,test,0.67,0.04486856806273184,0.6296711929076422,0.05230670261082857,0.6269100169779287,0.049255407704104734 +flat_mae,patch,logistic,ppmi_dx,59,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,59,1291.5496650148827,test,0.63,0.04941750297212517,0.6009060511271707,0.05312936191366021,0.5997453310696095,0.052341003946846515 +flat_mae,patch,logistic,ppmi_dx,60,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,60,166.81005372000556,test,0.64,0.04657853582928515,0.6179966044142615,0.0492888142210014,0.6179966044142615,0.04952906671837194 +flat_mae,patch,logistic,ppmi_dx,61,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,61,1291.5496650148827,test,0.55,0.04880499564593772,0.529239460194581,0.04994525973428201,0.5301358234295416,0.05054336981795323 +flat_mae,patch,logistic,ppmi_dx,62,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,62,21.54434690031882,test,0.62,0.04615824520061394,0.5824175824175825,0.05139620385566298,0.5814940577249575,0.04899773105916247 +flat_mae,patch,logistic,ppmi_dx,63,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,63,2.782559402207126,test,0.54,0.04758266491065837,0.4875222816399287,0.050203450217702825,0.4915110356536503,0.04795426261259089 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,train,0.8291814946619217,0.015007337693545696,0.81197724991636,0.017071033195863634,0.8021301648469279,0.01706420255301747 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,test,0.65,0.04114918711226262,0.6011396011396011,0.04928851433577382,0.6005942275042444,0.04532489700476101 +flat_mae,patch,logistic,ppmi_dx,65,0.005994842503189409,train,0.7259786476868327,0.016395105467462496,0.6805167958656331,0.02069187496073376,0.6748287304645686,0.01872036835639237 +flat_mae,patch,logistic,ppmi_dx,65,0.005994842503189409,test,0.62,0.04081115043710481,0.5476190476190476,0.050173414267953485,0.5560271646859083,0.04344546078792213 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,train,0.8256227758007118,0.015746880183346443,0.8061796171171172,0.0182500260315042,0.7948913508884607,0.018084061632786617 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,test,0.59,0.04784696855601199,0.5577607593571352,0.050637052283693765,0.5573005093378608,0.04949236143908256 +flat_mae,patch,logistic,ppmi_dx,67,0.005994842503189409,train,0.7330960854092526,0.01735939412771078,0.6916695926966292,0.021457017711110936,0.6849577178334404,0.019705178738312627 +flat_mae,patch,logistic,ppmi_dx,67,0.005994842503189409,test,0.58,0.04517122535420087,0.5174632352941176,0.0514281833161737,0.5237691001697793,0.047187049630870274 +flat_mae,patch,logistic,ppmi_dx,68,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,68,166.81005372000556,test,0.6,0.050080878586542386,0.5796553173602353,0.05239001576145548,0.5806451612903225,0.052700890201385436 +flat_mae,patch,logistic,ppmi_dx,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,69,166.81005372000556,test,0.62,0.045947552709584005,0.5924495924495925,0.049063677870131786,0.5916808149405772,0.04844953604004136 +flat_mae,patch,logistic,ppmi_dx,70,0.046415888336127774,train,0.8131672597864769,0.015349466918237895,0.7920740795551844,0.017912050238747527,0.7812968315135945,0.017730720776784985 +flat_mae,patch,logistic,ppmi_dx,70,0.046415888336127774,test,0.61,0.04507843830480378,0.5741893219783819,0.04886585614819813,0.5734295415959253,0.04742353649505929 +flat_mae,patch,logistic,ppmi_dx,71,0.3593813663804626,train,0.9483985765124555,0.009428055789217106,0.9448264188628785,0.010208022997151378,0.9398281952472705,0.011105918156266732 +flat_mae,patch,logistic,ppmi_dx,71,0.3593813663804626,test,0.55,0.05028534180056848,0.508679986898133,0.05295729513822671,0.5097623089983022,0.0514000588722376 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7259786476868327,0.016507790174170568,0.6774448419797257,0.021386057850709426,0.6722195461357311,0.019018355332042283 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.64,0.04349604119917122,0.5863970588235294,0.05053869992910319,0.5874363327674024,0.04623764003795999 +flat_mae,patch,logistic,ppmi_dx,73,0.3593813663804626,train,0.9448398576512456,0.009739906014802648,0.9411345337086054,0.010495922949425272,0.9369380218368657,0.011181722576772484 +flat_mae,patch,logistic,ppmi_dx,73,0.3593813663804626,test,0.65,0.04933728407604131,0.6266666666666667,0.05219130204490227,0.6260611205432938,0.0517882666350904 +flat_mae,patch,logistic,ppmi_dx,74,0.046415888336127774,train,0.8309608540925267,0.015523107197208135,0.8141553798867319,0.017666816572959935,0.8044449796617427,0.01777670055579514 +flat_mae,patch,logistic,ppmi_dx,74,0.046415888336127774,test,0.6,0.04600433023096847,0.5604395604395604,0.05104965650769923,0.5602716468590832,0.048815325017878046 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,train,0.7366548042704626,0.015879845761197588,0.6919954970968124,0.020145976257193676,0.6852387069150075,0.018144734489034656 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,test,0.65,0.04152059729820851,0.5944849959448499,0.05086677452937275,0.5955008488964346,0.045903108065366724 +flat_mae,patch,logistic,ppmi_dx,76,0.046415888336127774,train,0.8149466192170819,0.015132922533685676,0.7948242694862182,0.0176911138879785,0.7844813744380219,0.01770689348989579 +flat_mae,patch,logistic,ppmi_dx,76,0.046415888336127774,test,0.72,0.04277492723547289,0.6880570409982174,0.048925432795284066,0.6825127334465195,0.04679678245081813 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,train,0.9430604982206405,0.009219411211725674,0.9391774891774892,0.009981732974608904,0.9346232070220509,0.010872791325420444 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,test,0.66,0.04486218452104177,0.6392190152801358,0.047773143641127815,0.6392190152801358,0.048090744782581796 +flat_mae,patch,logistic,ppmi_dx,78,0.3593813663804626,train,0.9323843416370107,0.009794739960301805,0.9277732683982685,0.010609334940927768,0.9233435024619996,0.011477828838889863 +flat_mae,patch,logistic,ppmi_dx,78,0.3593813663804626,test,0.67,0.043003492881392776,0.6296711929076422,0.049938020032827345,0.6269100169779287,0.04717843959283201 +flat_mae,patch,logistic,ppmi_dx,79,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,79,1291.5496650148827,test,0.59,0.04611767557021928,0.5327635327635327,0.05151628612808517,0.5369269949066213,0.047372314832263854 +flat_mae,patch,logistic,ppmi_dx,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,80,2.782559402207126,test,0.6,0.04839406575190806,0.5833333333333333,0.05008338213612533,0.5857385398981324,0.0510699986495397 +flat_mae,patch,logistic,ppmi_dx,81,0.046415888336127774,train,0.8113879003558719,0.015302604899022383,0.7898279730740463,0.017953102607484804,0.7789820166987798,0.017788057196535957 +flat_mae,patch,logistic,ppmi_dx,81,0.046415888336127774,test,0.61,0.045553006486948815,0.568536342515765,0.05003759087847805,0.5683361629881154,0.04775172983690121 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,train,0.7491103202846975,0.01504217674465068,0.7022264810326363,0.020247105123115918,0.6944845857418112,0.017952033297989802 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,test,0.63,0.04292476674368773,0.5713127099988413,0.05181808230635205,0.5742784380305602,0.04638186181682066 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,train,0.8362989323843416,0.014524287054841557,0.8193809823237617,0.016667742216380003,0.8087802397773496,0.016753263766858633 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,test,0.55,0.04994769664358908,0.5248653785239151,0.05105099075687778,0.5250424448217317,0.05112423597965946 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7366548042704626,0.01601960329155893,0.6919954970968124,0.02024559707001699,0.6852387069150075,0.01826543491447043 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.59,0.046030007603736066,0.539894512400404,0.05110378052434421,0.5420203735144312,0.048238238425047854 +flat_mae,patch,logistic,ppmi_dx,85,0.3593813663804626,train,0.9430604982206405,0.010056545969789624,0.9394070080862533,0.010799632583245643,0.936362663241276,0.011550906933002786 +flat_mae,patch,logistic,ppmi_dx,85,0.3593813663804626,test,0.63,0.04557706001926847,0.6053333333333333,0.047688024102301815,0.6048387096774194,0.04747682142158888 +flat_mae,patch,logistic,ppmi_dx,86,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,86,166.81005372000556,test,0.66,0.04545327270945404,0.6310763888888888,0.04995575048281524,0.6290322580645161,0.04889331383881605 +flat_mae,patch,logistic,ppmi_dx,87,0.046415888336127774,train,0.806049822064057,0.016166668353087877,0.7846932499165247,0.01875440130196279,0.7746467565831727,0.018513828033348046 +flat_mae,patch,logistic,ppmi_dx,87,0.046415888336127774,test,0.6,0.04711536479748406,0.554367201426025,0.053948927097137535,0.5551782682512734,0.05062286819955253 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.8238434163701067,0.015084137328542031,0.8054037038980117,0.01743577167430011,0.7951857204024835,0.017548319111194425 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.64,0.04127117638255542,0.5989304812834224,0.0478327702061941,0.597623089983022,0.04504135057837615 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,train,0.7241992882562278,0.015496688395396675,0.6748207239727791,0.020257240480142307,0.6699047313209163,0.017929756261927488 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,test,0.75,0.037104802923610854,0.6932891669733775,0.05197200240411804,0.6863327674023769,0.044350578393458434 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,train,0.7277580071174378,0.016030231235737286,0.6878265194613769,0.019238778834035037,0.681492185827446,0.017768453624771693 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,test,0.64,0.03828216817266233,0.5714285714285714,0.04706986841693695,0.5772495755517827,0.041056132083054 +flat_mae,patch,logistic,ppmi_dx,91,0.3593813663804626,train,0.9501779359430605,0.009247380777305858,0.946780303030303,0.00996449292376542,0.9421430100620852,0.01059746223337036 +flat_mae,patch,logistic,ppmi_dx,91,0.3593813663804626,test,0.63,0.04548587033354424,0.6009060511271707,0.04959468253259841,0.5997453310696095,0.04855752883879786 +flat_mae,patch,logistic,ppmi_dx,92,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,92,1291.5496650148827,test,0.64,0.050904318087957916,0.625,0.0519588195021414,0.6281833616298811,0.0527024302942833 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7259786476868327,0.017463695886141113,0.6805167958656331,0.02157354980770396,0.6748287304645686,0.019528881839355902 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.65,0.04399750447468584,0.6266666666666667,0.047746458364485066,0.6260611205432938,0.047789687770116024 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,train,0.7277580071174378,0.01674493474167773,0.6840643290969015,0.020902591055221804,0.678013273388996,0.018934096882191234 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,test,0.66,0.041025241010870364,0.609375,0.04947568732048449,0.6086587436332768,0.04501942277367941 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,train,0.7455516014234875,0.016647616844962645,0.7037995304489483,0.021285308553379123,0.695943052879469,0.019343628963898237 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,test,0.64,0.0427258422971391,0.5792426367461431,0.05195978680227832,0.5823429541595926,0.045979232158601016 +flat_mae,patch,logistic,ppmi_dx,96,0.046415888336127774,train,0.8327402135231317,0.015259119202492686,0.8145527051125434,0.017715908112977036,0.8032808820381074,0.017719190035597496 +flat_mae,patch,logistic,ppmi_dx,96,0.046415888336127774,test,0.6,0.042422164018352485,0.5404411764705883,0.049152077986878207,0.5449915110356536,0.04485641089731464 +flat_mae,patch,logistic,ppmi_dx,97,0.3593813663804626,train,0.9430604982206405,0.009353604264759834,0.9391774891774892,0.010107929759359466,0.9346232070220509,0.01087469109081418 +flat_mae,patch,logistic,ppmi_dx,97,0.3593813663804626,test,0.64,0.04777656329205775,0.6216897856242118,0.04971825246175989,0.6230899830220713,0.049994696753527385 +flat_mae,patch,logistic,ppmi_dx,98,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,98,21.54434690031882,test,0.65,0.04750210100616604,0.6368917937545389,0.04887577630474463,0.6413412563667232,0.04991168465312845 +flat_mae,patch,logistic,ppmi_dx,99,0.3593813663804626,train,0.9483985765124555,0.008905091507312692,0.944932305727405,0.00962221421759671,0.9406979233568828,0.010517686544863595 +flat_mae,patch,logistic,ppmi_dx,99,0.3593813663804626,test,0.62,0.04707819452782785,0.6041666666666667,0.049364416466533834,0.6069609507640068,0.05029149910994365 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,train,0.8113879003558719,0.015734568284522944,0.7918954796338993,0.018011918232139868,0.7824609291372298,0.017942483734992538 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,test,0.63,0.04481706817720231,0.5960257670051315,0.05032183049997936,0.5946519524617997,0.048798049363389454 diff --git a/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..5c1f44bbd5027e6dd09adb5c672f67101e0a29bc --- /dev/null +++ b/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:15:51 time: 4.0994 data: 3.0545 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:24 time: 0.2139 data: 0.0827 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:55 time: 0.1736 data: 0.0540 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:42 time: 0.1674 data: 0.0499 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:35 time: 0.1894 data: 0.0584 max mem: 2851 +extract (train) [100/232] eta: 0:00:29 time: 0.1701 data: 0.0487 max mem: 2851 +extract (train) [120/232] eta: 0:00:24 time: 0.1926 data: 0.0586 max mem: 2851 +extract (train) [140/232] eta: 0:00:19 time: 0.1973 data: 0.0626 max mem: 2851 +extract (train) [160/232] eta: 0:00:15 time: 0.1737 data: 0.0501 max mem: 2851 +extract (train) [180/232] eta: 0:00:10 time: 0.1864 data: 0.0559 max mem: 2851 +extract (train) [200/232] eta: 0:00:06 time: 0.1712 data: 0.0509 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1419 data: 0.0389 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1461 data: 0.0434 max mem: 2851 +extract (train) Total time: 0:00:45 (0.1973 s / it) +extract (validation) [ 0/50] eta: 0:03:13 time: 3.8609 data: 3.7234 max mem: 2851 +extract (validation) [20/50] eta: 0:00:11 time: 0.2089 data: 0.0684 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1416 data: 0.0398 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1398 data: 0.0424 max mem: 2851 +extract (validation) Total time: 0:00:12 (0.2508 s / it) +extract (test) [ 0/50] eta: 0:03:25 time: 4.1131 data: 3.9726 max mem: 2851 +extract (test) [20/50] eta: 0:00:12 time: 0.2182 data: 0.0635 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1397 data: 0.0359 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1391 data: 0.0369 max mem: 2851 +extract (test) Total time: 0:00:12 (0.2568 s / it) +feature extraction time: 0:01:11 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 0.35938 | train | 0.94306 | 0.0097188 | 0.93941 | 0.010426 | 0.93567 | 0.011095 | +| flat_mae | patch | logistic | ppmi_dx | | 0.35938 | test | 0.63 | 0.04525 | 0.59066 | 0.048446 | 0.58923 | 0.046842 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04371370036956378, "f1": 0.5766488413547237, "f1_std": 0.04965517929109973, "bacc": 0.5764006791171477, "bacc_std": 0.046718817119045165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04961217189359885, "f1": 0.5626666666666666, "f1_std": 0.05281989014396253, "bacc": 0.5623938879456706, "bacc_std": 0.05229097998959533} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.04351703574463683, "f1": 0.6615351020853806, "f1_std": 0.048463542592696605, "bacc": 0.6583191850594228, "bacc_std": 0.047252174901520914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.04106226978626486, "f1": 0.66078697421981, "f1_std": 0.047734870379201884, "bacc": 0.6561969439728353, "bacc_std": 0.044782648390475896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.050015721528335465, "f1": 0.5280995280995281, "f1_std": 0.05238886009304962, "bacc": 0.5280135823429541, "bacc_std": 0.05173854732883097} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.04470615170197497, "f1": 0.525101763907734, "f1_std": 0.051937224805165036, "bacc": 0.5288624787775891, "bacc_std": 0.04761150502647497} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.051013896146050244, "f1": 0.5366079703429101, "f1_std": 0.05184591645257948, "bacc": 0.5403225806451613, "bacc_std": 0.05342905332313932} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.038295294750138695, "f1": 0.5386109762020399, "f1_std": 0.048785906087796614, "bacc": 0.5509337860780985, "bacc_std": 0.04092566604131759} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03829867360627519, "f1": 0.6579785352046232, "f1_std": 0.04956219876842803, "bacc": 0.6540747028862479, "bacc_std": 0.0439527155611839} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04425717568937268, "f1": 0.5604395604395604, "f1_std": 0.04849572941282711, "bacc": 0.5602716468590832, "bacc_std": 0.04660202522938627} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.7, "acc_std": 0.044095732219796506, "f1": 0.6782496782496783, "f1_std": 0.047327395321723906, "bacc": 0.6765704584040747, "bacc_std": 0.04682570102858115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.044211034821637005, "f1": 0.6072270227808326, "f1_std": 0.050228756052448094, "bacc": 0.6056876061120543, "bacc_std": 0.04736082615996879} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04650550074991129, "f1": 0.568536342515765, "f1_std": 0.05122102941892769, "bacc": 0.5683361629881154, "bacc_std": 0.04890203025188589} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.57, "acc_std": 0.04531796994570696, "f1": 0.5305164319248826, "f1_std": 0.04858880543837976, "bacc": 0.5309847198641766, "bacc_std": 0.0469430625270563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04194617503420306, "f1": 0.609375, "f1_std": 0.0507321062318044, "bacc": 0.6086587436332768, "bacc_std": 0.04615700467813121} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.045189817437117405, "f1": 0.6224786970121885, "f1_std": 0.0490666338031973, "bacc": 0.6209677419354839, "bacc_std": 0.04831710061727428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.04571877513669849, "f1": 0.5464100011063171, "f1_std": 0.050084490057855875, "bacc": 0.5471137521222411, "bacc_std": 0.047829283307919995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 1291.5496650148827, "split": "test", "acc": 0.57, "acc_std": 0.049609978834907795, "f1": 0.557203171661003, "f1_std": 0.05066544682878853, "bacc": 0.5615449915110357, "bacc_std": 0.05224853446061583} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.71, "acc_std": 0.044014888390179974, "f1": 0.6938021328265231, "f1_std": 0.04620432585274497, "bacc": 0.6948217317487266, "bacc_std": 0.046390377377879745} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.041664848493664294, "f1": 0.6176572818908586, "f1_std": 0.050292728202088126, "bacc": 0.616723259762309, "bacc_std": 0.04547365133574492} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 1291.5496650148827, "split": "test", "acc": 0.61, "acc_std": 0.04743608752837865, "f1": 0.5882166613873931, "f1_std": 0.049491386541767775, "bacc": 0.5887096774193548, "bacc_std": 0.049504562984715186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 21.54434690031882, "split": "test", "acc": 0.55, "acc_std": 0.04869856260712424, "f1": 0.5248653785239151, "f1_std": 0.050756742363550975, "bacc": 0.5250424448217317, "bacc_std": 0.051113161301739526} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04496213518061614, "f1": 0.609375, "f1_std": 0.054040822047925936, "bacc": 0.6086587436332768, "bacc_std": 0.04955820404901081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04088236294540716, "f1": 0.5636277862955537, "f1_std": 0.050203991210757234, "bacc": 0.5691850594227504, "bacc_std": 0.04412904201521217} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.03998951862676019, "f1": 0.5792426367461431, "f1_std": 0.05036360647994039, "bacc": 0.5823429541595926, "bacc_std": 0.044420179846896654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04565597879796249, "f1": 0.6179966044142615, "f1_std": 0.048775255708017544, "bacc": 0.6179966044142615, "bacc_std": 0.04896193072503621} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.041886183879651775, "f1": 0.5476190476190476, "f1_std": 0.052077775167443024, "bacc": 0.5560271646859083, "bacc_std": 0.04543871301389427} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04606073816169255, "f1": 0.5924495924495925, "f1_std": 0.04968790442111471, "bacc": 0.5916808149405772, "bacc_std": 0.04891071030375556} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.047497974693664576, "f1": 0.5906626839252129, "f1_std": 0.05315077608236612, "bacc": 0.5895585738539898, "bacc_std": 0.0506647830186329} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 21.54434690031882, "split": "test", "acc": 0.61, "acc_std": 0.04777488461524528, "f1": 0.584, "f1_std": 0.051627396178459475, "bacc": 0.583616298811545, "bacc_std": 0.05144063503619579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04606840131804012, "f1": 0.6353496353496353, "f1_std": 0.04849142693462424, "bacc": 0.634125636672326, "bacc_std": 0.04791073543150732} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04428216345211692, "f1": 0.6440513428972063, "f1_std": 0.048367590788555063, "bacc": 0.6421901528013583, "bacc_std": 0.04776257510752226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04723928449923855, "f1": 0.6353496353496353, "f1_std": 0.05120044521088683, "bacc": 0.634125636672326, "bacc_std": 0.05076863277495403} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.044037125246773313, "f1": 0.6296711929076422, "f1_std": 0.0515727101953694, "bacc": 0.6269100169779287, "bacc_std": 0.04865137855127997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.049603528100327705, "f1": 0.5659722222222222, "f1_std": 0.053085290658552796, "bacc": 0.565365025466893, "bacc_std": 0.05195229570282048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04114984811636612, "f1": 0.5792426367461431, "f1_std": 0.04936410027607919, "bacc": 0.5823429541595926, "bacc_std": 0.04417171930517304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.04500335987456937, "f1": 0.630450849963045, "f1_std": 0.04682204692793159, "bacc": 0.6311544991511036, "bacc_std": 0.04707293583689922} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.04530611879205721, "f1": 0.6847414880201765, "f1_std": 0.04735395224066736, "bacc": 0.6867572156196944, "bacc_std": 0.04779090071698159} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.0459393121411281, "f1": 0.6396986570586308, "f1_std": 0.05064134614606536, "bacc": 0.6370967741935484, "bacc_std": 0.04919403747059671} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.66, "acc_std": 0.03585707182690745, "f1": 0.5687468290208015, "f1_std": 0.052414192804968146, "bacc": 0.5831918505942275, "bacc_std": 0.04086209520637194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.05132166793860075, "f1": 0.586606035551881, "f1_std": 0.05222995387608565, "bacc": 0.5908319185059423, "bacc_std": 0.05319657754243219} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.039683119837028935, "f1": 0.6259934548854604, "f1_std": 0.04975199597504042, "bacc": 0.6247877758913413, "bacc_std": 0.04432757335355691} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04425144065451429, "f1": 0.554367201426025, "f1_std": 0.049556954063591244, "bacc": 0.5551782682512734, "bacc_std": 0.04696458801086889} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04148917449166709, "f1": 0.6259934548854604, "f1_std": 0.05256835054930224, "bacc": 0.6247877758913413, "bacc_std": 0.046974610754166264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.044687465804182716, "f1": 0.6212121212121212, "f1_std": 0.050608278902441156, "bacc": 0.6188455008488964, "bacc_std": 0.04811151937006594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.0459838058451016, "f1": 0.5783475783475784, "f1_std": 0.0538732946615018, "bacc": 0.5793718166383701, "bacc_std": 0.04953516975815706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.04302354704112621, "f1": 0.6744791666666667, "f1_std": 0.04795109699290812, "bacc": 0.6714770797962648, "bacc_std": 0.047428645864378505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.042187291925412804, "f1": 0.5989304812834224, "f1_std": 0.048709334146665534, "bacc": 0.597623089983022, "bacc_std": 0.04596119541178976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.04786762998102162, "f1": 0.6224786970121885, "f1_std": 0.05126797979686403, "bacc": 0.6209677419354839, "bacc_std": 0.050436386082368084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04265457068122946, "f1": 0.6155585707824514, "f1_std": 0.04908085042653531, "bacc": 0.6137521222410866, "bacc_std": 0.04579370911799187} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.04964866564168668, "f1": 0.5442708333333334, "f1_std": 0.053437358865708384, "bacc": 0.5441426146010186, "bacc_std": 0.05207740706125955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.046995046547482, "f1": 0.5824175824175825, "f1_std": 0.051826829456541615, "bacc": 0.5814940577249575, "bacc_std": 0.04979921677443064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.55, "acc_std": 0.04745463517929517, "f1": 0.5396419437340154, "f1_std": 0.04775294687403358, "bacc": 0.5454159592529711, "bacc_std": 0.049309415309893} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.55, "acc_std": 0.04605473265583027, "f1": 0.5021573182874212, "f1_std": 0.04786812136845858, "bacc": 0.5046689303904923, "bacc_std": 0.04612370514877537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 21.54434690031882, "split": "test", "acc": 0.6, "acc_std": 0.04482849094047222, "f1": 0.5833333333333333, "f1_std": 0.046418273462788096, "bacc": 0.5857385398981324, "bacc_std": 0.04725411490121178} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.046751337948768915, "f1": 0.592944369063772, "f1_std": 0.05434622524608595, "bacc": 0.5925297113752122, "bacc_std": 0.05029172201083732} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.040116829386181545, "f1": 0.6493688639551192, "f1_std": 0.0507632949920872, "bacc": 0.6460101867572157, "bacc_std": 0.04553371611500591} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04486856806273184, "f1": 0.6296711929076422, "f1_std": 0.05230670261082857, "bacc": 0.6269100169779287, "bacc_std": 0.049255407704104734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 1291.5496650148827, "split": "test", "acc": 0.63, "acc_std": 0.04941750297212517, "f1": 0.6009060511271707, "f1_std": 0.05312936191366021, "bacc": 0.5997453310696095, "bacc_std": 0.052341003946846515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 166.81005372000556, "split": "test", "acc": 0.64, "acc_std": 0.04657853582928515, "f1": 0.6179966044142615, "f1_std": 0.0492888142210014, "bacc": 0.6179966044142615, "bacc_std": 0.04952906671837194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 1291.5496650148827, "split": "test", "acc": 0.55, "acc_std": 0.04880499564593772, "f1": 0.529239460194581, "f1_std": 0.04994525973428201, "bacc": 0.5301358234295416, "bacc_std": 0.05054336981795323} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 21.54434690031882, "split": "test", "acc": 0.62, "acc_std": 0.04615824520061394, "f1": 0.5824175824175825, "f1_std": 0.05139620385566298, "bacc": 0.5814940577249575, "bacc_std": 0.04899773105916247} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.54, "acc_std": 0.04758266491065837, "f1": 0.4875222816399287, "f1_std": 0.050203450217702825, "bacc": 0.4915110356536503, "bacc_std": 0.04795426261259089} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04114918711226262, "f1": 0.6011396011396011, "f1_std": 0.04928851433577382, "bacc": 0.6005942275042444, "bacc_std": 0.04532489700476101} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04081115043710481, "f1": 0.5476190476190476, "f1_std": 0.050173414267953485, "bacc": 0.5560271646859083, "bacc_std": 0.04344546078792213} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.04784696855601199, "f1": 0.5577607593571352, "f1_std": 0.050637052283693765, "bacc": 0.5573005093378608, "bacc_std": 0.04949236143908256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.04517122535420087, "f1": 0.5174632352941176, "f1_std": 0.0514281833161737, "bacc": 0.5237691001697793, "bacc_std": 0.047187049630870274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 166.81005372000556, "split": "test", "acc": 0.6, "acc_std": 0.050080878586542386, "f1": 0.5796553173602353, "f1_std": 0.05239001576145548, "bacc": 0.5806451612903225, "bacc_std": 0.052700890201385436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.62, "acc_std": 0.045947552709584005, "f1": 0.5924495924495925, "f1_std": 0.049063677870131786, "bacc": 0.5916808149405772, "bacc_std": 0.04844953604004136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04507843830480378, "f1": 0.5741893219783819, "f1_std": 0.04886585614819813, "bacc": 0.5734295415959253, "bacc_std": 0.04742353649505929} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.55, "acc_std": 0.05028534180056848, "f1": 0.508679986898133, "f1_std": 0.05295729513822671, "bacc": 0.5097623089983022, "bacc_std": 0.0514000588722376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04349604119917122, "f1": 0.5863970588235294, "f1_std": 0.05053869992910319, "bacc": 0.5874363327674024, "bacc_std": 0.04623764003795999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.04933728407604131, "f1": 0.6266666666666667, "f1_std": 0.05219130204490227, "bacc": 0.6260611205432938, "bacc_std": 0.0517882666350904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04600433023096847, "f1": 0.5604395604395604, "f1_std": 0.05104965650769923, "bacc": 0.5602716468590832, "bacc_std": 0.048815325017878046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04152059729820851, "f1": 0.5944849959448499, "f1_std": 0.05086677452937275, "bacc": 0.5955008488964346, "bacc_std": 0.045903108065366724} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.72, "acc_std": 0.04277492723547289, "f1": 0.6880570409982174, "f1_std": 0.048925432795284066, "bacc": 0.6825127334465195, "bacc_std": 0.04679678245081813} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04486218452104177, "f1": 0.6392190152801358, "f1_std": 0.047773143641127815, "bacc": 0.6392190152801358, "bacc_std": 0.048090744782581796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.043003492881392776, "f1": 0.6296711929076422, "f1_std": 0.049938020032827345, "bacc": 0.6269100169779287, "bacc_std": 0.04717843959283201} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 1291.5496650148827, "split": "test", "acc": 0.59, "acc_std": 0.04611767557021928, "f1": 0.5327635327635327, "f1_std": 0.05151628612808517, "bacc": 0.5369269949066213, "bacc_std": 0.047372314832263854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.04839406575190806, "f1": 0.5833333333333333, "f1_std": 0.05008338213612533, "bacc": 0.5857385398981324, "bacc_std": 0.0510699986495397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.045553006486948815, "f1": 0.568536342515765, "f1_std": 0.05003759087847805, "bacc": 0.5683361629881154, "bacc_std": 0.04775172983690121} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04292476674368773, "f1": 0.5713127099988413, "f1_std": 0.05181808230635205, "bacc": 0.5742784380305602, "bacc_std": 0.04638186181682066} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.04994769664358908, "f1": 0.5248653785239151, "f1_std": 0.05105099075687778, "bacc": 0.5250424448217317, "bacc_std": 0.05112423597965946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.046030007603736066, "f1": 0.539894512400404, "f1_std": 0.05110378052434421, "bacc": 0.5420203735144312, "bacc_std": 0.048238238425047854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.04557706001926847, "f1": 0.6053333333333333, "f1_std": 0.047688024102301815, "bacc": 0.6048387096774194, "bacc_std": 0.04747682142158888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 166.81005372000556, "split": "test", "acc": 0.66, "acc_std": 0.04545327270945404, "f1": 0.6310763888888888, "f1_std": 0.04995575048281524, "bacc": 0.6290322580645161, "bacc_std": 0.04889331383881605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04711536479748406, "f1": 0.554367201426025, "f1_std": 0.053948927097137535, "bacc": 0.5551782682512734, "bacc_std": 0.05062286819955253} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04127117638255542, "f1": 0.5989304812834224, "f1_std": 0.0478327702061941, "bacc": 0.597623089983022, "bacc_std": 0.04504135057837615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.75, "acc_std": 0.037104802923610854, "f1": 0.6932891669733775, "f1_std": 0.05197200240411804, "bacc": 0.6863327674023769, "bacc_std": 0.044350578393458434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.03828216817266233, "f1": 0.5714285714285714, "f1_std": 0.04706986841693695, "bacc": 0.5772495755517827, "bacc_std": 0.041056132083054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.04548587033354424, "f1": 0.6009060511271707, "f1_std": 0.04959468253259841, "bacc": 0.5997453310696095, "bacc_std": 0.04855752883879786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 1291.5496650148827, "split": "test", "acc": 0.64, "acc_std": 0.050904318087957916, "f1": 0.625, "f1_std": 0.0519588195021414, "bacc": 0.6281833616298811, "bacc_std": 0.0527024302942833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04399750447468584, "f1": 0.6266666666666667, "f1_std": 0.047746458364485066, "bacc": 0.6260611205432938, "bacc_std": 0.047789687770116024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.041025241010870364, "f1": 0.609375, "f1_std": 0.04947568732048449, "bacc": 0.6086587436332768, "bacc_std": 0.04501942277367941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.0427258422971391, "f1": 0.5792426367461431, "f1_std": 0.05195978680227832, "bacc": 0.5823429541595926, "bacc_std": 0.045979232158601016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.042422164018352485, "f1": 0.5404411764705883, "f1_std": 0.049152077986878207, "bacc": 0.5449915110356536, "bacc_std": 0.04485641089731464} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04777656329205775, "f1": 0.6216897856242118, "f1_std": 0.04971825246175989, "bacc": 0.6230899830220713, "bacc_std": 0.049994696753527385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 21.54434690031882, "split": "test", "acc": 0.65, "acc_std": 0.04750210100616604, "f1": 0.6368917937545389, "f1_std": 0.04887577630474463, "bacc": 0.6413412563667232, "bacc_std": 0.04991168465312845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.04707819452782785, "f1": 0.6041666666666667, "f1_std": 0.049364416466533834, "bacc": 0.6069609507640068, "bacc_std": 0.05029149910994365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04481706817720231, "f1": 0.5960257670051315, "f1_std": 0.05032183049997936, "bacc": 0.5946519524617997, "bacc_std": 0.048798049363389454} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|-------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 85.496 | 308 | 0.8598 | 0.10187 | 0.84087 | 0.11921 | 0.83511 | 0.12136 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 85.496 | 308 | 0.6304 | 0.043133 | 0.59217 | 0.044997 | 0.59278 | 0.043239 | + + +done! total time: 0:05:18 diff --git a/data_scaling/n400_1/pretrain/config.yaml b/data_scaling/n400_1/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b035b74fa3d82bda5dd05fd6aa2ffbab4c63dcef --- /dev/null +++ b/data_scaling/n400_1/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n400_1/pretrain +notes: data scaling experiment n400_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n400_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00399}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n400_1/pretrain/log.json b/data_scaling/n400_1/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..dce6618e5543b812b88cb621d4446d8c1f69eb19 --- /dev/null +++ b/data_scaling/n400_1/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05253171782687306, "train/loss": 0.9932034360313415, "eval/hcp-train-subset/loss": 0.9886764874381404, "eval/hcp-val/loss": 0.9893021083647205, "eval/nsd-val/loss": 0.9902563835344007} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.08323407599210739, "train/loss": 0.9877021530723572, "eval/hcp-train-subset/loss": 0.9861039163604859, "eval/hcp-val/loss": 0.9858533605452506, "eval/nsd-val/loss": 0.9875980336819926} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.11681490068622429, "train/loss": 0.9839487573242187, "eval/hcp-train-subset/loss": 0.9818917426370806, "eval/hcp-val/loss": 0.9817590540455233, "eval/nsd-val/loss": 0.9835789761235637} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.19776365545697885, "train/loss": 0.9759244383239746, "eval/hcp-train-subset/loss": 0.9704708755016327, "eval/hcp-val/loss": 0.9707422612174865, "eval/nsd-val/loss": 0.971840298944904} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.22726607908768637, "train/loss": 0.9475549354171753, "eval/hcp-train-subset/loss": 0.9239714241796925, "eval/hcp-val/loss": 0.9228701360764042, "eval/nsd-val/loss": 0.8982545983406806} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.17325427122271014, "train/loss": 0.9093724193954468, "eval/hcp-train-subset/loss": 0.8869803163313097, "eval/hcp-val/loss": 0.8862056520677382, "eval/nsd-val/loss": 0.8514787333626901} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.12361626946985273, "train/loss": 0.8794224549674988, "eval/hcp-train-subset/loss": 0.8687657608139899, "eval/hcp-val/loss": 0.8677900933450268, "eval/nsd-val/loss": 0.8338712444228511} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.09967410319369101, "train/loss": 0.8649758139324188, "eval/hcp-train-subset/loss": 0.8611551926982018, "eval/hcp-val/loss": 0.8604481047199618, "eval/nsd-val/loss": 0.8275549671342296} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.08736097741808264, "train/loss": 0.8568171174621582, "eval/hcp-train-subset/loss": 0.8557896940938888, "eval/hcp-val/loss": 0.8556686505194633, "eval/nsd-val/loss": 0.8230214224707696} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.08096725973282749, "train/loss": 0.854563008184433, "eval/hcp-train-subset/loss": 0.8529351555532024, "eval/hcp-val/loss": 0.8534167053238038, "eval/nsd-val/loss": 0.8209515415853069} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.07810898929743723, "train/loss": 0.8515216468048096, "eval/hcp-train-subset/loss": 0.8496764879072866, "eval/hcp-val/loss": 0.8512248368032517, "eval/nsd-val/loss": 0.8206694539516203} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.07819226134995359, "train/loss": 0.8464853427028656, "eval/hcp-train-subset/loss": 0.8486170970624493, "eval/hcp-val/loss": 0.8492289829638696, "eval/nsd-val/loss": 0.8197939501654717} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.07955919921030881, "train/loss": 0.8419188573646545, "eval/hcp-train-subset/loss": 0.8472811118248971, "eval/hcp-val/loss": 0.8489567214442838, "eval/nsd-val/loss": 0.8206922056213501} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.07950216620130045, "train/loss": 0.8411275442504883, "eval/hcp-train-subset/loss": 0.8455295283948222, "eval/hcp-val/loss": 0.8478296629844173, "eval/nsd-val/loss": 0.8170165861806562} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.07988865848358782, "train/loss": 0.8407622010993957, "eval/hcp-train-subset/loss": 0.8443546304779668, "eval/hcp-val/loss": 0.8470775475425105, "eval/nsd-val/loss": 0.8185480784985327} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.08342094592510346, "train/loss": 0.8335819577503204, "eval/hcp-train-subset/loss": 0.844599429638155, "eval/hcp-val/loss": 0.8477778636640118, "eval/nsd-val/loss": 0.8179272768958923} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.08650592072476707, "train/loss": 0.8305048271751404, "eval/hcp-train-subset/loss": 0.8434155352653996, "eval/hcp-val/loss": 0.8484255584978289, "eval/nsd-val/loss": 0.8212427045068433} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.08660692447828965, "train/loss": 0.8313600676250458, "eval/hcp-train-subset/loss": 0.8420162287450605, "eval/hcp-val/loss": 0.8471774833817636, "eval/nsd-val/loss": 0.8181885701994742} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.08684488090659073, "train/loss": 0.8302282214546204, "eval/hcp-train-subset/loss": 0.841385486625856, "eval/hcp-val/loss": 0.8470755594391977, "eval/nsd-val/loss": 0.8188107282884659} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.0904795483747005, "train/loss": 0.8243127859306335, "eval/hcp-train-subset/loss": 0.8412181494697448, "eval/hcp-val/loss": 0.8474279959355632, "eval/nsd-val/loss": 0.8162468200729739} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.09209480850567395, "train/loss": 0.8249018680667877, "eval/hcp-train-subset/loss": 0.8407210986460408, "eval/hcp-val/loss": 0.8467553969352476, "eval/nsd-val/loss": 0.8163584614953687} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.0931894850331004, "train/loss": 0.8213745770359039, "eval/hcp-train-subset/loss": 0.8395926269792742, "eval/hcp-val/loss": 0.846748645267179, "eval/nsd-val/loss": 0.8193576826203254} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.09653842270557998, "train/loss": 0.8189185058784485, "eval/hcp-train-subset/loss": 0.8382422289540691, "eval/hcp-val/loss": 0.8463601902607949, "eval/nsd-val/loss": 0.8195704913908436} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.09914825185382126, "train/loss": 0.817587581577301, "eval/hcp-train-subset/loss": 0.8375641809355828, "eval/hcp-val/loss": 0.8478448265983213, "eval/nsd-val/loss": 0.8150216408314244} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.1014105735904753, "train/loss": 0.8121148253440857, "eval/hcp-train-subset/loss": 0.836208398303678, "eval/hcp-val/loss": 0.8472535888994893, "eval/nsd-val/loss": 0.8206808124819109} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.10224344105117698, "train/loss": 0.8135703350543976, "eval/hcp-train-subset/loss": 0.837048884361021, "eval/hcp-val/loss": 0.8481925716323238, "eval/nsd-val/loss": 0.8211272435803567} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.10513349672035337, "train/loss": 0.8112368075370788, "eval/hcp-train-subset/loss": 0.8354599139382762, "eval/hcp-val/loss": 0.8456464582873929, "eval/nsd-val/loss": 0.8214374040403674} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.10831208675169796, "train/loss": 0.8063164767456055, "eval/hcp-train-subset/loss": 0.8359815872484638, "eval/hcp-val/loss": 0.8489981101405236, "eval/nsd-val/loss": 0.8251731924472316} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.10739962854686548, "train/loss": 0.8061359535312652, "eval/hcp-train-subset/loss": 0.8337298381713129, "eval/hcp-val/loss": 0.8473209671435817, "eval/nsd-val/loss": 0.822485173902204} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.10951835639469526, "train/loss": 0.8040780637168884, "eval/hcp-train-subset/loss": 0.8333918711831493, "eval/hcp-val/loss": 0.8472099111926171, "eval/nsd-val/loss": 0.8201563166033837} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.1117721069019846, "train/loss": 0.8024293426990509, "eval/hcp-train-subset/loss": 0.8320255010358749, "eval/hcp-val/loss": 0.8458288571526927, "eval/nsd-val/loss": 0.8205841991209215} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.11285329373701262, "train/loss": 0.8015312059211731, "eval/hcp-train-subset/loss": 0.8308570365751943, "eval/hcp-val/loss": 0.8464438415342762, "eval/nsd-val/loss": 0.8173661347358457} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.11484475577340751, "train/loss": 0.7999681686973572, "eval/hcp-train-subset/loss": 0.8326494299596355, "eval/hcp-val/loss": 0.8472545560329191, "eval/nsd-val/loss": 0.8233350563433862} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.11509341292043905, "train/loss": 0.8015153414726257, "eval/hcp-train-subset/loss": 0.8318735659122467, "eval/hcp-val/loss": 0.8483233586434396, "eval/nsd-val/loss": 0.8193432265712369} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.12044507258948715, "train/loss": 0.7947001446723938, "eval/hcp-train-subset/loss": 0.8302329182624817, "eval/hcp-val/loss": 0.8471583627885387, "eval/nsd-val/loss": 0.8202403928003004} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.12255365371699332, "train/loss": 0.7941262307834626, "eval/hcp-train-subset/loss": 0.8295572803866479, "eval/hcp-val/loss": 0.8467848358615753, "eval/nsd-val/loss": 0.8218030641155858} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.12224419595089572, "train/loss": 0.7922537671661377, "eval/hcp-train-subset/loss": 0.8291280346532022, "eval/hcp-val/loss": 0.8465725298850767, "eval/nsd-val/loss": 0.8242481802740405} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.12066988228477821, "train/loss": 0.7958024103832245, "eval/hcp-train-subset/loss": 0.8275175296491192, "eval/hcp-val/loss": 0.8482665563783338, "eval/nsd-val/loss": 0.8213300166591522} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.12498596353796938, "train/loss": 0.7918025265407562, "eval/hcp-train-subset/loss": 0.8277492638557188, "eval/hcp-val/loss": 0.8491401335885448, "eval/nsd-val/loss": 0.8202772130889278} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.1266783281152087, "train/loss": 0.7900685538864136, "eval/hcp-train-subset/loss": 0.8259285938355231, "eval/hcp-val/loss": 0.847166121006012, "eval/nsd-val/loss": 0.8211023298002058} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.13049605029944975, "train/loss": 0.7870240499305725, "eval/hcp-train-subset/loss": 0.8242251892243663, "eval/hcp-val/loss": 0.8471699126305119, "eval/nsd-val/loss": 0.8193107612671391} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.13106415353981385, "train/loss": 0.7880352628040314, "eval/hcp-train-subset/loss": 0.825802787657707, "eval/hcp-val/loss": 0.8475084996992542, "eval/nsd-val/loss": 0.8229518696185081} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.13351266802182463, "train/loss": 0.7878979594993591, "eval/hcp-train-subset/loss": 0.8238233750866305, "eval/hcp-val/loss": 0.8466926755443696, "eval/nsd-val/loss": 0.82596541220142} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.1333800126964205, "train/loss": 0.7846602644443512, "eval/hcp-train-subset/loss": 0.8250910841649578, "eval/hcp-val/loss": 0.8497149136758619, "eval/nsd-val/loss": 0.8242935845928807} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.13576362153131244, "train/loss": 0.7825197706127167, "eval/hcp-train-subset/loss": 0.8225518676542467, "eval/hcp-val/loss": 0.8466382363150197, "eval/nsd-val/loss": 0.8218715229342061} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.13701451805428566, "train/loss": 0.7842067969989777, "eval/hcp-train-subset/loss": 0.8225269673332092, "eval/hcp-val/loss": 0.8475705202548734, "eval/nsd-val/loss": 0.8255816390437465} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.13963640641408556, "train/loss": 0.7814966522789002, "eval/hcp-train-subset/loss": 0.8224642276763916, "eval/hcp-val/loss": 0.848196211361116, "eval/nsd-val/loss": 0.825363798487571} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.14344608756052088, "train/loss": 0.778756197423935, "eval/hcp-train-subset/loss": 0.8227367987555843, "eval/hcp-val/loss": 0.8461911735996124, "eval/nsd-val/loss": 0.8219214918151978} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.1446737284438599, "train/loss": 0.7793798372936249, "eval/hcp-train-subset/loss": 0.822237059954674, "eval/hcp-val/loss": 0.8458063256356024, "eval/nsd-val/loss": 0.8215657597587954} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.14779189460686784, "train/loss": 0.7768739538955689, "eval/hcp-train-subset/loss": 0.8226848571531234, "eval/hcp-val/loss": 0.8501322394417178, "eval/nsd-val/loss": 0.8245307764699382} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.14948619369217092, "train/loss": 0.7769580447387695, "eval/hcp-train-subset/loss": 0.8208253095226903, "eval/hcp-val/loss": 0.8476070482884684, "eval/nsd-val/loss": 0.8238639908452188} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.14907017342683046, "train/loss": 0.7799463856697082, "eval/hcp-train-subset/loss": 0.8229544720342082, "eval/hcp-val/loss": 0.8507427592431346, "eval/nsd-val/loss": 0.8285107901019435} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.15441324675495346, "train/loss": 0.7744219431495667, "eval/hcp-train-subset/loss": 0.8205355521171324, "eval/hcp-val/loss": 0.8477706466951678, "eval/nsd-val/loss": 0.8243187521734545} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.15424857152418428, "train/loss": 0.7726257054615021, "eval/hcp-train-subset/loss": 0.821001248974954, "eval/hcp-val/loss": 0.8487818606438176, "eval/nsd-val/loss": 0.8230471216863201} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.15512333795185013, "train/loss": 0.7742177011108399, "eval/hcp-train-subset/loss": 0.8184664480147823, "eval/hcp-val/loss": 0.8480845266772855, "eval/nsd-val/loss": 0.8325743204162966} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.159109075849049, "train/loss": 0.7708654251194, "eval/hcp-train-subset/loss": 0.818173090296407, "eval/hcp-val/loss": 0.8488256873623017, "eval/nsd-val/loss": 0.82753588787971} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.16024275188585899, "train/loss": 0.7731923755073548, "eval/hcp-train-subset/loss": 0.8166144076854952, "eval/hcp-val/loss": 0.8486992726402898, "eval/nsd-val/loss": 0.8235403770400632} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.1603284323630055, "train/loss": 0.7710657037734986, "eval/hcp-train-subset/loss": 0.817559951736081, "eval/hcp-val/loss": 0.8496170774582894, "eval/nsd-val/loss": 0.8241921115306116} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.1657444095471315, "train/loss": 0.7702999696540832, "eval/hcp-train-subset/loss": 0.8182939540955328, "eval/hcp-val/loss": 0.8506767230649148, "eval/nsd-val/loss": 0.8261494742285821} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.1661175585292102, "train/loss": 0.7699218058681488, "eval/hcp-train-subset/loss": 0.8165130346052109, "eval/hcp-val/loss": 0.8512075216539444, "eval/nsd-val/loss": 0.8311463873232564} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.16927230839125726, "train/loss": 0.7689740070819855, "eval/hcp-train-subset/loss": 0.813839348093156, "eval/hcp-val/loss": 0.8473740470024848, "eval/nsd-val/loss": 0.8273406644021312} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.16812480998281557, "train/loss": 0.7700548267459869, "eval/hcp-train-subset/loss": 0.8159355261633473, "eval/hcp-val/loss": 0.8511628124021715, "eval/nsd-val/loss": 0.8287307437389128} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.17203274478708966, "train/loss": 0.7684106969451904, "eval/hcp-train-subset/loss": 0.8141190630774344, "eval/hcp-val/loss": 0.8506488982708224, "eval/nsd-val/loss": 0.8252008480410422} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.17539655518103395, "train/loss": 0.7668382850933075, "eval/hcp-train-subset/loss": 0.8128110631819694, "eval/hcp-val/loss": 0.8500380169960761, "eval/nsd-val/loss": 0.8299871586984203} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.1766963716633934, "train/loss": 0.7661759544181824, "eval/hcp-train-subset/loss": 0.8121587326449733, "eval/hcp-val/loss": 0.8507088057456478, "eval/nsd-val/loss": 0.8320937281654727} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.18066113911302004, "train/loss": 0.7650150953578949, "eval/hcp-train-subset/loss": 0.8114573071079869, "eval/hcp-val/loss": 0.8494856194142373, "eval/nsd-val/loss": 0.8296796179586842} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.18205371822937655, "train/loss": 0.765590187997818, "eval/hcp-train-subset/loss": 0.8118510890391565, "eval/hcp-val/loss": 0.8515608752927473, "eval/nsd-val/loss": 0.8312406482235077} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.18485950638996806, "train/loss": 0.7613125121498108, "eval/hcp-train-subset/loss": 0.8097735162704222, "eval/hcp-val/loss": 0.850867276230166, "eval/nsd-val/loss": 0.8317391036018249} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.18474739708039772, "train/loss": 0.763850643529892, "eval/hcp-train-subset/loss": 0.81056153870398, "eval/hcp-val/loss": 0.849835213153593, "eval/nsd-val/loss": 0.8296023107344105} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.1862102629039699, "train/loss": 0.7638243045425415, "eval/hcp-train-subset/loss": 0.8109314624340304, "eval/hcp-val/loss": 0.8509873872803103, "eval/nsd-val/loss": 0.8314043004666606} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.18901930939169412, "train/loss": 0.762642031545639, "eval/hcp-train-subset/loss": 0.8113910152066138, "eval/hcp-val/loss": 0.851910739175735, "eval/nsd-val/loss": 0.83644282529431} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.19357516208875844, "train/loss": 0.7589323935794831, "eval/hcp-train-subset/loss": 0.8093348501190063, "eval/hcp-val/loss": 0.8521012759977772, "eval/nsd-val/loss": 0.8303937258258942} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.19075288377285995, "train/loss": 0.7621443289089203, "eval/hcp-train-subset/loss": 0.8094597305020979, "eval/hcp-val/loss": 0.8516703565274516, "eval/nsd-val/loss": 0.8306347295161216} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.194079022159839, "train/loss": 0.7613156198692321, "eval/hcp-train-subset/loss": 0.8084090332831105, "eval/hcp-val/loss": 0.8505908443081763, "eval/nsd-val/loss": 0.8320958989281808} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.19617348185423414, "train/loss": 0.758829739599228, "eval/hcp-train-subset/loss": 0.8082149951688705, "eval/hcp-val/loss": 0.85311389450104, "eval/nsd-val/loss": 0.8355936763748046} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.19898682391550973, "train/loss": 0.758649975862503, "eval/hcp-train-subset/loss": 0.807007182990351, "eval/hcp-val/loss": 0.852161685305257, "eval/nsd-val/loss": 0.831628736949736} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.202016500500955, "train/loss": 0.7555100770187378, "eval/hcp-train-subset/loss": 0.8071074899165861, "eval/hcp-val/loss": 0.8531546285075526, "eval/nsd-val/loss": 0.8378788280871606} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.20085163550680687, "train/loss": 0.758845798034668, "eval/hcp-train-subset/loss": 0.805934811792066, "eval/hcp-val/loss": 0.8516272594851833, "eval/nsd-val/loss": 0.8313788252492105} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.20067481323778485, "train/loss": 0.7598386356449127, "eval/hcp-train-subset/loss": 0.8075570992885097, "eval/hcp-val/loss": 0.8549752802618088, "eval/nsd-val/loss": 0.8385972390251775} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.20369711179679098, "train/loss": 0.7582556018352509, "eval/hcp-train-subset/loss": 0.8066243365887673, "eval/hcp-val/loss": 0.8542477132812623, "eval/nsd-val/loss": 0.8336295466269216} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.20598111606505806, "train/loss": 0.7568941364097596, "eval/hcp-train-subset/loss": 0.8060596364159738, "eval/hcp-val/loss": 0.853348538760216, "eval/nsd-val/loss": 0.8343931551902525} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.205365603002654, "train/loss": 0.7579613129138947, "eval/hcp-train-subset/loss": 0.8066622461042097, "eval/hcp-val/loss": 0.8540809548670246, "eval/nsd-val/loss": 0.8345428551397016} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.2063855711351627, "train/loss": 0.7581566770744324, "eval/hcp-train-subset/loss": 0.8050795428214534, "eval/hcp-val/loss": 0.854724302407234, "eval/nsd-val/loss": 0.8341913675108263} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.20804161543471728, "train/loss": 0.7569181223106384, "eval/hcp-train-subset/loss": 0.8043526478352085, "eval/hcp-val/loss": 0.8539002383908918, "eval/nsd-val/loss": 0.837493811884234} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.20769129680152434, "train/loss": 0.7569669394397736, "eval/hcp-train-subset/loss": 0.8038652837276459, "eval/hcp-val/loss": 0.8542846431655269, "eval/nsd-val/loss": 0.835448625587648} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.2100026357761571, "train/loss": 0.7560576700878143, "eval/hcp-train-subset/loss": 0.8040805362885998, "eval/hcp-val/loss": 0.8552798109669839, "eval/nsd-val/loss": 0.8365880866204539} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.20904529661927698, "train/loss": 0.7561215976428985, "eval/hcp-train-subset/loss": 0.8040098730594881, "eval/hcp-val/loss": 0.8547314001667884, "eval/nsd-val/loss": 0.8367736704887883} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.21029932399559007, "train/loss": 0.7572222897434234, "eval/hcp-train-subset/loss": 0.8038484886769326, "eval/hcp-val/loss": 0.8548858694491848, "eval/nsd-val/loss": 0.8352917355875815} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.20896513257104135, "train/loss": 0.7596117394256592, "eval/hcp-train-subset/loss": 0.8030020556142253, "eval/hcp-val/loss": 0.8546109737888459, "eval/nsd-val/loss": 0.836073363019574} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.2119673614108278, "train/loss": 0.7557221819114686, "eval/hcp-train-subset/loss": 0.8024012696358466, "eval/hcp-val/loss": 0.8550502917458934, "eval/nsd-val/loss": 0.8359243369871571} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.2126477457196589, "train/loss": 0.7542780232810974, "eval/hcp-train-subset/loss": 0.8022062163199147, "eval/hcp-val/loss": 0.855003701102349, "eval/nsd-val/loss": 0.8363813113781714} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.21149170586765384, "train/loss": 0.7561877486610412, "eval/hcp-train-subset/loss": 0.8021432690082058, "eval/hcp-val/loss": 0.8551743117070967, "eval/nsd-val/loss": 0.8371614756122712} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.2108704291185093, "train/loss": 0.7580078503417969, "eval/hcp-train-subset/loss": 0.802458381460559, "eval/hcp-val/loss": 0.8554909911847883, "eval/nsd-val/loss": 0.8363811152596627} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.2140366171967243, "train/loss": 0.7550402507400513, "eval/hcp-train-subset/loss": 0.8013764879395885, "eval/hcp-val/loss": 0.8543134581658148, "eval/nsd-val/loss": 0.8350671481701636} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.21287263802228706, "train/loss": 0.7556635715484619, "eval/hcp-train-subset/loss": 0.800883170097105, "eval/hcp-val/loss": 0.855681755850392, "eval/nsd-val/loss": 0.8359930332629911} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.2141508347666065, "train/loss": 0.7559113196182251, "eval/hcp-train-subset/loss": 0.8008155601639901, "eval/hcp-val/loss": 0.8544887063964721, "eval/nsd-val/loss": 0.8359333392112486} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.20990763355413794, "train/loss": 0.7609630836582184, "eval/hcp-train-subset/loss": 0.8009506790868698, "eval/hcp-val/loss": 0.8545020107300051, "eval/nsd-val/loss": 0.8354712013275393} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.21346596403931944, "train/loss": 0.7584872759819031, "eval/hcp-train-subset/loss": 0.8010522723197937, "eval/hcp-val/loss": 0.8541870847825082, "eval/nsd-val/loss": 0.8356792503787626} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.21411126424482602, "train/loss": 0.7558657725811004, "eval/hcp-train-subset/loss": 0.8010722917895163, "eval/hcp-val/loss": 0.8545104034485356, "eval/nsd-val/loss": 0.8360496576755277} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.2135729413722274, "train/loss": 0.7562797626209259, "eval/hcp-train-subset/loss": 0.8008644936546203, "eval/hcp-val/loss": 0.8544662075657998, "eval/nsd-val/loss": 0.8362287629035211} diff --git a/data_scaling/n400_1/pretrain/log.txt b/data_scaling/n400_1/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c24c8b5609f8f99063c1808a75483ff577d8dd58 --- /dev/null +++ b/data_scaling/n400_1/pretrain/log.txt @@ -0,0 +1,8243 @@ +pretraining fmri mae +start: 2026-01-17 20:35:24 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n400_1/pretrain +notes: data scaling experiment n400_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n400_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00399}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00399}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 14:41:50 lr: 0.000000 grad: 0.0260 (0.0260) loss: 0.9963 (0.9963) time: 8.4657 data: 7.1765 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:25:31 lr: 0.000000 grad: 0.0137 (0.0163) loss: 0.9963 (0.9962) time: 0.1694 data: 0.0688 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:20:18 lr: 0.000001 grad: 0.0137 (0.0149) loss: 0.9957 (0.9960) time: 0.1643 data: 0.0665 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:18:26 lr: 0.000001 grad: 0.0128 (0.0142) loss: 0.9961 (0.9960) time: 0.1431 data: 0.0604 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:17:37 lr: 0.000002 grad: 0.0128 (0.0138) loss: 0.9956 (0.9959) time: 0.1577 data: 0.0728 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:17:10 lr: 0.000002 grad: 0.0127 (0.0136) loss: 0.9958 (0.9959) time: 0.1322 data: 0.0523 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:16:34 lr: 0.000002 grad: 0.0125 (0.0134) loss: 0.9958 (0.9959) time: 0.1324 data: 0.0315 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:16:20 lr: 0.000003 grad: 0.0129 (0.0134) loss: 0.9957 (0.9959) time: 0.1616 data: 0.0746 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:16:07 lr: 0.000003 grad: 0.0127 (0.0133) loss: 0.9958 (0.9959) time: 0.2327 data: 0.1452 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:15:55 lr: 0.000004 grad: 0.0130 (0.0133) loss: 0.9962 (0.9959) time: 0.2187 data: 0.1303 max mem: 9377 +Train: [0] [1000/6250] eta: 0:15:49 lr: 0.000004 grad: 0.0140 (0.0134) loss: 0.9958 (0.9959) time: 0.2000 data: 0.1172 max mem: 9377 +Train: [0] [1100/6250] eta: 0:15:22 lr: 0.000004 grad: 0.0151 (0.0135) loss: 0.9959 (0.9959) time: 0.2335 data: 0.1513 max mem: 9377 +Train: [0] [1200/6250] eta: 0:15:09 lr: 0.000005 grad: 0.0157 (0.0138) loss: 0.9957 (0.9959) time: 0.3339 data: 0.2510 max mem: 9377 +Train: [0] [1300/6250] eta: 0:14:38 lr: 0.000005 grad: 0.0140 (0.0142) loss: 0.9961 (0.9959) time: 0.1701 data: 0.0785 max mem: 9377 +Train: [0] [1400/6250] eta: 0:14:28 lr: 0.000006 grad: 0.0182 (0.0146) loss: 0.9953 (0.9959) time: 0.2941 data: 0.2045 max mem: 9377 +Train: [0] [1500/6250] eta: 0:14:31 lr: 0.000006 grad: 0.0234 (0.0151) loss: 0.9958 (0.9958) time: 0.0927 data: 0.0002 max mem: 9377 +Train: [0] [1600/6250] eta: 0:14:49 lr: 0.000006 grad: 0.0246 (0.0158) loss: 0.9955 (0.9958) time: 0.8063 data: 0.7068 max mem: 9377 +Train: [0] [1700/6250] eta: 0:14:24 lr: 0.000007 grad: 0.0261 (0.0164) loss: 0.9954 (0.9958) time: 0.1408 data: 0.0480 max mem: 9377 +Train: [0] [1800/6250] eta: 0:14:02 lr: 0.000007 grad: 0.0334 (0.0172) loss: 0.9955 (0.9958) time: 0.1890 data: 0.0956 max mem: 9377 +Train: [0] [1900/6250] eta: 0:13:36 lr: 0.000008 grad: 0.0443 (0.0185) loss: 0.9946 (0.9957) time: 0.1309 data: 0.0459 max mem: 9377 +Train: [0] [2000/6250] eta: 0:13:16 lr: 0.000008 grad: 0.0395 (0.0196) loss: 0.9941 (0.9957) time: 0.2558 data: 0.1721 max mem: 9377 +Train: [0] [2100/6250] eta: 0:13:00 lr: 0.000008 grad: 0.0347 (0.0207) loss: 0.9952 (0.9956) time: 0.1673 data: 0.0483 max mem: 9377 +Train: [0] [2200/6250] eta: 0:12:36 lr: 0.000009 grad: 0.0375 (0.0217) loss: 0.9940 (0.9956) time: 0.1476 data: 0.0514 max mem: 9377 +Train: [0] [2300/6250] eta: 0:12:18 lr: 0.000009 grad: 0.0338 (0.0225) loss: 0.9953 (0.9955) time: 0.2320 data: 0.1500 max mem: 9377 +Train: [0] [2400/6250] eta: 0:11:53 lr: 0.000010 grad: 0.0458 (0.0234) loss: 0.9939 (0.9955) time: 0.1776 data: 0.0880 max mem: 9377 +Train: [0] [2500/6250] eta: 0:11:30 lr: 0.000010 grad: 0.0392 (0.0245) loss: 0.9942 (0.9954) time: 0.1336 data: 0.0439 max mem: 9377 +Train: [0] [2600/6250] eta: 0:11:09 lr: 0.000010 grad: 0.0467 (0.0253) loss: 0.9935 (0.9954) time: 0.1639 data: 0.0640 max mem: 9377 +Train: [0] [2700/6250] eta: 0:10:50 lr: 0.000011 grad: 0.0661 (0.0266) loss: 0.9937 (0.9953) time: 0.0942 data: 0.0002 max mem: 9377 +Train: [0] [2800/6250] eta: 0:10:29 lr: 0.000011 grad: 0.0650 (0.0278) loss: 0.9927 (0.9953) time: 0.1922 data: 0.1083 max mem: 9377 +Train: [0] [2900/6250] eta: 0:10:10 lr: 0.000012 grad: 0.0514 (0.0289) loss: 0.9932 (0.9952) time: 0.1633 data: 0.0763 max mem: 9377 +Train: [0] [3000/6250] eta: 0:09:51 lr: 0.000012 grad: 0.0581 (0.0302) loss: 0.9926 (0.9951) time: 0.2128 data: 0.1265 max mem: 9377 +Train: [0] [3100/6250] eta: 0:09:35 lr: 0.000012 grad: 0.0733 (0.0315) loss: 0.9926 (0.9951) time: 0.0916 data: 0.0027 max mem: 9377 +Train: [0] [3200/6250] eta: 0:09:16 lr: 0.000013 grad: 0.0643 (0.0325) loss: 0.9926 (0.9950) time: 0.2127 data: 0.1256 max mem: 9377 +Train: [0] [3300/6250] eta: 0:08:54 lr: 0.000013 grad: 0.0733 (0.0338) loss: 0.9920 (0.9949) time: 0.1663 data: 0.0823 max mem: 9377 +Train: [0] [3400/6250] eta: 0:08:33 lr: 0.000014 grad: 0.0601 (0.0349) loss: 0.9926 (0.9949) time: 0.1866 data: 0.0919 max mem: 9377 +Train: [0] [3500/6250] eta: 0:08:13 lr: 0.000014 grad: 0.0858 (0.0360) loss: 0.9926 (0.9948) time: 0.1490 data: 0.0586 max mem: 9377 +Train: [0] [3600/6250] eta: 0:07:53 lr: 0.000014 grad: 0.0758 (0.0370) loss: 0.9912 (0.9947) time: 0.1471 data: 0.0677 max mem: 9377 +Train: [0] [3700/6250] eta: 0:07:34 lr: 0.000015 grad: 0.0629 (0.0379) loss: 0.9929 (0.9947) time: 0.1346 data: 0.0419 max mem: 9377 +Train: [0] [3800/6250] eta: 0:07:16 lr: 0.000015 grad: 0.0692 (0.0388) loss: 0.9927 (0.9946) time: 0.2019 data: 0.1086 max mem: 9377 +Train: [0] [3900/6250] eta: 0:06:57 lr: 0.000016 grad: 0.0717 (0.0396) loss: 0.9920 (0.9945) time: 0.2249 data: 0.1397 max mem: 9377 +Train: [0] [4000/6250] eta: 0:06:38 lr: 0.000016 grad: 0.0628 (0.0402) loss: 0.9918 (0.9945) time: 0.1323 data: 0.0416 max mem: 9377 +Train: [0] [4100/6250] eta: 0:06:19 lr: 0.000016 grad: 0.0656 (0.0410) loss: 0.9917 (0.9944) time: 0.1812 data: 0.0892 max mem: 9377 +Train: [0] [4200/6250] eta: 0:06:01 lr: 0.000017 grad: 0.0704 (0.0417) loss: 0.9910 (0.9943) time: 0.1826 data: 0.0951 max mem: 9377 +Train: [0] [4300/6250] eta: 0:05:44 lr: 0.000017 grad: 0.0697 (0.0424) loss: 0.9921 (0.9943) time: 0.2502 data: 0.1668 max mem: 9377 +Train: [0] [4400/6250] eta: 0:05:26 lr: 0.000018 grad: 0.0693 (0.0432) loss: 0.9907 (0.9942) time: 0.2190 data: 0.1234 max mem: 9377 +Train: [0] [4500/6250] eta: 0:05:12 lr: 0.000018 grad: 0.0755 (0.0439) loss: 0.9917 (0.9941) time: 0.1090 data: 0.0004 max mem: 9377 +Train: [0] [4600/6250] eta: 0:04:56 lr: 0.000018 grad: 0.0608 (0.0446) loss: 0.9921 (0.9941) time: 0.3261 data: 0.2043 max mem: 9377 +Train: [0] [4700/6250] eta: 0:04:38 lr: 0.000019 grad: 0.0500 (0.0451) loss: 0.9915 (0.9940) time: 0.1726 data: 0.0683 max mem: 9377 +Train: [0] [4800/6250] eta: 0:04:20 lr: 0.000019 grad: 0.0819 (0.0457) loss: 0.9908 (0.9940) time: 0.1779 data: 0.0888 max mem: 9377 +Train: [0] [4900/6250] eta: 0:04:01 lr: 0.000020 grad: 0.0709 (0.0463) loss: 0.9907 (0.9939) time: 0.1235 data: 0.0459 max mem: 9377 +Train: [0] [5000/6250] eta: 0:03:43 lr: 0.000020 grad: 0.0672 (0.0468) loss: 0.9920 (0.9939) time: 0.1502 data: 0.0633 max mem: 9377 +Train: [0] [5100/6250] eta: 0:03:25 lr: 0.000020 grad: 0.0680 (0.0474) loss: 0.9906 (0.9938) time: 0.1112 data: 0.0204 max mem: 9377 +Train: [0] [5200/6250] eta: 0:03:08 lr: 0.000021 grad: 0.0703 (0.0479) loss: 0.9915 (0.9937) time: 0.1256 data: 0.0264 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:50 lr: 0.000021 grad: 0.0677 (0.0483) loss: 0.9910 (0.9937) time: 0.1063 data: 0.0002 max mem: 9377 +Train: [0] [5400/6250] eta: 0:02:34 lr: 0.000022 grad: 0.0689 (0.0489) loss: 0.9906 (0.9936) time: 0.2264 data: 0.1087 max mem: 9377 +Train: [0] [5500/6250] eta: 0:02:15 lr: 0.000022 grad: 0.0670 (0.0494) loss: 0.9906 (0.9936) time: 0.1529 data: 0.0735 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:58 lr: 0.000022 grad: 0.0667 (0.0498) loss: 0.9916 (0.9935) time: 0.1350 data: 0.0416 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:39 lr: 0.000023 grad: 0.0607 (0.0501) loss: 0.9916 (0.9935) time: 0.1101 data: 0.0157 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:21 lr: 0.000023 grad: 0.0719 (0.0506) loss: 0.9889 (0.9934) time: 0.1195 data: 0.0281 max mem: 9377 +Train: [0] [5900/6250] eta: 0:01:03 lr: 0.000024 grad: 0.0625 (0.0509) loss: 0.9898 (0.9934) time: 0.1984 data: 0.1187 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:45 lr: 0.000024 grad: 0.0696 (0.0514) loss: 0.9902 (0.9933) time: 0.1325 data: 0.0461 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:27 lr: 0.000024 grad: 0.0768 (0.0519) loss: 0.9899 (0.9933) time: 0.1932 data: 0.0958 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:09 lr: 0.000025 grad: 0.0757 (0.0523) loss: 0.9906 (0.9932) time: 0.1876 data: 0.1080 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0725 (0.0525) loss: 0.9897 (0.9932) time: 0.1912 data: 0.1020 max mem: 9377 +Train: [0] Total time: 0:18:50 (0.1809 s / it) +Averaged stats: lr: 0.000025 grad: 0.0725 (0.0525) loss: 0.9897 (0.9932) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:04:08 loss: 0.9888 (0.9888) time: 4.0117 data: 3.9553 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9896 (0.9887) time: 0.1261 data: 0.0968 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:14 (0.2370 s / it) +Averaged stats (hcp-train-subset): loss: 0.9896 (0.9887) +Eval (hcp-val): [0] [ 0/62] eta: 0:05:53 loss: 0.9848 (0.9848) time: 5.7089 data: 5.6790 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9905 (0.9893) time: 0.1455 data: 0.1185 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:14 (0.2353 s / it) +Averaged stats (hcp-val): loss: 0.9905 (0.9893) +Eval (nsd-val): [0] [ 0/62] eta: 0:03:35 loss: 0.9879 (0.9879) time: 3.4699 data: 3.3936 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9917 (0.9903) time: 0.1317 data: 0.1065 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (nsd-val): loss: 0.9917 (0.9903) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 9:30:45 lr: 0.000025 grad: 0.0961 (0.0961) loss: 0.9933 (0.9933) time: 5.4793 data: 5.2064 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:22:56 lr: 0.000025 grad: 0.0816 (0.0826) loss: 0.9900 (0.9897) time: 0.1690 data: 0.0713 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:19:38 lr: 0.000026 grad: 0.0660 (0.0786) loss: 0.9903 (0.9897) time: 0.1854 data: 0.0935 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:17:51 lr: 0.000026 grad: 0.0744 (0.0842) loss: 0.9893 (0.9893) time: 0.1227 data: 0.0024 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:17:16 lr: 0.000027 grad: 0.0796 (0.0861) loss: 0.9899 (0.9891) time: 0.2048 data: 0.0532 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:17:40 lr: 0.000027 grad: 0.0757 (0.0866) loss: 0.9900 (0.9891) time: 0.4563 data: 0.3370 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:17:59 lr: 0.000027 grad: 0.0672 (0.0860) loss: 0.9902 (0.9890) time: 0.1128 data: 0.0003 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:19:06 lr: 0.000028 grad: 0.0686 (0.0854) loss: 0.9881 (0.9890) time: 0.1885 data: 0.0956 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:20:04 lr: 0.000028 grad: 0.0757 (0.0848) loss: 0.9873 (0.9890) time: 0.1667 data: 0.0636 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:19:02 lr: 0.000029 grad: 0.0778 (0.0845) loss: 0.9894 (0.9890) time: 0.1122 data: 0.0004 max mem: 9377 +Train: [1] [1000/6250] eta: 0:18:18 lr: 0.000029 grad: 0.0762 (0.0840) loss: 0.9888 (0.9890) time: 0.2097 data: 0.1250 max mem: 9377 +Train: [1] [1100/6250] eta: 0:17:31 lr: 0.000029 grad: 0.0808 (0.0842) loss: 0.9882 (0.9890) time: 0.1687 data: 0.0893 max mem: 9377 +Train: [1] [1200/6250] eta: 0:16:48 lr: 0.000030 grad: 0.0758 (0.0842) loss: 0.9879 (0.9890) time: 0.1615 data: 0.0791 max mem: 9377 +Train: [1] [1300/6250] eta: 0:16:15 lr: 0.000030 grad: 0.0698 (0.0839) loss: 0.9889 (0.9890) time: 0.1952 data: 0.1093 max mem: 9377 +Train: [1] [1400/6250] eta: 0:15:39 lr: 0.000031 grad: 0.0765 (0.0840) loss: 0.9888 (0.9889) time: 0.1456 data: 0.0615 max mem: 9377 +Train: [1] [1500/6250] eta: 0:15:09 lr: 0.000031 grad: 0.0824 (0.0842) loss: 0.9895 (0.9889) time: 0.1288 data: 0.0433 max mem: 9377 +Train: [1] [1600/6250] eta: 0:14:43 lr: 0.000031 grad: 0.0760 (0.0840) loss: 0.9889 (0.9889) time: 0.1798 data: 0.0943 max mem: 9377 +Train: [1] [1700/6250] eta: 0:14:13 lr: 0.000032 grad: 0.0742 (0.0838) loss: 0.9892 (0.9889) time: 0.1320 data: 0.0367 max mem: 9377 +Train: [1] [1800/6250] eta: 0:13:50 lr: 0.000032 grad: 0.0792 (0.0835) loss: 0.9881 (0.9889) time: 0.1885 data: 0.0907 max mem: 9377 +Train: [1] [1900/6250] eta: 0:13:26 lr: 0.000033 grad: 0.0700 (0.0833) loss: 0.9882 (0.9889) time: 0.1075 data: 0.0005 max mem: 9377 +Train: [1] [2000/6250] eta: 0:13:03 lr: 0.000033 grad: 0.0655 (0.0832) loss: 0.9878 (0.9888) time: 0.1580 data: 0.0656 max mem: 9377 +Train: [1] [2100/6250] eta: 0:12:40 lr: 0.000033 grad: 0.0754 (0.0829) loss: 0.9899 (0.9888) time: 0.1765 data: 0.0839 max mem: 9377 +Train: [1] [2200/6250] eta: 0:12:19 lr: 0.000034 grad: 0.0754 (0.0829) loss: 0.9875 (0.9888) time: 0.2307 data: 0.1449 max mem: 9377 +Train: [1] [2300/6250] eta: 0:11:55 lr: 0.000034 grad: 0.0839 (0.0828) loss: 0.9884 (0.9888) time: 0.1820 data: 0.0897 max mem: 9377 +Train: [1] [2400/6250] eta: 0:11:34 lr: 0.000035 grad: 0.0779 (0.0831) loss: 0.9880 (0.9888) time: 0.1646 data: 0.0854 max mem: 9377 +Train: [1] [2500/6250] eta: 0:11:12 lr: 0.000035 grad: 0.0834 (0.0832) loss: 0.9881 (0.9887) time: 0.1060 data: 0.0003 max mem: 9377 +Train: [1] [2600/6250] eta: 0:11:06 lr: 0.000035 grad: 0.0824 (0.0833) loss: 0.9893 (0.9887) time: 0.1731 data: 0.0804 max mem: 9377 +Train: [1] [2700/6250] eta: 0:10:42 lr: 0.000036 grad: 0.0794 (0.0832) loss: 0.9880 (0.9887) time: 0.1373 data: 0.0537 max mem: 9377 +Train: [1] [2800/6250] eta: 0:10:19 lr: 0.000036 grad: 0.0739 (0.0832) loss: 0.9873 (0.9887) time: 0.1306 data: 0.0427 max mem: 9377 +Train: [1] [2900/6250] eta: 0:09:59 lr: 0.000037 grad: 0.0734 (0.0831) loss: 0.9871 (0.9886) time: 0.1086 data: 0.0183 max mem: 9377 +Train: [1] [3000/6250] eta: 0:09:39 lr: 0.000037 grad: 0.0656 (0.0829) loss: 0.9886 (0.9886) time: 0.1759 data: 0.0888 max mem: 9377 +Train: [1] [3100/6250] eta: 0:09:18 lr: 0.000037 grad: 0.0817 (0.0828) loss: 0.9882 (0.9886) time: 0.1117 data: 0.0220 max mem: 9377 +Train: [1] [3200/6250] eta: 0:08:58 lr: 0.000038 grad: 0.0762 (0.0828) loss: 0.9883 (0.9886) time: 0.1628 data: 0.0771 max mem: 9377 +Train: [1] [3300/6250] eta: 0:08:38 lr: 0.000038 grad: 0.0722 (0.0828) loss: 0.9878 (0.9886) time: 0.1277 data: 0.0384 max mem: 9377 +Train: [1] [3400/6250] eta: 0:08:18 lr: 0.000039 grad: 0.0725 (0.0827) loss: 0.9876 (0.9886) time: 0.1272 data: 0.0243 max mem: 9377 +Train: [1] [3500/6250] eta: 0:08:01 lr: 0.000039 grad: 0.0762 (0.0827) loss: 0.9879 (0.9885) time: 0.1781 data: 0.0939 max mem: 9377 +Train: [1] [3600/6250] eta: 0:07:41 lr: 0.000039 grad: 0.0717 (0.0828) loss: 0.9887 (0.9885) time: 0.1310 data: 0.0335 max mem: 9377 +Train: [1] [3700/6250] eta: 0:07:23 lr: 0.000040 grad: 0.0759 (0.0827) loss: 0.9870 (0.9885) time: 0.1861 data: 0.1016 max mem: 9377 +Train: [1] [3800/6250] eta: 0:07:04 lr: 0.000040 grad: 0.0710 (0.0827) loss: 0.9869 (0.9885) time: 0.1333 data: 0.0433 max mem: 9377 +Train: [1] [3900/6250] eta: 0:06:46 lr: 0.000041 grad: 0.0853 (0.0828) loss: 0.9865 (0.9884) time: 0.1722 data: 0.0872 max mem: 9377 +Train: [1] [4000/6250] eta: 0:06:32 lr: 0.000041 grad: 0.0773 (0.0829) loss: 0.9870 (0.9884) time: 0.1500 data: 0.0396 max mem: 9377 +Train: [1] [4100/6250] eta: 0:06:17 lr: 0.000041 grad: 0.0825 (0.0828) loss: 0.9883 (0.9884) time: 0.1769 data: 0.0705 max mem: 9377 +Train: [1] [4200/6250] eta: 0:06:07 lr: 0.000042 grad: 0.0827 (0.0828) loss: 0.9864 (0.9884) time: 0.1161 data: 0.0002 max mem: 9377 +Train: [1] [4300/6250] eta: 0:05:53 lr: 0.000042 grad: 0.0791 (0.0828) loss: 0.9877 (0.9883) time: 0.5342 data: 0.4032 max mem: 9377 +Train: [1] [4400/6250] eta: 0:05:36 lr: 0.000043 grad: 0.0726 (0.0827) loss: 0.9882 (0.9883) time: 0.1742 data: 0.0677 max mem: 9377 +Train: [1] [4500/6250] eta: 0:05:18 lr: 0.000043 grad: 0.0761 (0.0827) loss: 0.9865 (0.9883) time: 0.1100 data: 0.0003 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:59 lr: 0.000043 grad: 0.0730 (0.0827) loss: 0.9880 (0.9882) time: 0.1324 data: 0.0428 max mem: 9377 +Train: [1] [4700/6250] eta: 0:04:40 lr: 0.000044 grad: 0.0819 (0.0827) loss: 0.9887 (0.9882) time: 0.1306 data: 0.0424 max mem: 9377 +Train: [1] [4800/6250] eta: 0:04:21 lr: 0.000044 grad: 0.0761 (0.0826) loss: 0.9874 (0.9882) time: 0.1404 data: 0.0515 max mem: 9377 +Train: [1] [4900/6250] eta: 0:04:03 lr: 0.000045 grad: 0.0743 (0.0825) loss: 0.9859 (0.9882) time: 0.1761 data: 0.0882 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:44 lr: 0.000045 grad: 0.0740 (0.0824) loss: 0.9870 (0.9882) time: 0.1252 data: 0.0318 max mem: 9377 +Train: [1] [5100/6250] eta: 0:03:27 lr: 0.000045 grad: 0.0734 (0.0824) loss: 0.9856 (0.9882) time: 0.2298 data: 0.1474 max mem: 9377 +Train: [1] [5200/6250] eta: 0:03:09 lr: 0.000046 grad: 0.0746 (0.0825) loss: 0.9864 (0.9881) time: 0.1267 data: 0.0277 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:51 lr: 0.000046 grad: 0.0825 (0.0825) loss: 0.9866 (0.9881) time: 0.2496 data: 0.1579 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:33 lr: 0.000047 grad: 0.0891 (0.0826) loss: 0.9873 (0.9880) time: 0.1547 data: 0.0608 max mem: 9377 +Train: [1] [5500/6250] eta: 0:02:14 lr: 0.000047 grad: 0.0795 (0.0827) loss: 0.9856 (0.9880) time: 0.1648 data: 0.0682 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:56 lr: 0.000047 grad: 0.0743 (0.0828) loss: 0.9868 (0.9880) time: 0.1395 data: 0.0543 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:38 lr: 0.000048 grad: 0.0873 (0.0828) loss: 0.9851 (0.9879) time: 0.1088 data: 0.0004 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:20 lr: 0.000048 grad: 0.0757 (0.0829) loss: 0.9872 (0.9879) time: 0.1760 data: 0.0879 max mem: 9377 +Train: [1] [5900/6250] eta: 0:01:02 lr: 0.000049 grad: 0.0890 (0.0829) loss: 0.9876 (0.9878) time: 0.1081 data: 0.0002 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:45 lr: 0.000049 grad: 0.0739 (0.0830) loss: 0.9841 (0.9878) time: 0.1041 data: 0.0002 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:26 lr: 0.000049 grad: 0.0812 (0.0831) loss: 0.9871 (0.9878) time: 0.0894 data: 0.0002 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:08 lr: 0.000050 grad: 0.0870 (0.0832) loss: 0.9856 (0.9877) time: 0.2101 data: 0.1268 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0747 (0.0832) loss: 0.9851 (0.9877) time: 0.1317 data: 0.0445 max mem: 9377 +Train: [1] Total time: 0:18:44 (0.1799 s / it) +Averaged stats: lr: 0.000050 grad: 0.0747 (0.0832) loss: 0.9851 (0.9877) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:03:08 loss: 0.9876 (0.9876) time: 3.0428 data: 2.9389 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9860 (0.9861) time: 0.1055 data: 0.0783 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:17 (0.2799 s / it) +Averaged stats (hcp-train-subset): loss: 0.9860 (0.9861) +Eval (hcp-val): [1] [ 0/62] eta: 0:05:13 loss: 0.9881 (0.9881) time: 5.0556 data: 5.0270 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9855 (0.9859) time: 0.1026 data: 0.0774 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (hcp-val): loss: 0.9855 (0.9859) +Eval (nsd-val): [1] [ 0/62] eta: 0:04:50 loss: 0.9886 (0.9886) time: 4.6826 data: 4.6526 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9877 (0.9876) time: 0.1340 data: 0.1089 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:13 (0.2206 s / it) +Averaged stats (nsd-val): loss: 0.9877 (0.9876) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 10:35:58 lr: 0.000050 grad: 0.0582 (0.0582) loss: 0.9886 (0.9886) time: 6.1053 data: 5.9280 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:23:46 lr: 0.000050 grad: 0.0850 (0.0805) loss: 0.9857 (0.9871) time: 0.2019 data: 0.1089 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:20:14 lr: 0.000051 grad: 0.0994 (0.0856) loss: 0.9860 (0.9864) time: 0.1566 data: 0.0663 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:18:24 lr: 0.000051 grad: 0.0913 (0.0882) loss: 0.9867 (0.9859) time: 0.1444 data: 0.0541 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:17:24 lr: 0.000052 grad: 0.0789 (0.0879) loss: 0.9867 (0.9858) time: 0.1475 data: 0.0424 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:16:38 lr: 0.000052 grad: 0.0751 (0.0887) loss: 0.9873 (0.9858) time: 0.1475 data: 0.0242 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:16:18 lr: 0.000052 grad: 0.0825 (0.0884) loss: 0.9868 (0.9857) time: 0.1554 data: 0.0673 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:16:15 lr: 0.000053 grad: 0.0732 (0.0876) loss: 0.9849 (0.9857) time: 0.1218 data: 0.0307 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:16:17 lr: 0.000053 grad: 0.0803 (0.0873) loss: 0.9859 (0.9858) time: 0.1736 data: 0.0827 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:15:55 lr: 0.000054 grad: 0.0718 (0.0865) loss: 0.9877 (0.9859) time: 0.1706 data: 0.0801 max mem: 9377 +Train: [2] [1000/6250] eta: 0:15:27 lr: 0.000054 grad: 0.0826 (0.0862) loss: 0.9843 (0.9859) time: 0.1636 data: 0.0760 max mem: 9377 +Train: [2] [1100/6250] eta: 0:15:04 lr: 0.000054 grad: 0.0798 (0.0857) loss: 0.9854 (0.9860) time: 0.1611 data: 0.0740 max mem: 9377 +Train: [2] [1200/6250] eta: 0:15:08 lr: 0.000055 grad: 0.0772 (0.0853) loss: 0.9849 (0.9860) time: 0.1400 data: 0.0550 max mem: 9377 +Train: [2] [1300/6250] eta: 0:14:42 lr: 0.000055 grad: 0.0770 (0.0853) loss: 0.9884 (0.9860) time: 0.1486 data: 0.0551 max mem: 9377 +Train: [2] [1400/6250] eta: 0:14:27 lr: 0.000056 grad: 0.0835 (0.0852) loss: 0.9854 (0.9860) time: 0.3084 data: 0.2073 max mem: 9377 +Train: [2] [1500/6250] eta: 0:14:03 lr: 0.000056 grad: 0.0834 (0.0856) loss: 0.9823 (0.9859) time: 0.1588 data: 0.0522 max mem: 9377 +Train: [2] [1600/6250] eta: 0:13:36 lr: 0.000056 grad: 0.0781 (0.0860) loss: 0.9874 (0.9859) time: 0.1289 data: 0.0350 max mem: 9377 +Train: [2] [1700/6250] eta: 0:13:16 lr: 0.000057 grad: 0.0855 (0.0859) loss: 0.9844 (0.9859) time: 0.1260 data: 0.0353 max mem: 9377 +Train: [2] [1800/6250] eta: 0:12:52 lr: 0.000057 grad: 0.0906 (0.0861) loss: 0.9855 (0.9858) time: 0.1461 data: 0.0588 max mem: 9377 +Train: [2] [1900/6250] eta: 0:12:29 lr: 0.000058 grad: 0.0886 (0.0865) loss: 0.9860 (0.9858) time: 0.1264 data: 0.0312 max mem: 9377 +Train: [2] [2000/6250] eta: 0:12:09 lr: 0.000058 grad: 0.0771 (0.0866) loss: 0.9859 (0.9858) time: 0.1965 data: 0.1055 max mem: 9377 +Train: [2] [2100/6250] eta: 0:11:49 lr: 0.000058 grad: 0.0842 (0.0867) loss: 0.9860 (0.9857) time: 0.1482 data: 0.0702 max mem: 9377 +Train: [2] [2200/6250] eta: 0:11:32 lr: 0.000059 grad: 0.0910 (0.0869) loss: 0.9838 (0.9857) time: 0.1846 data: 0.0925 max mem: 9377 +Train: [2] [2300/6250] eta: 0:11:18 lr: 0.000059 grad: 0.0815 (0.0871) loss: 0.9858 (0.9856) time: 0.2716 data: 0.1823 max mem: 9377 +Train: [2] [2400/6250] eta: 0:10:56 lr: 0.000060 grad: 0.0901 (0.0873) loss: 0.9847 (0.9856) time: 0.0993 data: 0.0002 max mem: 9377 +Train: [2] [2500/6250] eta: 0:10:41 lr: 0.000060 grad: 0.0862 (0.0874) loss: 0.9848 (0.9856) time: 0.1613 data: 0.0688 max mem: 9377 +Train: [2] [2600/6250] eta: 0:10:24 lr: 0.000060 grad: 0.0845 (0.0875) loss: 0.9869 (0.9856) time: 0.2067 data: 0.1227 max mem: 9377 +Train: [2] [2700/6250] eta: 0:10:05 lr: 0.000061 grad: 0.0756 (0.0875) loss: 0.9879 (0.9856) time: 0.1501 data: 0.0643 max mem: 9377 +Train: [2] [2800/6250] eta: 0:09:45 lr: 0.000061 grad: 0.0853 (0.0875) loss: 0.9843 (0.9856) time: 0.1094 data: 0.0039 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:27 lr: 0.000062 grad: 0.0815 (0.0874) loss: 0.9857 (0.9856) time: 0.1177 data: 0.0283 max mem: 9377 +Train: [2] [3000/6250] eta: 0:09:10 lr: 0.000062 grad: 0.0811 (0.0873) loss: 0.9853 (0.9856) time: 0.2048 data: 0.1274 max mem: 9377 +Train: [2] [3100/6250] eta: 0:08:51 lr: 0.000062 grad: 0.0907 (0.0875) loss: 0.9850 (0.9856) time: 0.1775 data: 0.0912 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:34 lr: 0.000063 grad: 0.0766 (0.0877) loss: 0.9851 (0.9855) time: 0.1214 data: 0.0320 max mem: 9377 +Train: [2] [3300/6250] eta: 0:08:16 lr: 0.000063 grad: 0.0870 (0.0878) loss: 0.9850 (0.9855) time: 0.1432 data: 0.0448 max mem: 9377 +Train: [2] [3400/6250] eta: 0:07:58 lr: 0.000064 grad: 0.0772 (0.0879) loss: 0.9844 (0.9854) time: 0.1379 data: 0.0488 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:41 lr: 0.000064 grad: 0.0888 (0.0883) loss: 0.9853 (0.9854) time: 0.1284 data: 0.0408 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:23 lr: 0.000064 grad: 0.0906 (0.0885) loss: 0.9867 (0.9854) time: 0.1374 data: 0.0531 max mem: 9377 +Train: [2] [3700/6250] eta: 0:07:06 lr: 0.000065 grad: 0.0943 (0.0887) loss: 0.9817 (0.9853) time: 0.1491 data: 0.0615 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:49 lr: 0.000065 grad: 0.0947 (0.0890) loss: 0.9842 (0.9853) time: 0.1117 data: 0.0136 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:31 lr: 0.000066 grad: 0.0880 (0.0893) loss: 0.9843 (0.9852) time: 0.1449 data: 0.0589 max mem: 9377 +Train: [2] [4000/6250] eta: 0:06:14 lr: 0.000066 grad: 0.0994 (0.0897) loss: 0.9828 (0.9852) time: 0.2093 data: 0.1200 max mem: 9377 +Train: [2] [4100/6250] eta: 0:05:57 lr: 0.000066 grad: 0.1136 (0.0903) loss: 0.9854 (0.9851) time: 0.1778 data: 0.0932 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:40 lr: 0.000067 grad: 0.0981 (0.0909) loss: 0.9826 (0.9851) time: 0.1691 data: 0.0807 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:24 lr: 0.000067 grad: 0.1122 (0.0914) loss: 0.9836 (0.9850) time: 0.1743 data: 0.0910 max mem: 9377 +Train: [2] [4400/6250] eta: 0:05:07 lr: 0.000068 grad: 0.1072 (0.0922) loss: 0.9833 (0.9850) time: 0.2024 data: 0.1174 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:50 lr: 0.000068 grad: 0.1115 (0.0928) loss: 0.9820 (0.9849) time: 0.1673 data: 0.0834 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:33 lr: 0.000068 grad: 0.1223 (0.0936) loss: 0.9830 (0.9848) time: 0.1961 data: 0.1135 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:19 lr: 0.000069 grad: 0.1080 (0.0942) loss: 0.9816 (0.9848) time: 0.4592 data: 0.3749 max mem: 9377 +Train: [2] [4800/6250] eta: 0:04:01 lr: 0.000069 grad: 0.1040 (0.0947) loss: 0.9802 (0.9848) time: 0.1486 data: 0.0632 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:45 lr: 0.000070 grad: 0.1409 (0.0957) loss: 0.9838 (0.9847) time: 0.1595 data: 0.0707 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:28 lr: 0.000070 grad: 0.1185 (0.0968) loss: 0.9834 (0.9847) time: 0.1811 data: 0.0968 max mem: 9377 +Train: [2] [5100/6250] eta: 0:03:11 lr: 0.000070 grad: 0.1324 (0.0978) loss: 0.9826 (0.9846) time: 0.1584 data: 0.0680 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:53 lr: 0.000071 grad: 0.1327 (0.0990) loss: 0.9828 (0.9846) time: 0.1367 data: 0.0491 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:37 lr: 0.000071 grad: 0.1074 (0.1002) loss: 0.9807 (0.9845) time: 0.2201 data: 0.1349 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:20 lr: 0.000072 grad: 0.1599 (0.1016) loss: 0.9803 (0.9845) time: 0.1595 data: 0.0689 max mem: 9377 +Train: [2] [5500/6250] eta: 0:02:04 lr: 0.000072 grad: 0.1885 (0.1031) loss: 0.9806 (0.9844) time: 0.1678 data: 0.0788 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:47 lr: 0.000072 grad: 0.1762 (0.1049) loss: 0.9829 (0.9843) time: 0.1140 data: 0.0103 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:30 lr: 0.000073 grad: 0.2384 (0.1071) loss: 0.9810 (0.9843) time: 0.1510 data: 0.0651 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:14 lr: 0.000073 grad: 0.2280 (0.1088) loss: 0.9796 (0.9842) time: 0.2208 data: 0.1271 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:57 lr: 0.000074 grad: 0.2495 (0.1107) loss: 0.9824 (0.9842) time: 0.1589 data: 0.0778 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:41 lr: 0.000074 grad: 0.1901 (0.1125) loss: 0.9788 (0.9841) time: 0.1332 data: 0.0510 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.2598 (0.1142) loss: 0.9771 (0.9840) time: 0.1860 data: 0.0992 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.1606 (0.1162) loss: 0.9797 (0.9840) time: 0.2898 data: 0.2067 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1784 (0.1168) loss: 0.9785 (0.9839) time: 0.2276 data: 0.1250 max mem: 9377 +Train: [2] Total time: 0:17:19 (0.1663 s / it) +Averaged stats: lr: 0.000075 grad: 0.1784 (0.1168) loss: 0.9785 (0.9839) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:02:56 loss: 0.9788 (0.9788) time: 2.8524 data: 2.7551 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9822 (0.9819) time: 0.1096 data: 0.0832 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:12 (0.2050 s / it) +Averaged stats (hcp-train-subset): loss: 0.9822 (0.9819) +Eval (hcp-val): [2] [ 0/62] eta: 0:03:42 loss: 0.9802 (0.9802) time: 3.5959 data: 3.5235 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9792 (0.9818) time: 0.1173 data: 0.0918 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (hcp-val): loss: 0.9792 (0.9818) +Eval (nsd-val): [2] [ 0/62] eta: 0:05:11 loss: 0.9817 (0.9817) time: 5.0269 data: 4.9971 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9843 (0.9836) time: 0.1180 data: 0.0930 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (nsd-val): loss: 0.9843 (0.9836) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 12:50:03 lr: 0.000075 grad: 0.3062 (0.3062) loss: 0.9835 (0.9835) time: 7.3926 data: 7.2905 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:23:32 lr: 0.000075 grad: 0.1587 (0.2133) loss: 0.9804 (0.9834) time: 0.1614 data: 0.0786 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:20:27 lr: 0.000076 grad: 0.1230 (0.2087) loss: 0.9825 (0.9824) time: 0.1660 data: 0.0805 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:19:00 lr: 0.000076 grad: 0.1783 (0.2075) loss: 0.9805 (0.9814) time: 0.1760 data: 0.0867 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:18:23 lr: 0.000077 grad: 0.1555 (0.2029) loss: 0.9810 (0.9810) time: 0.1896 data: 0.0981 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:18:52 lr: 0.000077 grad: 0.2015 (0.2035) loss: 0.9773 (0.9805) time: 0.2854 data: 0.1869 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:22:34 lr: 0.000077 grad: 0.1247 (0.2030) loss: 0.9813 (0.9806) time: 0.1463 data: 0.0160 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:22:36 lr: 0.000078 grad: 0.1828 (0.2041) loss: 0.9809 (0.9806) time: 0.2171 data: 0.1176 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:23:44 lr: 0.000078 grad: 0.2099 (0.2026) loss: 0.9796 (0.9806) time: 0.3597 data: 0.2415 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:23:11 lr: 0.000079 grad: 0.1166 (0.2031) loss: 0.9796 (0.9806) time: 0.1285 data: 0.0281 max mem: 9377 +Train: [3] [1000/6250] eta: 0:22:12 lr: 0.000079 grad: 0.1790 (0.2058) loss: 0.9779 (0.9804) time: 0.1774 data: 0.0880 max mem: 9377 +Train: [3] [1100/6250] eta: 0:21:09 lr: 0.000079 grad: 0.1836 (0.2058) loss: 0.9801 (0.9804) time: 0.2104 data: 0.1268 max mem: 9377 +Train: [3] [1200/6250] eta: 0:20:09 lr: 0.000080 grad: 0.1409 (0.2053) loss: 0.9800 (0.9804) time: 0.1791 data: 0.0940 max mem: 9377 +Train: [3] [1300/6250] eta: 0:19:19 lr: 0.000080 grad: 0.1627 (0.2056) loss: 0.9787 (0.9803) time: 0.1597 data: 0.0762 max mem: 9377 +Train: [3] [1400/6250] eta: 0:18:36 lr: 0.000081 grad: 0.1399 (0.2031) loss: 0.9793 (0.9802) time: 0.1878 data: 0.1001 max mem: 9377 +Train: [3] [1500/6250] eta: 0:18:00 lr: 0.000081 grad: 0.1654 (0.2022) loss: 0.9798 (0.9802) time: 0.2199 data: 0.1160 max mem: 9377 +Train: [3] [1600/6250] eta: 0:17:22 lr: 0.000081 grad: 0.1727 (0.2026) loss: 0.9799 (0.9801) time: 0.2019 data: 0.0957 max mem: 9377 +Train: [3] [1700/6250] eta: 0:16:37 lr: 0.000082 grad: 0.2217 (0.2027) loss: 0.9786 (0.9800) time: 0.1185 data: 0.0239 max mem: 9377 +Train: [3] [1800/6250] eta: 0:16:02 lr: 0.000082 grad: 0.2345 (0.2031) loss: 0.9802 (0.9799) time: 0.2039 data: 0.1146 max mem: 9377 +Train: [3] [1900/6250] eta: 0:15:27 lr: 0.000083 grad: 0.2194 (0.2035) loss: 0.9758 (0.9797) time: 0.1678 data: 0.0804 max mem: 9377 +Train: [3] [2000/6250] eta: 0:14:53 lr: 0.000083 grad: 0.2155 (0.2047) loss: 0.9807 (0.9797) time: 0.1715 data: 0.0959 max mem: 9377 +Train: [3] [2100/6250] eta: 0:14:23 lr: 0.000083 grad: 0.1213 (0.2033) loss: 0.9744 (0.9795) time: 0.1751 data: 0.0895 max mem: 9377 +Train: [3] [2200/6250] eta: 0:14:02 lr: 0.000084 grad: 0.1494 (0.2036) loss: 0.9769 (0.9794) time: 0.1000 data: 0.0002 max mem: 9377 +Train: [3] [2300/6250] eta: 0:13:31 lr: 0.000084 grad: 0.2162 (0.2043) loss: 0.9769 (0.9793) time: 0.1177 data: 0.0324 max mem: 9377 +Train: [3] [2400/6250] eta: 0:13:02 lr: 0.000085 grad: 0.1551 (0.2051) loss: 0.9788 (0.9792) time: 0.1252 data: 0.0385 max mem: 9377 +Train: [3] [2500/6250] eta: 0:12:36 lr: 0.000085 grad: 0.1572 (0.2052) loss: 0.9772 (0.9791) time: 0.1721 data: 0.0942 max mem: 9377 +Train: [3] [2600/6250] eta: 0:12:10 lr: 0.000085 grad: 0.2182 (0.2062) loss: 0.9757 (0.9790) time: 0.2116 data: 0.1293 max mem: 9377 +Train: [3] [2700/6250] eta: 0:11:44 lr: 0.000086 grad: 0.2417 (0.2067) loss: 0.9762 (0.9789) time: 0.1936 data: 0.1005 max mem: 9377 +Train: [3] [2800/6250] eta: 0:11:19 lr: 0.000086 grad: 0.1645 (0.2072) loss: 0.9793 (0.9788) time: 0.1696 data: 0.0871 max mem: 9377 +Train: [3] [2900/6250] eta: 0:10:54 lr: 0.000087 grad: 0.1931 (0.2069) loss: 0.9761 (0.9787) time: 0.1719 data: 0.0836 max mem: 9377 +Train: [3] [3000/6250] eta: 0:10:30 lr: 0.000087 grad: 0.2030 (0.2066) loss: 0.9757 (0.9786) time: 0.1340 data: 0.0533 max mem: 9377 +Train: [3] [3100/6250] eta: 0:10:06 lr: 0.000087 grad: 0.1613 (0.2061) loss: 0.9758 (0.9785) time: 0.1581 data: 0.0704 max mem: 9377 +Train: [3] [3200/6250] eta: 0:09:46 lr: 0.000088 grad: 0.1315 (0.2065) loss: 0.9782 (0.9784) time: 0.1533 data: 0.0490 max mem: 9377 +Train: [3] [3300/6250] eta: 0:09:26 lr: 0.000088 grad: 0.1663 (0.2061) loss: 0.9758 (0.9783) time: 0.2561 data: 0.1568 max mem: 9377 +Train: [3] [3400/6250] eta: 0:09:05 lr: 0.000089 grad: 0.1575 (0.2059) loss: 0.9771 (0.9782) time: 0.1095 data: 0.0162 max mem: 9377 +Train: [3] [3500/6250] eta: 0:08:51 lr: 0.000089 grad: 0.1878 (0.2059) loss: 0.9763 (0.9782) time: 0.1790 data: 0.0886 max mem: 9377 +Train: [3] [3600/6250] eta: 0:08:28 lr: 0.000089 grad: 0.2157 (0.2057) loss: 0.9749 (0.9781) time: 0.1462 data: 0.0605 max mem: 9377 +Train: [3] [3700/6250] eta: 0:08:14 lr: 0.000090 grad: 0.1868 (0.2049) loss: 0.9739 (0.9780) time: 0.1510 data: 0.0622 max mem: 9377 +Train: [3] [3800/6250] eta: 0:07:54 lr: 0.000090 grad: 0.1475 (0.2041) loss: 0.9773 (0.9780) time: 0.1861 data: 0.0930 max mem: 9377 +Train: [3] [3900/6250] eta: 0:07:32 lr: 0.000091 grad: 0.1472 (0.2043) loss: 0.9774 (0.9780) time: 0.1944 data: 0.1094 max mem: 9377 +Train: [3] [4000/6250] eta: 0:07:11 lr: 0.000091 grad: 0.2018 (0.2041) loss: 0.9763 (0.9779) time: 0.1036 data: 0.0130 max mem: 9377 +Train: [3] [4100/6250] eta: 0:06:53 lr: 0.000091 grad: 0.1739 (0.2043) loss: 0.9761 (0.9778) time: 0.2138 data: 0.1164 max mem: 9377 +Train: [3] [4200/6250] eta: 0:06:31 lr: 0.000092 grad: 0.1904 (0.2038) loss: 0.9748 (0.9777) time: 0.1237 data: 0.0348 max mem: 9377 +Train: [3] [4300/6250] eta: 0:06:10 lr: 0.000092 grad: 0.1863 (0.2039) loss: 0.9739 (0.9776) time: 0.1537 data: 0.0590 max mem: 9377 +Train: [3] [4400/6250] eta: 0:05:50 lr: 0.000093 grad: 0.1402 (0.2031) loss: 0.9795 (0.9776) time: 0.2118 data: 0.1241 max mem: 9377 +Train: [3] [4500/6250] eta: 0:05:30 lr: 0.000093 grad: 0.1296 (0.2025) loss: 0.9755 (0.9775) time: 0.2106 data: 0.1231 max mem: 9377 +Train: [3] [4600/6250] eta: 0:05:14 lr: 0.000093 grad: 0.1500 (0.2017) loss: 0.9777 (0.9774) time: 0.1184 data: 0.0037 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:53 lr: 0.000094 grad: 0.1567 (0.2015) loss: 0.9738 (0.9774) time: 0.1480 data: 0.0591 max mem: 9377 +Train: [3] [4800/6250] eta: 0:04:33 lr: 0.000094 grad: 0.1653 (0.2013) loss: 0.9731 (0.9773) time: 0.1261 data: 0.0395 max mem: 9377 +Train: [3] [4900/6250] eta: 0:04:14 lr: 0.000095 grad: 0.1628 (0.2010) loss: 0.9730 (0.9772) time: 0.1344 data: 0.0424 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:55 lr: 0.000095 grad: 0.1696 (0.2005) loss: 0.9740 (0.9771) time: 0.1553 data: 0.0705 max mem: 9377 +Train: [3] [5100/6250] eta: 0:03:36 lr: 0.000095 grad: 0.1513 (0.2004) loss: 0.9714 (0.9770) time: 0.1729 data: 0.0799 max mem: 9377 +Train: [3] [5200/6250] eta: 0:03:17 lr: 0.000096 grad: 0.1359 (0.2001) loss: 0.9739 (0.9769) time: 0.1651 data: 0.0609 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:58 lr: 0.000096 grad: 0.1551 (0.2000) loss: 0.9709 (0.9768) time: 0.1060 data: 0.0003 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:39 lr: 0.000097 grad: 0.1652 (0.1996) loss: 0.9717 (0.9767) time: 0.1891 data: 0.1010 max mem: 9377 +Train: [3] [5500/6250] eta: 0:02:20 lr: 0.000097 grad: 0.1626 (0.1994) loss: 0.9698 (0.9766) time: 0.1384 data: 0.0481 max mem: 9377 +Train: [3] [5600/6250] eta: 0:02:01 lr: 0.000097 grad: 0.1337 (0.1995) loss: 0.9759 (0.9765) time: 0.1221 data: 0.0369 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:42 lr: 0.000098 grad: 0.1162 (0.1991) loss: 0.9739 (0.9764) time: 0.1365 data: 0.0445 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:23 lr: 0.000098 grad: 0.1346 (0.1987) loss: 0.9710 (0.9763) time: 0.1641 data: 0.0814 max mem: 9377 +Train: [3] [5900/6250] eta: 0:01:04 lr: 0.000099 grad: 0.1698 (0.1985) loss: 0.9719 (0.9762) time: 0.1251 data: 0.0323 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:46 lr: 0.000099 grad: 0.1696 (0.1981) loss: 0.9725 (0.9762) time: 0.1749 data: 0.0982 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:27 lr: 0.000099 grad: 0.1556 (0.1981) loss: 0.9712 (0.9761) time: 0.1147 data: 0.0229 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:09 lr: 0.000100 grad: 0.1771 (0.1978) loss: 0.9694 (0.9760) time: 0.1056 data: 0.0002 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1488 (0.1978) loss: 0.9672 (0.9759) time: 0.1326 data: 0.0459 max mem: 9377 +Train: [3] Total time: 0:19:12 (0.1844 s / it) +Averaged stats: lr: 0.000100 grad: 0.1488 (0.1978) loss: 0.9672 (0.9759) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:04:35 loss: 0.9689 (0.9689) time: 4.4455 data: 4.4154 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9712 (0.9705) time: 0.1313 data: 0.1066 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:13 (0.2140 s / it) +Averaged stats (hcp-train-subset): loss: 0.9712 (0.9705) +Eval (hcp-val): [3] [ 0/62] eta: 0:03:12 loss: 0.9653 (0.9653) time: 3.1019 data: 3.0345 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9717 (0.9707) time: 0.1376 data: 0.1118 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:13 (0.2221 s / it) +Averaged stats (hcp-val): loss: 0.9717 (0.9707) +Eval (nsd-val): [3] [ 0/62] eta: 0:05:32 loss: 0.9614 (0.9614) time: 5.3568 data: 5.3253 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9734 (0.9718) time: 0.1594 data: 0.1342 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (nsd-val): loss: 0.9734 (0.9718) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 11:02:55 lr: 0.000100 grad: 0.3400 (0.3400) loss: 0.9717 (0.9717) time: 6.3640 data: 6.2422 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:21:49 lr: 0.000100 grad: 0.1749 (0.2037) loss: 0.9687 (0.9710) time: 0.1502 data: 0.0675 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:18:46 lr: 0.000101 grad: 0.1490 (0.1950) loss: 0.9679 (0.9700) time: 0.1761 data: 0.0859 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:18:33 lr: 0.000101 grad: 0.1431 (0.1933) loss: 0.9667 (0.9689) time: 0.2113 data: 0.1341 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:18:01 lr: 0.000102 grad: 0.1557 (0.1980) loss: 0.9669 (0.9682) time: 0.1633 data: 0.0764 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:17:29 lr: 0.000102 grad: 0.2236 (0.1959) loss: 0.9659 (0.9676) time: 0.1549 data: 0.0601 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:16:54 lr: 0.000102 grad: 0.1865 (0.1965) loss: 0.9673 (0.9674) time: 0.1806 data: 0.0960 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:16:30 lr: 0.000103 grad: 0.1552 (0.1972) loss: 0.9663 (0.9673) time: 0.1038 data: 0.0003 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:16:07 lr: 0.000103 grad: 0.1750 (0.1978) loss: 0.9645 (0.9670) time: 0.1796 data: 0.0673 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:15:39 lr: 0.000104 grad: 0.1402 (0.1957) loss: 0.9640 (0.9666) time: 0.1380 data: 0.0278 max mem: 9377 +Train: [4] [1000/6250] eta: 0:15:07 lr: 0.000104 grad: 0.1485 (0.1954) loss: 0.9645 (0.9665) time: 0.1531 data: 0.0552 max mem: 9377 +Train: [4] [1100/6250] eta: 0:15:20 lr: 0.000104 grad: 0.1461 (0.1948) loss: 0.9632 (0.9662) time: 0.1359 data: 0.0364 max mem: 9377 +Train: [4] [1200/6250] eta: 0:14:56 lr: 0.000105 grad: 0.2062 (0.1962) loss: 0.9598 (0.9659) time: 0.0970 data: 0.0061 max mem: 9377 +Train: [4] [1300/6250] eta: 0:14:35 lr: 0.000105 grad: 0.2320 (0.1973) loss: 0.9648 (0.9656) time: 0.1436 data: 0.0405 max mem: 9377 +Train: [4] [1400/6250] eta: 0:14:18 lr: 0.000106 grad: 0.1900 (0.1971) loss: 0.9597 (0.9654) time: 0.1963 data: 0.1061 max mem: 9377 +Train: [4] [1500/6250] eta: 0:14:16 lr: 0.000106 grad: 0.1600 (0.1969) loss: 0.9652 (0.9652) time: 0.3471 data: 0.2287 max mem: 9377 +Train: [4] [1600/6250] eta: 0:13:52 lr: 0.000106 grad: 0.1815 (0.1972) loss: 0.9609 (0.9649) time: 0.1080 data: 0.0133 max mem: 9377 +Train: [4] [1700/6250] eta: 0:13:28 lr: 0.000107 grad: 0.1772 (0.1967) loss: 0.9600 (0.9647) time: 0.1482 data: 0.0588 max mem: 9377 +Train: [4] [1800/6250] eta: 0:13:05 lr: 0.000107 grad: 0.2149 (0.1983) loss: 0.9587 (0.9644) time: 0.1359 data: 0.0354 max mem: 9377 +Train: [4] [1900/6250] eta: 0:12:43 lr: 0.000108 grad: 0.2427 (0.1987) loss: 0.9601 (0.9641) time: 0.1028 data: 0.0004 max mem: 9377 +Train: [4] [2000/6250] eta: 0:12:22 lr: 0.000108 grad: 0.1789 (0.1988) loss: 0.9559 (0.9638) time: 0.1680 data: 0.0861 max mem: 9377 +Train: [4] [2100/6250] eta: 0:12:00 lr: 0.000108 grad: 0.1647 (0.2001) loss: 0.9555 (0.9635) time: 0.1538 data: 0.0722 max mem: 9377 +Train: [4] [2200/6250] eta: 0:11:42 lr: 0.000109 grad: 0.1546 (0.2007) loss: 0.9566 (0.9631) time: 0.1716 data: 0.0832 max mem: 9377 +Train: [4] [2300/6250] eta: 0:11:22 lr: 0.000109 grad: 0.2070 (0.2026) loss: 0.9588 (0.9628) time: 0.1299 data: 0.0324 max mem: 9377 +Train: [4] [2400/6250] eta: 0:11:01 lr: 0.000110 grad: 0.2007 (0.2036) loss: 0.9541 (0.9624) time: 0.1649 data: 0.0824 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:41 lr: 0.000110 grad: 0.2229 (0.2059) loss: 0.9544 (0.9621) time: 0.1274 data: 0.0356 max mem: 9377 +Train: [4] [2600/6250] eta: 0:10:20 lr: 0.000110 grad: 0.2492 (0.2072) loss: 0.9499 (0.9617) time: 0.1430 data: 0.0552 max mem: 9377 +Train: [4] [2700/6250] eta: 0:10:05 lr: 0.000111 grad: 0.2972 (0.2087) loss: 0.9505 (0.9613) time: 0.2436 data: 0.1610 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:45 lr: 0.000111 grad: 0.1970 (0.2091) loss: 0.9520 (0.9608) time: 0.1657 data: 0.0736 max mem: 9377 +Train: [4] [2900/6250] eta: 0:09:27 lr: 0.000112 grad: 0.2215 (0.2104) loss: 0.9509 (0.9605) time: 0.1203 data: 0.0096 max mem: 9377 +Train: [4] [3000/6250] eta: 0:09:08 lr: 0.000112 grad: 0.2036 (0.2118) loss: 0.9482 (0.9601) time: 0.0959 data: 0.0027 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:50 lr: 0.000112 grad: 0.2061 (0.2124) loss: 0.9464 (0.9598) time: 0.1356 data: 0.0528 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:32 lr: 0.000113 grad: 0.2047 (0.2129) loss: 0.9458 (0.9594) time: 0.1275 data: 0.0381 max mem: 9377 +Train: [4] [3300/6250] eta: 0:08:14 lr: 0.000113 grad: 0.2372 (0.2144) loss: 0.9505 (0.9591) time: 0.1498 data: 0.0671 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:58 lr: 0.000114 grad: 0.2185 (0.2147) loss: 0.9490 (0.9587) time: 0.2121 data: 0.1227 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:39 lr: 0.000114 grad: 0.2514 (0.2154) loss: 0.9447 (0.9584) time: 0.1815 data: 0.0977 max mem: 9377 +Train: [4] [3600/6250] eta: 0:07:21 lr: 0.000114 grad: 0.1664 (0.2164) loss: 0.9431 (0.9580) time: 0.1080 data: 0.0109 max mem: 9377 +Train: [4] [3700/6250] eta: 0:07:03 lr: 0.000115 grad: 0.2351 (0.2171) loss: 0.9458 (0.9576) time: 0.1364 data: 0.0427 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:46 lr: 0.000115 grad: 0.2404 (0.2179) loss: 0.9439 (0.9572) time: 0.1100 data: 0.0060 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:34 lr: 0.000116 grad: 0.2145 (0.2201) loss: 0.9387 (0.9568) time: 0.4616 data: 0.3486 max mem: 9377 +Train: [4] [4000/6250] eta: 0:06:20 lr: 0.000116 grad: 0.1896 (0.2207) loss: 0.9407 (0.9564) time: 0.4736 data: 0.3712 max mem: 9377 +Train: [4] [4100/6250] eta: 0:06:03 lr: 0.000116 grad: 0.2137 (0.2218) loss: 0.9417 (0.9560) time: 0.0986 data: 0.0002 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:49 lr: 0.000117 grad: 0.2271 (0.2224) loss: 0.9386 (0.9556) time: 0.1690 data: 0.0827 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:35 lr: 0.000117 grad: 0.1875 (0.2230) loss: 0.9339 (0.9552) time: 0.0996 data: 0.0002 max mem: 9377 +Train: [4] [4400/6250] eta: 0:05:18 lr: 0.000118 grad: 0.2591 (0.2234) loss: 0.9345 (0.9547) time: 0.1895 data: 0.1004 max mem: 9377 +Train: [4] [4500/6250] eta: 0:05:05 lr: 0.000118 grad: 0.2468 (0.2245) loss: 0.9380 (0.9543) time: 0.1178 data: 0.0123 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:47 lr: 0.000118 grad: 0.2169 (0.2248) loss: 0.9397 (0.9539) time: 0.2205 data: 0.1388 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:29 lr: 0.000119 grad: 0.2146 (0.2248) loss: 0.9356 (0.9535) time: 0.1503 data: 0.0640 max mem: 9377 +Train: [4] [4800/6250] eta: 0:04:11 lr: 0.000119 grad: 0.2090 (0.2255) loss: 0.9354 (0.9532) time: 0.1847 data: 0.1025 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:53 lr: 0.000120 grad: 0.1924 (0.2260) loss: 0.9319 (0.9528) time: 0.1649 data: 0.0876 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:35 lr: 0.000120 grad: 0.3097 (0.2268) loss: 0.9367 (0.9524) time: 0.1527 data: 0.0554 max mem: 9377 +Train: [4] [5100/6250] eta: 0:03:20 lr: 0.000120 grad: 0.1612 (0.2271) loss: 0.9284 (0.9520) time: 0.1395 data: 0.0314 max mem: 9377 +Train: [4] [5200/6250] eta: 0:03:02 lr: 0.000121 grad: 0.2461 (0.2280) loss: 0.9313 (0.9516) time: 0.1637 data: 0.0668 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:44 lr: 0.000121 grad: 0.2298 (0.2282) loss: 0.9335 (0.9512) time: 0.1100 data: 0.0003 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:27 lr: 0.000122 grad: 0.2301 (0.2285) loss: 0.9313 (0.9508) time: 0.1154 data: 0.0105 max mem: 9377 +Train: [4] [5500/6250] eta: 0:02:09 lr: 0.000122 grad: 0.2057 (0.2281) loss: 0.9298 (0.9504) time: 0.1316 data: 0.0422 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:51 lr: 0.000122 grad: 0.1653 (0.2278) loss: 0.9288 (0.9500) time: 0.1466 data: 0.0535 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:34 lr: 0.000123 grad: 0.2149 (0.2278) loss: 0.9283 (0.9496) time: 0.1437 data: 0.0508 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:17 lr: 0.000123 grad: 0.2278 (0.2276) loss: 0.9265 (0.9492) time: 0.1818 data: 0.0976 max mem: 9377 +Train: [4] [5900/6250] eta: 0:01:00 lr: 0.000124 grad: 0.2687 (0.2279) loss: 0.9262 (0.9489) time: 0.1717 data: 0.0959 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:42 lr: 0.000124 grad: 0.1898 (0.2276) loss: 0.9237 (0.9485) time: 0.1516 data: 0.0564 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:25 lr: 0.000124 grad: 0.2059 (0.2275) loss: 0.9266 (0.9481) time: 0.1852 data: 0.0845 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.2377 (0.2276) loss: 0.9231 (0.9477) time: 0.1175 data: 0.0192 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1538 (0.2273) loss: 0.9249 (0.9476) time: 0.1298 data: 0.0415 max mem: 9377 +Train: [4] Total time: 0:17:53 (0.1718 s / it) +Averaged stats: lr: 0.000125 grad: 0.1538 (0.2273) loss: 0.9249 (0.9476) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:05:07 loss: 0.9206 (0.9206) time: 4.9599 data: 4.9295 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9234 (0.9240) time: 0.1335 data: 0.1084 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:13 (0.2239 s / it) +Averaged stats (hcp-train-subset): loss: 0.9234 (0.9240) +Making plots (hcp-train-subset): example=11 +Eval (hcp-val): [4] [ 0/62] eta: 0:03:59 loss: 0.9217 (0.9217) time: 3.8549 data: 3.7995 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9222 (0.9229) time: 0.1330 data: 0.1070 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (hcp-val): loss: 0.9222 (0.9229) +Making plots (hcp-val): example=36 +Eval (nsd-val): [4] [ 0/62] eta: 0:05:30 loss: 0.8899 (0.8899) time: 5.3353 data: 5.3039 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.8992 (0.8983) time: 0.1238 data: 0.0989 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (nsd-val): loss: 0.8992 (0.8983) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 12:57:10 lr: 0.000125 grad: 0.1533 (0.1533) loss: 0.9393 (0.9393) time: 7.4609 data: 7.3586 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:23:33 lr: 0.000125 grad: 0.2298 (0.2721) loss: 0.9231 (0.9188) time: 0.1689 data: 0.0828 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:22:09 lr: 0.000125 grad: 0.1975 (0.2483) loss: 0.9229 (0.9202) time: 0.2150 data: 0.1373 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:23:03 lr: 0.000125 grad: 0.2248 (0.2363) loss: 0.9226 (0.9208) time: 0.3694 data: 0.2839 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:23:03 lr: 0.000125 grad: 0.1652 (0.2299) loss: 0.9269 (0.9214) time: 0.4202 data: 0.3000 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:21:26 lr: 0.000125 grad: 0.2292 (0.2259) loss: 0.9234 (0.9218) time: 0.1970 data: 0.1046 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:20:48 lr: 0.000125 grad: 0.1739 (0.2205) loss: 0.9238 (0.9223) time: 0.3020 data: 0.2261 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:19:57 lr: 0.000125 grad: 0.2013 (0.2197) loss: 0.9265 (0.9225) time: 0.2104 data: 0.0941 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:21:12 lr: 0.000125 grad: 0.2063 (0.2186) loss: 0.9284 (0.9227) time: 0.6122 data: 0.4617 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:23:03 lr: 0.000125 grad: 0.1724 (0.2152) loss: 0.9199 (0.9224) time: 0.3577 data: 0.2083 max mem: 9377 +Train: [5] [1000/6250] eta: 0:22:08 lr: 0.000125 grad: 0.1839 (0.2128) loss: 0.9180 (0.9223) time: 0.1512 data: 0.0355 max mem: 9377 +Train: [5] [1100/6250] eta: 0:20:59 lr: 0.000125 grad: 0.2242 (0.2115) loss: 0.9226 (0.9220) time: 0.1870 data: 0.0775 max mem: 9377 +Train: [5] [1200/6250] eta: 0:19:57 lr: 0.000125 grad: 0.1856 (0.2103) loss: 0.9210 (0.9218) time: 0.1445 data: 0.0497 max mem: 9377 +Train: [5] [1300/6250] eta: 0:19:08 lr: 0.000125 grad: 0.1579 (0.2081) loss: 0.9193 (0.9216) time: 0.1341 data: 0.0357 max mem: 9377 +Train: [5] [1400/6250] eta: 0:18:19 lr: 0.000125 grad: 0.1862 (0.2064) loss: 0.9169 (0.9215) time: 0.1723 data: 0.0909 max mem: 9377 +Train: [5] [1500/6250] eta: 0:17:38 lr: 0.000125 grad: 0.1645 (0.2045) loss: 0.9206 (0.9214) time: 0.1653 data: 0.0811 max mem: 9377 +Train: [5] [1600/6250] eta: 0:17:00 lr: 0.000125 grad: 0.1788 (0.2026) loss: 0.9176 (0.9212) time: 0.2012 data: 0.1231 max mem: 9377 +Train: [5] [1700/6250] eta: 0:16:25 lr: 0.000125 grad: 0.1639 (0.2015) loss: 0.9187 (0.9211) time: 0.2118 data: 0.1245 max mem: 9377 +Train: [5] [1800/6250] eta: 0:15:45 lr: 0.000125 grad: 0.1617 (0.2000) loss: 0.9172 (0.9209) time: 0.1418 data: 0.0506 max mem: 9377 +Train: [5] [1900/6250] eta: 0:15:14 lr: 0.000125 grad: 0.1553 (0.1989) loss: 0.9157 (0.9207) time: 0.1099 data: 0.0163 max mem: 9377 +Train: [5] [2000/6250] eta: 0:14:42 lr: 0.000125 grad: 0.1777 (0.1975) loss: 0.9185 (0.9206) time: 0.1905 data: 0.1116 max mem: 9377 +Train: [5] [2100/6250] eta: 0:14:10 lr: 0.000125 grad: 0.2004 (0.1966) loss: 0.9163 (0.9205) time: 0.1624 data: 0.0721 max mem: 9377 +Train: [5] [2200/6250] eta: 0:13:40 lr: 0.000125 grad: 0.1942 (0.1957) loss: 0.9119 (0.9203) time: 0.1465 data: 0.0666 max mem: 9377 +Train: [5] [2300/6250] eta: 0:13:14 lr: 0.000125 grad: 0.1491 (0.1945) loss: 0.9175 (0.9200) time: 0.1821 data: 0.0979 max mem: 9377 +Train: [5] [2400/6250] eta: 0:13:14 lr: 0.000125 grad: 0.1497 (0.1937) loss: 0.9154 (0.9198) time: 0.1262 data: 0.0003 max mem: 9377 +Train: [5] [2500/6250] eta: 0:12:49 lr: 0.000125 grad: 0.1727 (0.1926) loss: 0.9168 (0.9196) time: 0.2161 data: 0.1245 max mem: 9377 +Train: [5] [2600/6250] eta: 0:12:23 lr: 0.000125 grad: 0.1696 (0.1916) loss: 0.9110 (0.9193) time: 0.1211 data: 0.0003 max mem: 9377 +Train: [5] [2700/6250] eta: 0:12:11 lr: 0.000125 grad: 0.1583 (0.1905) loss: 0.9144 (0.9191) time: 0.1137 data: 0.0032 max mem: 9377 +Train: [5] [2800/6250] eta: 0:11:45 lr: 0.000125 grad: 0.1550 (0.1894) loss: 0.9142 (0.9189) time: 0.1508 data: 0.0594 max mem: 9377 +Train: [5] [2900/6250] eta: 0:11:22 lr: 0.000125 grad: 0.1554 (0.1890) loss: 0.9153 (0.9188) time: 0.1756 data: 0.0887 max mem: 9377 +Train: [5] [3000/6250] eta: 0:10:57 lr: 0.000125 grad: 0.1578 (0.1875) loss: 0.9119 (0.9186) time: 0.1847 data: 0.1016 max mem: 9377 +Train: [5] [3100/6250] eta: 0:10:36 lr: 0.000125 grad: 0.1550 (0.1865) loss: 0.9081 (0.9184) time: 0.3990 data: 0.3184 max mem: 9377 +Train: [5] [3200/6250] eta: 0:10:12 lr: 0.000125 grad: 0.1601 (0.1862) loss: 0.9086 (0.9182) time: 0.1679 data: 0.0784 max mem: 9377 +Train: [5] [3300/6250] eta: 0:09:59 lr: 0.000125 grad: 0.1520 (0.1859) loss: 0.9084 (0.9180) time: 0.1004 data: 0.0002 max mem: 9377 +Train: [5] [3400/6250] eta: 0:09:34 lr: 0.000125 grad: 0.1452 (0.1851) loss: 0.9112 (0.9177) time: 0.1652 data: 0.0840 max mem: 9377 +Train: [5] [3500/6250] eta: 0:09:11 lr: 0.000125 grad: 0.1452 (0.1841) loss: 0.9093 (0.9175) time: 0.1392 data: 0.0497 max mem: 9377 +Train: [5] [3600/6250] eta: 0:08:49 lr: 0.000125 grad: 0.1507 (0.1836) loss: 0.9100 (0.9172) time: 0.0801 data: 0.0002 max mem: 9377 +Train: [5] [3700/6250] eta: 0:08:26 lr: 0.000125 grad: 0.1545 (0.1831) loss: 0.9088 (0.9170) time: 0.1426 data: 0.0469 max mem: 9377 +Train: [5] [3800/6250] eta: 0:08:05 lr: 0.000125 grad: 0.1387 (0.1823) loss: 0.9102 (0.9167) time: 0.2334 data: 0.1455 max mem: 9377 +Train: [5] [3900/6250] eta: 0:07:49 lr: 0.000125 grad: 0.1474 (0.1817) loss: 0.9080 (0.9165) time: 0.3310 data: 0.2290 max mem: 9377 +Train: [5] [4000/6250] eta: 0:07:26 lr: 0.000125 grad: 0.1550 (0.1814) loss: 0.9038 (0.9162) time: 0.1175 data: 0.0002 max mem: 9377 +Train: [5] [4100/6250] eta: 0:07:06 lr: 0.000125 grad: 0.1519 (0.1809) loss: 0.9032 (0.9160) time: 0.1295 data: 0.0461 max mem: 9377 +Train: [5] [4200/6250] eta: 0:06:44 lr: 0.000125 grad: 0.1393 (0.1803) loss: 0.9028 (0.9157) time: 0.1202 data: 0.0301 max mem: 9377 +Train: [5] [4300/6250] eta: 0:06:22 lr: 0.000125 grad: 0.1625 (0.1802) loss: 0.9076 (0.9154) time: 0.1726 data: 0.0862 max mem: 9377 +Train: [5] [4400/6250] eta: 0:06:01 lr: 0.000125 grad: 0.1524 (0.1796) loss: 0.9012 (0.9151) time: 0.1263 data: 0.0282 max mem: 9377 +Train: [5] [4500/6250] eta: 0:05:40 lr: 0.000125 grad: 0.1477 (0.1791) loss: 0.9009 (0.9148) time: 0.1125 data: 0.0258 max mem: 9377 +Train: [5] [4600/6250] eta: 0:05:20 lr: 0.000125 grad: 0.1540 (0.1787) loss: 0.9029 (0.9146) time: 0.2782 data: 0.1919 max mem: 9377 +Train: [5] [4700/6250] eta: 0:04:59 lr: 0.000125 grad: 0.1592 (0.1784) loss: 0.8968 (0.9142) time: 0.1236 data: 0.0445 max mem: 9377 +Train: [5] [4800/6250] eta: 0:04:39 lr: 0.000125 grad: 0.1503 (0.1779) loss: 0.8977 (0.9140) time: 0.1729 data: 0.0914 max mem: 9377 +Train: [5] [4900/6250] eta: 0:04:19 lr: 0.000125 grad: 0.1459 (0.1777) loss: 0.9030 (0.9137) time: 0.1039 data: 0.0122 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:59 lr: 0.000125 grad: 0.1478 (0.1774) loss: 0.8990 (0.9134) time: 0.1632 data: 0.0666 max mem: 9377 +Train: [5] [5100/6250] eta: 0:03:39 lr: 0.000125 grad: 0.1657 (0.1770) loss: 0.8985 (0.9131) time: 0.1078 data: 0.0003 max mem: 9377 +Train: [5] [5200/6250] eta: 0:03:19 lr: 0.000125 grad: 0.1423 (0.1767) loss: 0.8940 (0.9128) time: 0.1306 data: 0.0415 max mem: 9377 +Train: [5] [5300/6250] eta: 0:03:00 lr: 0.000125 grad: 0.1628 (0.1763) loss: 0.8939 (0.9125) time: 0.1876 data: 0.1099 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:41 lr: 0.000125 grad: 0.1641 (0.1760) loss: 0.8937 (0.9121) time: 0.0962 data: 0.0002 max mem: 9377 +Train: [5] [5500/6250] eta: 0:02:21 lr: 0.000125 grad: 0.1501 (0.1757) loss: 0.8925 (0.9118) time: 0.1961 data: 0.1176 max mem: 9377 +Train: [5] [5600/6250] eta: 0:02:02 lr: 0.000125 grad: 0.1689 (0.1755) loss: 0.8911 (0.9114) time: 0.1367 data: 0.0457 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:43 lr: 0.000125 grad: 0.1467 (0.1751) loss: 0.8937 (0.9111) time: 0.1402 data: 0.0537 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:24 lr: 0.000125 grad: 0.1296 (0.1748) loss: 0.8953 (0.9107) time: 0.1561 data: 0.0725 max mem: 9377 +Train: [5] [5900/6250] eta: 0:01:05 lr: 0.000125 grad: 0.1420 (0.1746) loss: 0.8896 (0.9104) time: 0.1363 data: 0.0464 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:46 lr: 0.000125 grad: 0.1253 (0.1743) loss: 0.8893 (0.9101) time: 0.1594 data: 0.0733 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:28 lr: 0.000125 grad: 0.1455 (0.1739) loss: 0.8923 (0.9098) time: 0.0981 data: 0.0002 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:09 lr: 0.000125 grad: 0.1409 (0.1735) loss: 0.8928 (0.9095) time: 0.1668 data: 0.0687 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1458 (0.1733) loss: 0.8896 (0.9094) time: 0.2656 data: 0.1733 max mem: 9377 +Train: [5] Total time: 0:19:37 (0.1884 s / it) +Averaged stats: lr: 0.000125 grad: 0.1458 (0.1733) loss: 0.8896 (0.9094) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:06:06 loss: 0.8909 (0.8909) time: 5.9147 data: 5.8834 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8863 (0.8870) time: 0.1287 data: 0.1039 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:14 (0.2303 s / it) +Averaged stats (hcp-train-subset): loss: 0.8863 (0.8870) +Eval (hcp-val): [5] [ 0/62] eta: 0:03:31 loss: 0.8817 (0.8817) time: 3.4134 data: 3.3275 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8848 (0.8862) time: 0.1160 data: 0.0909 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:14 (0.2298 s / it) +Averaged stats (hcp-val): loss: 0.8848 (0.8862) +Eval (nsd-val): [5] [ 0/62] eta: 0:05:51 loss: 0.8402 (0.8402) time: 5.6756 data: 5.6446 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8510 (0.8515) time: 0.0928 data: 0.0660 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:13 (0.2204 s / it) +Averaged stats (nsd-val): loss: 0.8510 (0.8515) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 12:31:15 lr: 0.000125 grad: 0.1446 (0.1446) loss: 0.8958 (0.8958) time: 7.2120 data: 7.0644 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:30:45 lr: 0.000125 grad: 0.1572 (0.1493) loss: 0.8887 (0.8932) time: 0.2188 data: 0.1274 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:25:59 lr: 0.000125 grad: 0.1309 (0.1487) loss: 0.8957 (0.8925) time: 0.1128 data: 0.0003 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:23:10 lr: 0.000125 grad: 0.1470 (0.1473) loss: 0.8922 (0.8930) time: 0.2021 data: 0.1052 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:21:39 lr: 0.000125 grad: 0.1396 (0.1483) loss: 0.8903 (0.8920) time: 0.1799 data: 0.0877 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:20:40 lr: 0.000125 grad: 0.1619 (0.1495) loss: 0.8877 (0.8910) time: 0.1006 data: 0.0003 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:22:26 lr: 0.000125 grad: 0.1367 (0.1486) loss: 0.8857 (0.8904) time: 0.4796 data: 0.2673 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:21:41 lr: 0.000125 grad: 0.1455 (0.1487) loss: 0.8901 (0.8899) time: 0.3055 data: 0.2245 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:20:42 lr: 0.000125 grad: 0.1245 (0.1484) loss: 0.8852 (0.8896) time: 0.2477 data: 0.1645 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:19:37 lr: 0.000125 grad: 0.1307 (0.1468) loss: 0.8879 (0.8893) time: 0.1456 data: 0.0656 max mem: 9377 +Train: [6] [1000/6250] eta: 0:18:44 lr: 0.000125 grad: 0.1196 (0.1450) loss: 0.8880 (0.8891) time: 0.1505 data: 0.0699 max mem: 9377 +Train: [6] [1100/6250] eta: 0:17:59 lr: 0.000125 grad: 0.1218 (0.1445) loss: 0.8864 (0.8890) time: 0.1783 data: 0.0912 max mem: 9377 +Train: [6] [1200/6250] eta: 0:17:12 lr: 0.000125 grad: 0.1347 (0.1437) loss: 0.8849 (0.8887) time: 0.1287 data: 0.0220 max mem: 9377 +Train: [6] [1300/6250] eta: 0:16:31 lr: 0.000125 grad: 0.1279 (0.1437) loss: 0.8875 (0.8885) time: 0.1641 data: 0.0830 max mem: 9377 +Train: [6] [1400/6250] eta: 0:15:53 lr: 0.000125 grad: 0.1170 (0.1430) loss: 0.8848 (0.8883) time: 0.1579 data: 0.0742 max mem: 9377 +Train: [6] [1500/6250] eta: 0:15:29 lr: 0.000125 grad: 0.1374 (0.1424) loss: 0.8838 (0.8880) time: 0.2056 data: 0.1198 max mem: 9377 +Train: [6] [1600/6250] eta: 0:15:22 lr: 0.000125 grad: 0.1315 (0.1422) loss: 0.8882 (0.8878) time: 0.1015 data: 0.0060 max mem: 9377 +Train: [6] [1700/6250] eta: 0:15:09 lr: 0.000125 grad: 0.1340 (0.1417) loss: 0.8814 (0.8877) time: 0.3036 data: 0.2083 max mem: 9377 +Train: [6] [1800/6250] eta: 0:15:01 lr: 0.000125 grad: 0.1336 (0.1411) loss: 0.8855 (0.8875) time: 0.6783 data: 0.5880 max mem: 9377 +Train: [6] [1900/6250] eta: 0:14:56 lr: 0.000125 grad: 0.1390 (0.1407) loss: 0.8841 (0.8873) time: 0.4242 data: 0.3265 max mem: 9377 +Train: [6] [2000/6250] eta: 0:14:31 lr: 0.000125 grad: 0.1289 (0.1400) loss: 0.8839 (0.8870) time: 0.1076 data: 0.0002 max mem: 9377 +Train: [6] [2100/6250] eta: 0:14:25 lr: 0.000125 grad: 0.1150 (0.1391) loss: 0.8843 (0.8869) time: 0.6762 data: 0.5612 max mem: 9377 +Train: [6] [2200/6250] eta: 0:14:07 lr: 0.000125 grad: 0.1122 (0.1387) loss: 0.8830 (0.8867) time: 0.1078 data: 0.0062 max mem: 9377 +Train: [6] [2300/6250] eta: 0:13:49 lr: 0.000125 grad: 0.1164 (0.1382) loss: 0.8825 (0.8864) time: 0.1038 data: 0.0003 max mem: 9377 +Train: [6] [2400/6250] eta: 0:13:40 lr: 0.000125 grad: 0.1200 (0.1377) loss: 0.8820 (0.8862) time: 0.1412 data: 0.0335 max mem: 9377 +Train: [6] [2500/6250] eta: 0:13:21 lr: 0.000125 grad: 0.1193 (0.1375) loss: 0.8791 (0.8860) time: 0.1540 data: 0.0580 max mem: 9377 +Train: [6] [2600/6250] eta: 0:12:55 lr: 0.000125 grad: 0.1170 (0.1371) loss: 0.8810 (0.8858) time: 0.2919 data: 0.2066 max mem: 9377 +Train: [6] [2700/6250] eta: 0:12:26 lr: 0.000125 grad: 0.1157 (0.1368) loss: 0.8789 (0.8855) time: 0.2217 data: 0.1390 max mem: 9377 +Train: [6] [2800/6250] eta: 0:11:58 lr: 0.000125 grad: 0.1232 (0.1363) loss: 0.8791 (0.8854) time: 0.1817 data: 0.1003 max mem: 9377 +Train: [6] [2900/6250] eta: 0:11:31 lr: 0.000125 grad: 0.1090 (0.1358) loss: 0.8814 (0.8851) time: 0.1588 data: 0.0678 max mem: 9377 +Train: [6] [3000/6250] eta: 0:11:10 lr: 0.000125 grad: 0.1121 (0.1352) loss: 0.8821 (0.8849) time: 0.1061 data: 0.0003 max mem: 9377 +Train: [6] [3100/6250] eta: 0:10:46 lr: 0.000125 grad: 0.1170 (0.1347) loss: 0.8802 (0.8847) time: 0.1171 data: 0.0002 max mem: 9377 +Train: [6] [3200/6250] eta: 0:10:21 lr: 0.000125 grad: 0.1372 (0.1344) loss: 0.8817 (0.8846) time: 0.1412 data: 0.0475 max mem: 9377 +Train: [6] [3300/6250] eta: 0:09:56 lr: 0.000125 grad: 0.1164 (0.1340) loss: 0.8729 (0.8843) time: 0.1541 data: 0.0614 max mem: 9377 +Train: [6] [3400/6250] eta: 0:09:31 lr: 0.000125 grad: 0.1308 (0.1337) loss: 0.8759 (0.8841) time: 0.0963 data: 0.0004 max mem: 9377 +Train: [6] [3500/6250] eta: 0:09:08 lr: 0.000125 grad: 0.1097 (0.1332) loss: 0.8769 (0.8839) time: 0.1670 data: 0.0760 max mem: 9377 +Train: [6] [3600/6250] eta: 0:08:45 lr: 0.000125 grad: 0.1203 (0.1328) loss: 0.8777 (0.8837) time: 0.1652 data: 0.0711 max mem: 9377 +Train: [6] [3700/6250] eta: 0:08:22 lr: 0.000125 grad: 0.1128 (0.1324) loss: 0.8746 (0.8835) time: 0.1447 data: 0.0525 max mem: 9377 +Train: [6] [3800/6250] eta: 0:08:00 lr: 0.000125 grad: 0.1019 (0.1319) loss: 0.8763 (0.8833) time: 0.1241 data: 0.0337 max mem: 9377 +Train: [6] [3900/6250] eta: 0:07:39 lr: 0.000125 grad: 0.1179 (0.1316) loss: 0.8772 (0.8831) time: 0.1939 data: 0.1053 max mem: 9377 +Train: [6] [4000/6250] eta: 0:07:17 lr: 0.000125 grad: 0.1148 (0.1311) loss: 0.8772 (0.8829) time: 0.1806 data: 0.1038 max mem: 9377 +Train: [6] [4100/6250] eta: 0:06:56 lr: 0.000125 grad: 0.1119 (0.1306) loss: 0.8725 (0.8827) time: 0.1653 data: 0.0746 max mem: 9377 +Train: [6] [4200/6250] eta: 0:06:35 lr: 0.000125 grad: 0.1082 (0.1302) loss: 0.8739 (0.8826) time: 0.1938 data: 0.1056 max mem: 9377 +Train: [6] [4300/6250] eta: 0:06:13 lr: 0.000125 grad: 0.1032 (0.1297) loss: 0.8789 (0.8824) time: 0.1651 data: 0.0796 max mem: 9377 +Train: [6] [4400/6250] eta: 0:05:53 lr: 0.000125 grad: 0.1058 (0.1293) loss: 0.8764 (0.8822) time: 0.1766 data: 0.0848 max mem: 9377 +Train: [6] [4500/6250] eta: 0:05:33 lr: 0.000125 grad: 0.1003 (0.1289) loss: 0.8735 (0.8821) time: 0.1659 data: 0.0701 max mem: 9377 +Train: [6] [4600/6250] eta: 0:05:15 lr: 0.000125 grad: 0.1074 (0.1285) loss: 0.8783 (0.8819) time: 0.1567 data: 0.0641 max mem: 9377 +Train: [6] [4700/6250] eta: 0:04:55 lr: 0.000125 grad: 0.1013 (0.1280) loss: 0.8709 (0.8817) time: 0.1540 data: 0.0637 max mem: 9377 +Train: [6] [4800/6250] eta: 0:04:35 lr: 0.000125 grad: 0.1103 (0.1278) loss: 0.8713 (0.8816) time: 0.1678 data: 0.0809 max mem: 9377 +Train: [6] [4900/6250] eta: 0:04:16 lr: 0.000125 grad: 0.1075 (0.1274) loss: 0.8777 (0.8814) time: 0.1486 data: 0.0525 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:56 lr: 0.000125 grad: 0.1146 (0.1271) loss: 0.8707 (0.8812) time: 0.1785 data: 0.0951 max mem: 9377 +Train: [6] [5100/6250] eta: 0:03:36 lr: 0.000125 grad: 0.1059 (0.1269) loss: 0.8740 (0.8811) time: 0.1635 data: 0.0794 max mem: 9377 +Train: [6] [5200/6250] eta: 0:03:17 lr: 0.000125 grad: 0.0954 (0.1264) loss: 0.8728 (0.8809) time: 0.1583 data: 0.0681 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:58 lr: 0.000125 grad: 0.1004 (0.1262) loss: 0.8738 (0.8808) time: 0.1817 data: 0.0872 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:39 lr: 0.000125 grad: 0.1110 (0.1258) loss: 0.8755 (0.8806) time: 0.2297 data: 0.1415 max mem: 9377 +Train: [6] [5500/6250] eta: 0:02:21 lr: 0.000125 grad: 0.0996 (0.1255) loss: 0.8692 (0.8805) time: 0.2398 data: 0.1291 max mem: 9377 +Train: [6] [5600/6250] eta: 0:02:03 lr: 0.000125 grad: 0.1024 (0.1253) loss: 0.8727 (0.8803) time: 0.4756 data: 0.3746 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:44 lr: 0.000125 grad: 0.0991 (0.1250) loss: 0.8744 (0.8802) time: 0.2485 data: 0.1513 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:25 lr: 0.000125 grad: 0.1040 (0.1247) loss: 0.8723 (0.8801) time: 0.1807 data: 0.0915 max mem: 9377 +Train: [6] [5900/6250] eta: 0:01:06 lr: 0.000125 grad: 0.1144 (0.1244) loss: 0.8668 (0.8799) time: 0.1021 data: 0.0150 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:47 lr: 0.000125 grad: 0.1115 (0.1243) loss: 0.8727 (0.8798) time: 0.1989 data: 0.0996 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:28 lr: 0.000125 grad: 0.1066 (0.1241) loss: 0.8705 (0.8796) time: 0.1291 data: 0.0335 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:09 lr: 0.000125 grad: 0.0967 (0.1238) loss: 0.8732 (0.8795) time: 0.1454 data: 0.0466 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0944 (0.1236) loss: 0.8702 (0.8794) time: 0.1395 data: 0.0614 max mem: 9377 +Train: [6] Total time: 0:19:52 (0.1908 s / it) +Averaged stats: lr: 0.000125 grad: 0.0944 (0.1236) loss: 0.8702 (0.8794) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:04:56 loss: 0.8668 (0.8668) time: 4.7825 data: 4.7510 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8684 (0.8688) time: 0.1345 data: 0.1099 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (hcp-train-subset): loss: 0.8684 (0.8688) +Eval (hcp-val): [6] [ 0/62] eta: 0:03:38 loss: 0.8639 (0.8639) time: 3.5233 data: 3.4363 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8673 (0.8678) time: 0.1330 data: 0.1071 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:13 (0.2213 s / it) +Averaged stats (hcp-val): loss: 0.8673 (0.8678) +Eval (nsd-val): [6] [ 0/62] eta: 0:05:49 loss: 0.8282 (0.8282) time: 5.6396 data: 5.6088 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8330 (0.8339) time: 0.1250 data: 0.0974 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:14 (0.2338 s / it) +Averaged stats (nsd-val): loss: 0.8330 (0.8339) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 11:07:58 lr: 0.000125 grad: 0.1178 (0.1178) loss: 0.8737 (0.8737) time: 6.4126 data: 6.2564 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:23:25 lr: 0.000125 grad: 0.1145 (0.1180) loss: 0.8802 (0.8821) time: 0.1564 data: 0.0477 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:21:07 lr: 0.000125 grad: 0.1305 (0.1181) loss: 0.8682 (0.8749) time: 0.1929 data: 0.1037 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:19:38 lr: 0.000125 grad: 0.1115 (0.1178) loss: 0.8645 (0.8716) time: 0.1636 data: 0.0747 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:18:48 lr: 0.000125 grad: 0.1051 (0.1157) loss: 0.8689 (0.8703) time: 0.2043 data: 0.1116 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:17:45 lr: 0.000125 grad: 0.0960 (0.1150) loss: 0.8680 (0.8698) time: 0.1613 data: 0.0802 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:17:27 lr: 0.000125 grad: 0.1386 (0.1163) loss: 0.8699 (0.8695) time: 0.1143 data: 0.0258 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:16:54 lr: 0.000125 grad: 0.1035 (0.1153) loss: 0.8661 (0.8692) time: 0.2179 data: 0.1315 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:16:21 lr: 0.000125 grad: 0.1004 (0.1142) loss: 0.8658 (0.8691) time: 0.1811 data: 0.0920 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:15:58 lr: 0.000125 grad: 0.0998 (0.1145) loss: 0.8654 (0.8690) time: 0.1372 data: 0.0391 max mem: 9377 +Train: [7] [1000/6250] eta: 0:15:29 lr: 0.000125 grad: 0.1041 (0.1137) loss: 0.8695 (0.8688) time: 0.1568 data: 0.0759 max mem: 9377 +Train: [7] [1100/6250] eta: 0:15:06 lr: 0.000125 grad: 0.1112 (0.1134) loss: 0.8686 (0.8685) time: 0.1654 data: 0.0791 max mem: 9377 +Train: [7] [1200/6250] eta: 0:14:39 lr: 0.000125 grad: 0.0991 (0.1131) loss: 0.8618 (0.8682) time: 0.1735 data: 0.0847 max mem: 9377 +Train: [7] [1300/6250] eta: 0:14:14 lr: 0.000125 grad: 0.1002 (0.1122) loss: 0.8634 (0.8680) time: 0.1542 data: 0.0704 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:59 lr: 0.000125 grad: 0.1052 (0.1119) loss: 0.8669 (0.8678) time: 0.1731 data: 0.0949 max mem: 9377 +Train: [7] [1500/6250] eta: 0:13:54 lr: 0.000125 grad: 0.0905 (0.1115) loss: 0.8621 (0.8676) time: 0.1502 data: 0.0525 max mem: 9377 +Train: [7] [1600/6250] eta: 0:13:37 lr: 0.000125 grad: 0.0916 (0.1111) loss: 0.8722 (0.8676) time: 0.1188 data: 0.0157 max mem: 9377 +Train: [7] [1700/6250] eta: 0:13:22 lr: 0.000125 grad: 0.0984 (0.1105) loss: 0.8659 (0.8673) time: 0.1244 data: 0.0329 max mem: 9377 +Train: [7] [1800/6250] eta: 0:13:25 lr: 0.000125 grad: 0.1000 (0.1100) loss: 0.8640 (0.8672) time: 0.1587 data: 0.0777 max mem: 9377 +Train: [7] [1900/6250] eta: 0:13:34 lr: 0.000125 grad: 0.1058 (0.1096) loss: 0.8642 (0.8670) time: 0.4371 data: 0.3439 max mem: 9377 +Train: [7] [2000/6250] eta: 0:13:45 lr: 0.000125 grad: 0.0977 (0.1092) loss: 0.8675 (0.8670) time: 0.1013 data: 0.0002 max mem: 9377 +Train: [7] [2100/6250] eta: 0:13:36 lr: 0.000125 grad: 0.0955 (0.1088) loss: 0.8643 (0.8668) time: 0.3000 data: 0.1933 max mem: 9377 +Train: [7] [2200/6250] eta: 0:13:44 lr: 0.000125 grad: 0.0987 (0.1086) loss: 0.8646 (0.8667) time: 0.5757 data: 0.4691 max mem: 9377 +Train: [7] [2300/6250] eta: 0:13:25 lr: 0.000125 grad: 0.1005 (0.1082) loss: 0.8647 (0.8666) time: 0.2959 data: 0.2109 max mem: 9377 +Train: [7] [2400/6250] eta: 0:13:13 lr: 0.000125 grad: 0.0994 (0.1079) loss: 0.8692 (0.8667) time: 0.1414 data: 0.0311 max mem: 9377 +Train: [7] [2500/6250] eta: 0:12:48 lr: 0.000125 grad: 0.0940 (0.1077) loss: 0.8647 (0.8667) time: 0.2476 data: 0.1607 max mem: 9377 +Train: [7] [2600/6250] eta: 0:12:19 lr: 0.000125 grad: 0.0907 (0.1073) loss: 0.8672 (0.8666) time: 0.1615 data: 0.0732 max mem: 9377 +Train: [7] [2700/6250] eta: 0:11:54 lr: 0.000125 grad: 0.0859 (0.1068) loss: 0.8641 (0.8666) time: 0.1507 data: 0.0652 max mem: 9377 +Train: [7] [2800/6250] eta: 0:11:30 lr: 0.000125 grad: 0.0971 (0.1066) loss: 0.8644 (0.8665) time: 0.1266 data: 0.0490 max mem: 9377 +Train: [7] [2900/6250] eta: 0:11:14 lr: 0.000125 grad: 0.0991 (0.1063) loss: 0.8655 (0.8665) time: 0.1494 data: 0.0589 max mem: 9377 +Train: [7] [3000/6250] eta: 0:10:49 lr: 0.000125 grad: 0.0930 (0.1060) loss: 0.8659 (0.8664) time: 0.1204 data: 0.0303 max mem: 9377 +Train: [7] [3100/6250] eta: 0:10:25 lr: 0.000125 grad: 0.0978 (0.1057) loss: 0.8638 (0.8664) time: 0.2081 data: 0.1089 max mem: 9377 +Train: [7] [3200/6250] eta: 0:10:02 lr: 0.000125 grad: 0.0907 (0.1054) loss: 0.8666 (0.8663) time: 0.2359 data: 0.1495 max mem: 9377 +Train: [7] [3300/6250] eta: 0:09:40 lr: 0.000125 grad: 0.0963 (0.1053) loss: 0.8613 (0.8663) time: 0.2275 data: 0.1413 max mem: 9377 +Train: [7] [3400/6250] eta: 0:09:18 lr: 0.000125 grad: 0.0932 (0.1051) loss: 0.8626 (0.8662) time: 0.1624 data: 0.0766 max mem: 9377 +Train: [7] [3500/6250] eta: 0:09:07 lr: 0.000125 grad: 0.0971 (0.1049) loss: 0.8641 (0.8662) time: 0.1116 data: 0.0161 max mem: 9377 +Train: [7] [3600/6250] eta: 0:08:45 lr: 0.000125 grad: 0.0946 (0.1048) loss: 0.8643 (0.8661) time: 0.2105 data: 0.1140 max mem: 9377 +Train: [7] [3700/6250] eta: 0:08:22 lr: 0.000125 grad: 0.0927 (0.1045) loss: 0.8624 (0.8661) time: 0.1650 data: 0.0870 max mem: 9377 +Train: [7] [3800/6250] eta: 0:08:00 lr: 0.000125 grad: 0.0959 (0.1043) loss: 0.8621 (0.8660) time: 0.1213 data: 0.0167 max mem: 9377 +Train: [7] [3900/6250] eta: 0:07:38 lr: 0.000125 grad: 0.1023 (0.1042) loss: 0.8645 (0.8660) time: 0.1470 data: 0.0658 max mem: 9377 +Train: [7] [4000/6250] eta: 0:07:16 lr: 0.000125 grad: 0.0921 (0.1040) loss: 0.8594 (0.8659) time: 0.1065 data: 0.0205 max mem: 9377 +Train: [7] [4100/6250] eta: 0:06:56 lr: 0.000125 grad: 0.0958 (0.1038) loss: 0.8627 (0.8658) time: 0.2935 data: 0.2155 max mem: 9377 +Train: [7] [4200/6250] eta: 0:06:35 lr: 0.000125 grad: 0.0906 (0.1036) loss: 0.8581 (0.8658) time: 0.1383 data: 0.0467 max mem: 9377 +Train: [7] [4300/6250] eta: 0:06:14 lr: 0.000125 grad: 0.0942 (0.1034) loss: 0.8596 (0.8657) time: 0.1179 data: 0.0334 max mem: 9377 +Train: [7] [4400/6250] eta: 0:05:54 lr: 0.000125 grad: 0.0908 (0.1031) loss: 0.8622 (0.8656) time: 0.1746 data: 0.0717 max mem: 9377 +Train: [7] [4500/6250] eta: 0:05:33 lr: 0.000125 grad: 0.0865 (0.1029) loss: 0.8661 (0.8656) time: 0.1655 data: 0.0828 max mem: 9377 +Train: [7] [4600/6250] eta: 0:05:13 lr: 0.000125 grad: 0.0913 (0.1026) loss: 0.8643 (0.8656) time: 0.1646 data: 0.0814 max mem: 9377 +Train: [7] [4700/6250] eta: 0:04:54 lr: 0.000125 grad: 0.0952 (0.1025) loss: 0.8646 (0.8655) time: 0.2000 data: 0.1194 max mem: 9377 +Train: [7] [4800/6250] eta: 0:04:33 lr: 0.000125 grad: 0.0897 (0.1022) loss: 0.8597 (0.8655) time: 0.1438 data: 0.0665 max mem: 9377 +Train: [7] [4900/6250] eta: 0:04:14 lr: 0.000125 grad: 0.0934 (0.1021) loss: 0.8684 (0.8655) time: 0.2281 data: 0.1465 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:55 lr: 0.000125 grad: 0.0906 (0.1019) loss: 0.8645 (0.8655) time: 0.2635 data: 0.1837 max mem: 9377 +Train: [7] [5100/6250] eta: 0:03:37 lr: 0.000125 grad: 0.0891 (0.1017) loss: 0.8657 (0.8655) time: 0.1177 data: 0.0003 max mem: 9377 +Train: [7] [5200/6250] eta: 0:03:21 lr: 0.000125 grad: 0.0897 (0.1015) loss: 0.8675 (0.8655) time: 0.1771 data: 0.0423 max mem: 9377 +Train: [7] [5300/6250] eta: 0:03:03 lr: 0.000125 grad: 0.0891 (0.1013) loss: 0.8622 (0.8655) time: 0.2448 data: 0.1598 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:43 lr: 0.000125 grad: 0.0820 (0.1011) loss: 0.8632 (0.8654) time: 0.0977 data: 0.0002 max mem: 9377 +Train: [7] [5500/6250] eta: 0:02:24 lr: 0.000125 grad: 0.0874 (0.1009) loss: 0.8659 (0.8654) time: 0.1175 data: 0.0127 max mem: 9377 +Train: [7] [5600/6250] eta: 0:02:05 lr: 0.000125 grad: 0.0841 (0.1007) loss: 0.8656 (0.8654) time: 0.1440 data: 0.0340 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:46 lr: 0.000125 grad: 0.0827 (0.1005) loss: 0.8645 (0.8654) time: 0.1300 data: 0.0503 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:26 lr: 0.000125 grad: 0.0869 (0.1003) loss: 0.8614 (0.8653) time: 0.0906 data: 0.0002 max mem: 9377 +Train: [7] [5900/6250] eta: 0:01:07 lr: 0.000125 grad: 0.0852 (0.1001) loss: 0.8601 (0.8652) time: 0.1901 data: 0.1092 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:47 lr: 0.000125 grad: 0.0903 (0.1000) loss: 0.8590 (0.8652) time: 0.1516 data: 0.0588 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:28 lr: 0.000125 grad: 0.0852 (0.0999) loss: 0.8577 (0.8651) time: 0.2519 data: 0.1710 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:09 lr: 0.000125 grad: 0.0931 (0.0997) loss: 0.8544 (0.8650) time: 0.3730 data: 0.2957 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0937 (0.0997) loss: 0.8561 (0.8650) time: 0.1330 data: 0.0386 max mem: 9377 +Train: [7] Total time: 0:20:03 (0.1925 s / it) +Averaged stats: lr: 0.000125 grad: 0.0937 (0.0997) loss: 0.8561 (0.8650) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:03:36 loss: 0.8578 (0.8578) time: 3.4872 data: 3.4172 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8609 (0.8612) time: 0.1375 data: 0.1117 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:13 (0.2249 s / it) +Averaged stats (hcp-train-subset): loss: 0.8609 (0.8612) +Eval (hcp-val): [7] [ 0/62] eta: 0:04:22 loss: 0.8584 (0.8584) time: 4.2293 data: 4.1802 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8594 (0.8604) time: 0.1295 data: 0.1026 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:13 (0.2124 s / it) +Averaged stats (hcp-val): loss: 0.8594 (0.8604) +Eval (nsd-val): [7] [ 0/62] eta: 0:05:56 loss: 0.8189 (0.8189) time: 5.7532 data: 5.7122 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8270 (0.8276) time: 0.1393 data: 0.1143 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (nsd-val): loss: 0.8270 (0.8276) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 9:24:59 lr: 0.000125 grad: 0.0976 (0.0976) loss: 0.8752 (0.8752) time: 5.4240 data: 5.1621 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:24:37 lr: 0.000125 grad: 0.0997 (0.1086) loss: 0.8586 (0.8620) time: 0.1975 data: 0.1038 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:21:20 lr: 0.000125 grad: 0.0957 (0.1038) loss: 0.8592 (0.8601) time: 0.1762 data: 0.0718 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:19:25 lr: 0.000125 grad: 0.0907 (0.1002) loss: 0.8563 (0.8597) time: 0.1649 data: 0.0694 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:18:26 lr: 0.000125 grad: 0.0850 (0.0982) loss: 0.8629 (0.8605) time: 0.1699 data: 0.0581 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:17:35 lr: 0.000125 grad: 0.0918 (0.0963) loss: 0.8588 (0.8605) time: 0.1567 data: 0.0437 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:16:49 lr: 0.000125 grad: 0.0859 (0.0956) loss: 0.8587 (0.8604) time: 0.1554 data: 0.0664 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:16:18 lr: 0.000125 grad: 0.0967 (0.0955) loss: 0.8523 (0.8596) time: 0.1615 data: 0.0737 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:16:13 lr: 0.000125 grad: 0.0923 (0.0951) loss: 0.8581 (0.8593) time: 0.1128 data: 0.0004 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:15:52 lr: 0.000125 grad: 0.0971 (0.0950) loss: 0.8534 (0.8590) time: 0.2343 data: 0.1484 max mem: 9377 +Train: [8] [1000/6250] eta: 0:15:35 lr: 0.000125 grad: 0.0867 (0.0945) loss: 0.8578 (0.8590) time: 0.1111 data: 0.0272 max mem: 9377 +Train: [8] [1100/6250] eta: 0:16:03 lr: 0.000125 grad: 0.0892 (0.0943) loss: 0.8580 (0.8587) time: 0.1530 data: 0.0362 max mem: 9377 +Train: [8] [1200/6250] eta: 0:15:44 lr: 0.000125 grad: 0.0852 (0.0939) loss: 0.8586 (0.8586) time: 0.1910 data: 0.0609 max mem: 9377 +Train: [8] [1300/6250] eta: 0:15:42 lr: 0.000125 grad: 0.0909 (0.0938) loss: 0.8598 (0.8587) time: 0.1004 data: 0.0002 max mem: 9377 +Train: [8] [1400/6250] eta: 0:15:10 lr: 0.000125 grad: 0.1076 (0.0938) loss: 0.8546 (0.8586) time: 0.1379 data: 0.0516 max mem: 9377 +Train: [8] [1500/6250] eta: 0:14:38 lr: 0.000125 grad: 0.0873 (0.0936) loss: 0.8588 (0.8585) time: 0.1199 data: 0.0124 max mem: 9377 +Train: [8] [1600/6250] eta: 0:14:22 lr: 0.000125 grad: 0.0834 (0.0932) loss: 0.8589 (0.8586) time: 0.0955 data: 0.0003 max mem: 9377 +Train: [8] [1700/6250] eta: 0:14:03 lr: 0.000125 grad: 0.0808 (0.0929) loss: 0.8587 (0.8584) time: 0.1820 data: 0.0980 max mem: 9377 +Train: [8] [1800/6250] eta: 0:13:43 lr: 0.000125 grad: 0.0839 (0.0926) loss: 0.8581 (0.8582) time: 0.1841 data: 0.0874 max mem: 9377 +Train: [8] [1900/6250] eta: 0:13:36 lr: 0.000125 grad: 0.0979 (0.0926) loss: 0.8535 (0.8580) time: 0.1155 data: 0.0054 max mem: 9377 +Train: [8] [2000/6250] eta: 0:13:33 lr: 0.000125 grad: 0.0860 (0.0925) loss: 0.8521 (0.8579) time: 0.1018 data: 0.0002 max mem: 9377 +Train: [8] [2100/6250] eta: 0:13:29 lr: 0.000125 grad: 0.0932 (0.0923) loss: 0.8548 (0.8578) time: 0.4610 data: 0.2960 max mem: 9377 +Train: [8] [2200/6250] eta: 0:13:12 lr: 0.000125 grad: 0.0808 (0.0920) loss: 0.8580 (0.8577) time: 0.2231 data: 0.1364 max mem: 9377 +Train: [8] [2300/6250] eta: 0:13:00 lr: 0.000125 grad: 0.0832 (0.0919) loss: 0.8590 (0.8576) time: 0.1139 data: 0.0003 max mem: 9377 +Train: [8] [2400/6250] eta: 0:12:55 lr: 0.000125 grad: 0.0852 (0.0920) loss: 0.8576 (0.8575) time: 0.1499 data: 0.0530 max mem: 9377 +Train: [8] [2500/6250] eta: 0:12:34 lr: 0.000125 grad: 0.0932 (0.0919) loss: 0.8542 (0.8575) time: 0.2494 data: 0.1616 max mem: 9377 +Train: [8] [2600/6250] eta: 0:12:07 lr: 0.000125 grad: 0.0842 (0.0918) loss: 0.8580 (0.8575) time: 0.1130 data: 0.0157 max mem: 9377 +Train: [8] [2700/6250] eta: 0:11:42 lr: 0.000125 grad: 0.0841 (0.0916) loss: 0.8577 (0.8575) time: 0.1660 data: 0.0706 max mem: 9377 +Train: [8] [2800/6250] eta: 0:11:18 lr: 0.000125 grad: 0.0902 (0.0914) loss: 0.8578 (0.8575) time: 0.1672 data: 0.0833 max mem: 9377 +Train: [8] [2900/6250] eta: 0:10:54 lr: 0.000125 grad: 0.0852 (0.0913) loss: 0.8587 (0.8574) time: 0.1474 data: 0.0586 max mem: 9377 +Train: [8] [3000/6250] eta: 0:10:38 lr: 0.000125 grad: 0.0860 (0.0911) loss: 0.8532 (0.8574) time: 0.4415 data: 0.3623 max mem: 9377 +Train: [8] [3100/6250] eta: 0:10:17 lr: 0.000125 grad: 0.0835 (0.0909) loss: 0.8583 (0.8574) time: 0.3040 data: 0.2152 max mem: 9377 +Train: [8] [3200/6250] eta: 0:09:52 lr: 0.000125 grad: 0.0816 (0.0907) loss: 0.8556 (0.8574) time: 0.1059 data: 0.0005 max mem: 9377 +Train: [8] [3300/6250] eta: 0:09:29 lr: 0.000125 grad: 0.0787 (0.0906) loss: 0.8576 (0.8574) time: 0.1333 data: 0.0439 max mem: 9377 +Train: [8] [3400/6250] eta: 0:09:07 lr: 0.000125 grad: 0.0835 (0.0905) loss: 0.8627 (0.8574) time: 0.1593 data: 0.0733 max mem: 9377 +Train: [8] [3500/6250] eta: 0:08:48 lr: 0.000125 grad: 0.0804 (0.0903) loss: 0.8603 (0.8574) time: 0.2985 data: 0.2202 max mem: 9377 +Train: [8] [3600/6250] eta: 0:08:24 lr: 0.000125 grad: 0.0873 (0.0901) loss: 0.8593 (0.8574) time: 0.1471 data: 0.0496 max mem: 9377 +Train: [8] [3700/6250] eta: 0:08:05 lr: 0.000125 grad: 0.0767 (0.0899) loss: 0.8589 (0.8574) time: 0.0981 data: 0.0004 max mem: 9377 +Train: [8] [3800/6250] eta: 0:07:43 lr: 0.000125 grad: 0.0780 (0.0897) loss: 0.8581 (0.8575) time: 0.1640 data: 0.0791 max mem: 9377 +Train: [8] [3900/6250] eta: 0:07:22 lr: 0.000125 grad: 0.0837 (0.0896) loss: 0.8562 (0.8575) time: 0.1535 data: 0.0713 max mem: 9377 +Train: [8] [4000/6250] eta: 0:07:02 lr: 0.000125 grad: 0.0792 (0.0895) loss: 0.8576 (0.8575) time: 0.1929 data: 0.1124 max mem: 9377 +Train: [8] [4100/6250] eta: 0:06:42 lr: 0.000125 grad: 0.0808 (0.0892) loss: 0.8573 (0.8575) time: 0.1933 data: 0.1105 max mem: 9377 +Train: [8] [4200/6250] eta: 0:06:22 lr: 0.000125 grad: 0.0821 (0.0891) loss: 0.8553 (0.8576) time: 0.1912 data: 0.1052 max mem: 9377 +Train: [8] [4300/6250] eta: 0:06:02 lr: 0.000125 grad: 0.0806 (0.0889) loss: 0.8598 (0.8576) time: 0.1894 data: 0.1032 max mem: 9377 +Train: [8] [4400/6250] eta: 0:05:42 lr: 0.000125 grad: 0.0797 (0.0887) loss: 0.8587 (0.8576) time: 0.0915 data: 0.0002 max mem: 9377 +Train: [8] [4500/6250] eta: 0:05:23 lr: 0.000125 grad: 0.0828 (0.0886) loss: 0.8579 (0.8576) time: 0.2133 data: 0.1275 max mem: 9377 +Train: [8] [4600/6250] eta: 0:05:04 lr: 0.000125 grad: 0.0802 (0.0885) loss: 0.8555 (0.8576) time: 0.2266 data: 0.1338 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:45 lr: 0.000125 grad: 0.0843 (0.0884) loss: 0.8526 (0.8576) time: 0.1990 data: 0.1132 max mem: 9377 +Train: [8] [4800/6250] eta: 0:04:27 lr: 0.000125 grad: 0.0826 (0.0883) loss: 0.8579 (0.8576) time: 0.2270 data: 0.1278 max mem: 9377 +Train: [8] [4900/6250] eta: 0:04:10 lr: 0.000125 grad: 0.0826 (0.0882) loss: 0.8581 (0.8575) time: 0.1922 data: 0.0901 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:54 lr: 0.000125 grad: 0.0771 (0.0880) loss: 0.8590 (0.8575) time: 0.5783 data: 0.4047 max mem: 9377 +Train: [8] [5100/6250] eta: 0:03:36 lr: 0.000125 grad: 0.0811 (0.0879) loss: 0.8561 (0.8575) time: 0.1870 data: 0.0988 max mem: 9377 +Train: [8] [5200/6250] eta: 0:03:16 lr: 0.000124 grad: 0.0825 (0.0878) loss: 0.8535 (0.8574) time: 0.1171 data: 0.0319 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:58 lr: 0.000124 grad: 0.0791 (0.0877) loss: 0.8537 (0.8574) time: 0.2534 data: 0.1585 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:39 lr: 0.000124 grad: 0.0813 (0.0876) loss: 0.8556 (0.8574) time: 0.1464 data: 0.0673 max mem: 9377 +Train: [8] [5500/6250] eta: 0:02:20 lr: 0.000124 grad: 0.0803 (0.0875) loss: 0.8521 (0.8574) time: 0.1949 data: 0.0908 max mem: 9377 +Train: [8] [5600/6250] eta: 0:02:03 lr: 0.000124 grad: 0.0798 (0.0874) loss: 0.8543 (0.8573) time: 0.8237 data: 0.7249 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:44 lr: 0.000124 grad: 0.0887 (0.0874) loss: 0.8559 (0.8573) time: 0.1269 data: 0.0003 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:25 lr: 0.000124 grad: 0.0828 (0.0874) loss: 0.8543 (0.8572) time: 0.2183 data: 0.1224 max mem: 9377 +Train: [8] [5900/6250] eta: 0:01:05 lr: 0.000124 grad: 0.0890 (0.0874) loss: 0.8498 (0.8572) time: 0.1826 data: 0.0962 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:46 lr: 0.000124 grad: 0.0794 (0.0874) loss: 0.8549 (0.8571) time: 0.2434 data: 0.1605 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:28 lr: 0.000124 grad: 0.0852 (0.0874) loss: 0.8546 (0.8570) time: 0.1184 data: 0.0266 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:09 lr: 0.000124 grad: 0.0808 (0.0874) loss: 0.8525 (0.8569) time: 0.1443 data: 0.0614 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0835 (0.0874) loss: 0.8504 (0.8568) time: 0.1758 data: 0.0890 max mem: 9377 +Train: [8] Total time: 0:19:32 (0.1877 s / it) +Averaged stats: lr: 0.000124 grad: 0.0835 (0.0874) loss: 0.8504 (0.8568) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:04:08 loss: 0.8573 (0.8573) time: 4.0018 data: 3.9172 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8572 (0.8558) time: 0.1256 data: 0.0990 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:14 (0.2264 s / it) +Averaged stats (hcp-train-subset): loss: 0.8572 (0.8558) +Eval (hcp-val): [8] [ 0/62] eta: 0:04:28 loss: 0.8555 (0.8555) time: 4.3354 data: 4.2286 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8553 (0.8557) time: 0.0999 data: 0.0747 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-val): loss: 0.8553 (0.8557) +Eval (nsd-val): [8] [ 0/62] eta: 0:05:44 loss: 0.8101 (0.8101) time: 5.5625 data: 5.5300 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8226 (0.8230) time: 0.1235 data: 0.0968 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:13 (0.2211 s / it) +Averaged stats (nsd-val): loss: 0.8226 (0.8230) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 9:34:57 lr: 0.000124 grad: 0.1536 (0.1536) loss: 0.8550 (0.8550) time: 5.5197 data: 5.2984 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:22:52 lr: 0.000124 grad: 0.0823 (0.0916) loss: 0.8677 (0.8641) time: 0.1514 data: 0.0467 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:20:28 lr: 0.000124 grad: 0.0848 (0.0880) loss: 0.8570 (0.8619) time: 0.2039 data: 0.1184 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:19:30 lr: 0.000124 grad: 0.0814 (0.0871) loss: 0.8569 (0.8602) time: 0.2068 data: 0.1102 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:19:00 lr: 0.000124 grad: 0.0797 (0.0867) loss: 0.8602 (0.8594) time: 0.1974 data: 0.1011 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:20:49 lr: 0.000124 grad: 0.0827 (0.0861) loss: 0.8579 (0.8586) time: 0.5092 data: 0.4140 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:20:43 lr: 0.000124 grad: 0.0833 (0.0858) loss: 0.8564 (0.8581) time: 0.1395 data: 0.0454 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:20:10 lr: 0.000124 grad: 0.0762 (0.0851) loss: 0.8591 (0.8577) time: 0.2342 data: 0.1350 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:19:20 lr: 0.000124 grad: 0.0808 (0.0845) loss: 0.8566 (0.8576) time: 0.2047 data: 0.1132 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:18:30 lr: 0.000124 grad: 0.0773 (0.0840) loss: 0.8542 (0.8573) time: 0.2014 data: 0.1054 max mem: 9377 +Train: [9] [1000/6250] eta: 0:18:13 lr: 0.000124 grad: 0.0838 (0.0836) loss: 0.8568 (0.8570) time: 0.3100 data: 0.2336 max mem: 9377 +Train: [9] [1100/6250] eta: 0:17:27 lr: 0.000124 grad: 0.0750 (0.0835) loss: 0.8553 (0.8569) time: 0.2150 data: 0.1348 max mem: 9377 +Train: [9] [1200/6250] eta: 0:16:49 lr: 0.000124 grad: 0.0739 (0.0831) loss: 0.8579 (0.8570) time: 0.1598 data: 0.0674 max mem: 9377 +Train: [9] [1300/6250] eta: 0:16:10 lr: 0.000124 grad: 0.0847 (0.0828) loss: 0.8568 (0.8569) time: 0.1785 data: 0.0885 max mem: 9377 +Train: [9] [1400/6250] eta: 0:15:39 lr: 0.000124 grad: 0.0757 (0.0825) loss: 0.8583 (0.8569) time: 0.1579 data: 0.0718 max mem: 9377 +Train: [9] [1500/6250] eta: 0:15:16 lr: 0.000124 grad: 0.0806 (0.0823) loss: 0.8545 (0.8569) time: 0.1758 data: 0.0774 max mem: 9377 +Train: [9] [1600/6250] eta: 0:14:54 lr: 0.000124 grad: 0.0775 (0.0824) loss: 0.8546 (0.8568) time: 0.2052 data: 0.1151 max mem: 9377 +Train: [9] [1700/6250] eta: 0:14:40 lr: 0.000124 grad: 0.0756 (0.0823) loss: 0.8523 (0.8567) time: 0.2771 data: 0.1723 max mem: 9377 +Train: [9] [1800/6250] eta: 0:14:14 lr: 0.000124 grad: 0.0783 (0.0824) loss: 0.8514 (0.8564) time: 0.2346 data: 0.1235 max mem: 9377 +Train: [9] [1900/6250] eta: 0:13:51 lr: 0.000124 grad: 0.0777 (0.0823) loss: 0.8546 (0.8563) time: 0.1417 data: 0.0262 max mem: 9377 +Train: [9] [2000/6250] eta: 0:13:29 lr: 0.000124 grad: 0.0776 (0.0823) loss: 0.8532 (0.8562) time: 0.2026 data: 0.1082 max mem: 9377 +Train: [9] [2100/6250] eta: 0:13:06 lr: 0.000124 grad: 0.0820 (0.0825) loss: 0.8512 (0.8560) time: 0.1414 data: 0.0539 max mem: 9377 +Train: [9] [2200/6250] eta: 0:13:17 lr: 0.000124 grad: 0.0844 (0.0825) loss: 0.8566 (0.8559) time: 1.0961 data: 1.0115 max mem: 9377 +Train: [9] [2300/6250] eta: 0:12:48 lr: 0.000124 grad: 0.0808 (0.0824) loss: 0.8544 (0.8559) time: 0.1460 data: 0.0421 max mem: 9377 +Train: [9] [2400/6250] eta: 0:12:25 lr: 0.000124 grad: 0.0743 (0.0824) loss: 0.8529 (0.8558) time: 0.1883 data: 0.0786 max mem: 9377 +Train: [9] [2500/6250] eta: 0:11:59 lr: 0.000124 grad: 0.0804 (0.0826) loss: 0.8566 (0.8557) time: 0.1750 data: 0.0846 max mem: 9377 +Train: [9] [2600/6250] eta: 0:11:32 lr: 0.000124 grad: 0.0794 (0.0825) loss: 0.8540 (0.8557) time: 0.1196 data: 0.0311 max mem: 9377 +Train: [9] [2700/6250] eta: 0:11:14 lr: 0.000124 grad: 0.0770 (0.0824) loss: 0.8605 (0.8557) time: 0.1166 data: 0.0007 max mem: 9377 +Train: [9] [2800/6250] eta: 0:10:52 lr: 0.000124 grad: 0.0738 (0.0824) loss: 0.8570 (0.8556) time: 0.1309 data: 0.0443 max mem: 9377 +Train: [9] [2900/6250] eta: 0:10:30 lr: 0.000124 grad: 0.0738 (0.0822) loss: 0.8528 (0.8556) time: 0.0988 data: 0.0115 max mem: 9377 +Train: [9] [3000/6250] eta: 0:10:11 lr: 0.000124 grad: 0.0855 (0.0824) loss: 0.8586 (0.8555) time: 0.1640 data: 0.0837 max mem: 9377 +Train: [9] [3100/6250] eta: 0:09:50 lr: 0.000124 grad: 0.0779 (0.0823) loss: 0.8542 (0.8555) time: 0.1156 data: 0.0295 max mem: 9377 +Train: [9] [3200/6250] eta: 0:09:30 lr: 0.000124 grad: 0.0778 (0.0822) loss: 0.8523 (0.8554) time: 0.1370 data: 0.0438 max mem: 9377 +Train: [9] [3300/6250] eta: 0:09:10 lr: 0.000124 grad: 0.0725 (0.0821) loss: 0.8584 (0.8554) time: 0.1629 data: 0.0824 max mem: 9377 +Train: [9] [3400/6250] eta: 0:08:50 lr: 0.000124 grad: 0.0768 (0.0820) loss: 0.8529 (0.8554) time: 0.2535 data: 0.1668 max mem: 9377 +Train: [9] [3500/6250] eta: 0:08:30 lr: 0.000124 grad: 0.0743 (0.0819) loss: 0.8554 (0.8554) time: 0.2491 data: 0.1674 max mem: 9377 +Train: [9] [3600/6250] eta: 0:08:08 lr: 0.000124 grad: 0.0765 (0.0819) loss: 0.8553 (0.8553) time: 0.1226 data: 0.0292 max mem: 9377 +Train: [9] [3700/6250] eta: 0:07:48 lr: 0.000124 grad: 0.0750 (0.0818) loss: 0.8571 (0.8553) time: 0.1698 data: 0.0843 max mem: 9377 +Train: [9] [3800/6250] eta: 0:07:28 lr: 0.000124 grad: 0.0776 (0.0818) loss: 0.8535 (0.8553) time: 0.2237 data: 0.1408 max mem: 9377 +Train: [9] [3900/6250] eta: 0:07:10 lr: 0.000124 grad: 0.0743 (0.0819) loss: 0.8559 (0.8552) time: 0.0942 data: 0.0002 max mem: 9377 +Train: [9] [4000/6250] eta: 0:06:52 lr: 0.000124 grad: 0.0759 (0.0819) loss: 0.8518 (0.8552) time: 0.1837 data: 0.0944 max mem: 9377 +Train: [9] [4100/6250] eta: 0:06:33 lr: 0.000124 grad: 0.0735 (0.0818) loss: 0.8536 (0.8552) time: 0.1049 data: 0.0002 max mem: 9377 +Train: [9] [4200/6250] eta: 0:06:15 lr: 0.000124 grad: 0.0795 (0.0818) loss: 0.8500 (0.8551) time: 0.2213 data: 0.1328 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:56 lr: 0.000124 grad: 0.0809 (0.0818) loss: 0.8571 (0.8551) time: 0.1199 data: 0.0233 max mem: 9377 +Train: [9] [4400/6250] eta: 0:05:39 lr: 0.000124 grad: 0.0840 (0.0818) loss: 0.8531 (0.8550) time: 0.2441 data: 0.1462 max mem: 9377 +Train: [9] [4500/6250] eta: 0:05:22 lr: 0.000124 grad: 0.0789 (0.0817) loss: 0.8548 (0.8550) time: 0.1025 data: 0.0003 max mem: 9377 +Train: [9] [4600/6250] eta: 0:05:03 lr: 0.000124 grad: 0.0778 (0.0817) loss: 0.8520 (0.8550) time: 0.1609 data: 0.0770 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:45 lr: 0.000124 grad: 0.0804 (0.0817) loss: 0.8563 (0.8549) time: 0.1465 data: 0.0553 max mem: 9377 +Train: [9] [4800/6250] eta: 0:04:28 lr: 0.000124 grad: 0.0767 (0.0817) loss: 0.8561 (0.8549) time: 0.3311 data: 0.2312 max mem: 9377 +Train: [9] [4900/6250] eta: 0:04:08 lr: 0.000124 grad: 0.0736 (0.0816) loss: 0.8531 (0.8549) time: 0.1327 data: 0.0467 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:49 lr: 0.000124 grad: 0.0773 (0.0817) loss: 0.8542 (0.8549) time: 0.1484 data: 0.0577 max mem: 9377 +Train: [9] [5100/6250] eta: 0:03:30 lr: 0.000124 grad: 0.0741 (0.0816) loss: 0.8562 (0.8549) time: 0.1403 data: 0.0541 max mem: 9377 +Train: [9] [5200/6250] eta: 0:03:11 lr: 0.000124 grad: 0.0784 (0.0816) loss: 0.8511 (0.8549) time: 0.1451 data: 0.0581 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:53 lr: 0.000124 grad: 0.0744 (0.0815) loss: 0.8564 (0.8549) time: 0.1482 data: 0.0655 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:34 lr: 0.000124 grad: 0.0753 (0.0814) loss: 0.8523 (0.8548) time: 0.2205 data: 0.1385 max mem: 9377 +Train: [9] [5500/6250] eta: 0:02:16 lr: 0.000124 grad: 0.0795 (0.0814) loss: 0.8539 (0.8548) time: 0.1620 data: 0.0701 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:57 lr: 0.000124 grad: 0.0757 (0.0813) loss: 0.8550 (0.8547) time: 0.1311 data: 0.0432 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:39 lr: 0.000124 grad: 0.0788 (0.0813) loss: 0.8573 (0.8547) time: 0.1638 data: 0.0774 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:21 lr: 0.000124 grad: 0.0805 (0.0812) loss: 0.8513 (0.8547) time: 0.1460 data: 0.0654 max mem: 9377 +Train: [9] [5900/6250] eta: 0:01:02 lr: 0.000124 grad: 0.0791 (0.0812) loss: 0.8518 (0.8547) time: 0.1422 data: 0.0564 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:44 lr: 0.000124 grad: 0.0740 (0.0811) loss: 0.8527 (0.8547) time: 0.1128 data: 0.0319 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:26 lr: 0.000124 grad: 0.0714 (0.0810) loss: 0.8503 (0.8546) time: 0.1627 data: 0.0821 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0803 (0.0810) loss: 0.8522 (0.8546) time: 0.1733 data: 0.0825 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0761 (0.0810) loss: 0.8464 (0.8546) time: 0.1161 data: 0.0307 max mem: 9377 +Train: [9] Total time: 0:18:42 (0.1796 s / it) +Averaged stats: lr: 0.000124 grad: 0.0761 (0.0810) loss: 0.8464 (0.8546) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:05:16 loss: 0.8494 (0.8494) time: 5.0995 data: 5.0358 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8535 (0.8529) time: 0.1375 data: 0.1120 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (hcp-train-subset): loss: 0.8535 (0.8529) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [9] [ 0/62] eta: 0:05:50 loss: 0.8521 (0.8521) time: 5.6579 data: 5.6110 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8537 (0.8534) time: 0.1476 data: 0.1224 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-val): loss: 0.8537 (0.8534) +Making plots (hcp-val): example=3 +Eval (nsd-val): [9] [ 0/62] eta: 0:06:21 loss: 0.8099 (0.8099) time: 6.1523 data: 6.1143 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8197 (0.8210) time: 0.1318 data: 0.1059 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (nsd-val): loss: 0.8197 (0.8210) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 1 day, 2:28:09 lr: 0.000124 grad: 0.1291 (0.1291) loss: 0.7842 (0.7842) time: 15.2464 data: 15.0659 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:48:57 lr: 0.000124 grad: 0.0835 (0.0919) loss: 0.8585 (0.8532) time: 0.2680 data: 0.1550 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:33:13 lr: 0.000124 grad: 0.0709 (0.0958) loss: 0.8588 (0.8547) time: 0.3096 data: 0.1823 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:31:09 lr: 0.000124 grad: 0.0784 (0.0909) loss: 0.8509 (0.8544) time: 0.2209 data: 0.0883 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:27:57 lr: 0.000124 grad: 0.0784 (0.0888) loss: 0.8480 (0.8536) time: 0.2140 data: 0.1234 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:26:29 lr: 0.000124 grad: 0.0765 (0.0867) loss: 0.8510 (0.8538) time: 0.4809 data: 0.3659 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:24:52 lr: 0.000124 grad: 0.0756 (0.0854) loss: 0.8494 (0.8539) time: 0.1275 data: 0.0003 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:22:57 lr: 0.000124 grad: 0.0754 (0.0837) loss: 0.8596 (0.8538) time: 0.1290 data: 0.0407 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:22:28 lr: 0.000124 grad: 0.0728 (0.0830) loss: 0.8540 (0.8539) time: 0.1672 data: 0.0682 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:21:22 lr: 0.000124 grad: 0.0695 (0.0821) loss: 0.8576 (0.8540) time: 0.2085 data: 0.1239 max mem: 9377 +Train: [10] [1000/6250] eta: 0:20:24 lr: 0.000124 grad: 0.0742 (0.0814) loss: 0.8553 (0.8541) time: 0.2244 data: 0.1309 max mem: 9377 +Train: [10] [1100/6250] eta: 0:20:09 lr: 0.000124 grad: 0.0700 (0.0806) loss: 0.8552 (0.8541) time: 0.1022 data: 0.0004 max mem: 9377 +Train: [10] [1200/6250] eta: 0:19:13 lr: 0.000124 grad: 0.0758 (0.0801) loss: 0.8499 (0.8541) time: 0.1600 data: 0.0700 max mem: 9377 +Train: [10] [1300/6250] eta: 0:18:28 lr: 0.000124 grad: 0.0721 (0.0797) loss: 0.8503 (0.8539) time: 0.1576 data: 0.0638 max mem: 9377 +Train: [10] [1400/6250] eta: 0:17:45 lr: 0.000124 grad: 0.0743 (0.0794) loss: 0.8522 (0.8539) time: 0.2063 data: 0.1259 max mem: 9377 +Train: [10] [1500/6250] eta: 0:17:02 lr: 0.000124 grad: 0.0708 (0.0791) loss: 0.8524 (0.8538) time: 0.1250 data: 0.0446 max mem: 9377 +Train: [10] [1600/6250] eta: 0:16:24 lr: 0.000124 grad: 0.0730 (0.0790) loss: 0.8548 (0.8539) time: 0.1647 data: 0.0832 max mem: 9377 +Train: [10] [1700/6250] eta: 0:15:52 lr: 0.000124 grad: 0.0720 (0.0788) loss: 0.8571 (0.8538) time: 0.1924 data: 0.1091 max mem: 9377 +Train: [10] [1800/6250] eta: 0:15:16 lr: 0.000124 grad: 0.0761 (0.0786) loss: 0.8501 (0.8538) time: 0.1553 data: 0.0739 max mem: 9377 +Train: [10] [1900/6250] eta: 0:14:48 lr: 0.000124 grad: 0.0728 (0.0784) loss: 0.8561 (0.8538) time: 0.1549 data: 0.0543 max mem: 9377 +Train: [10] [2000/6250] eta: 0:14:24 lr: 0.000124 grad: 0.0796 (0.0784) loss: 0.8518 (0.8538) time: 0.2944 data: 0.2132 max mem: 9377 +Train: [10] [2100/6250] eta: 0:13:54 lr: 0.000124 grad: 0.0781 (0.0785) loss: 0.8544 (0.8538) time: 0.2081 data: 0.1230 max mem: 9377 +Train: [10] [2200/6250] eta: 0:13:25 lr: 0.000124 grad: 0.0753 (0.0784) loss: 0.8514 (0.8537) time: 0.1476 data: 0.0740 max mem: 9377 +Train: [10] [2300/6250] eta: 0:12:59 lr: 0.000124 grad: 0.0770 (0.0784) loss: 0.8530 (0.8537) time: 0.1591 data: 0.0787 max mem: 9377 +Train: [10] [2400/6250] eta: 0:12:34 lr: 0.000124 grad: 0.0728 (0.0782) loss: 0.8542 (0.8536) time: 0.1806 data: 0.0957 max mem: 9377 +Train: [10] [2500/6250] eta: 0:12:09 lr: 0.000124 grad: 0.0734 (0.0782) loss: 0.8536 (0.8536) time: 0.1810 data: 0.0964 max mem: 9377 +Train: [10] [2600/6250] eta: 0:11:46 lr: 0.000124 grad: 0.0736 (0.0781) loss: 0.8514 (0.8535) time: 0.1782 data: 0.0927 max mem: 9377 +Train: [10] [2700/6250] eta: 0:11:21 lr: 0.000124 grad: 0.0768 (0.0781) loss: 0.8541 (0.8535) time: 0.1638 data: 0.0667 max mem: 9377 +Train: [10] [2800/6250] eta: 0:10:57 lr: 0.000124 grad: 0.0702 (0.0781) loss: 0.8532 (0.8534) time: 0.1366 data: 0.0441 max mem: 9377 +Train: [10] [2900/6250] eta: 0:10:33 lr: 0.000124 grad: 0.0772 (0.0780) loss: 0.8495 (0.8534) time: 0.1315 data: 0.0423 max mem: 9377 +Train: [10] [3000/6250] eta: 0:10:11 lr: 0.000124 grad: 0.0735 (0.0779) loss: 0.8507 (0.8534) time: 0.1556 data: 0.0626 max mem: 9377 +Train: [10] [3100/6250] eta: 0:09:50 lr: 0.000124 grad: 0.0772 (0.0779) loss: 0.8495 (0.8533) time: 0.1382 data: 0.0501 max mem: 9377 +Train: [10] [3200/6250] eta: 0:09:38 lr: 0.000124 grad: 0.0742 (0.0780) loss: 0.8540 (0.8532) time: 0.5815 data: 0.4779 max mem: 9377 +Train: [10] [3300/6250] eta: 0:09:15 lr: 0.000124 grad: 0.0760 (0.0780) loss: 0.8494 (0.8531) time: 0.1825 data: 0.0910 max mem: 9377 +Train: [10] [3400/6250] eta: 0:08:55 lr: 0.000124 grad: 0.0777 (0.0780) loss: 0.8493 (0.8530) time: 0.1157 data: 0.0101 max mem: 9377 +Train: [10] [3500/6250] eta: 0:08:34 lr: 0.000124 grad: 0.0711 (0.0780) loss: 0.8517 (0.8529) time: 0.1359 data: 0.0480 max mem: 9377 +Train: [10] [3600/6250] eta: 0:08:13 lr: 0.000124 grad: 0.0743 (0.0781) loss: 0.8529 (0.8528) time: 0.1381 data: 0.0424 max mem: 9377 +Train: [10] [3700/6250] eta: 0:08:02 lr: 0.000124 grad: 0.0776 (0.0781) loss: 0.8569 (0.8527) time: 0.0984 data: 0.0003 max mem: 9377 +Train: [10] [3800/6250] eta: 0:07:40 lr: 0.000124 grad: 0.0754 (0.0781) loss: 0.8553 (0.8527) time: 0.1506 data: 0.0711 max mem: 9377 +Train: [10] [3900/6250] eta: 0:07:20 lr: 0.000124 grad: 0.0812 (0.0781) loss: 0.8472 (0.8526) time: 0.1636 data: 0.0791 max mem: 9377 +Train: [10] [4000/6250] eta: 0:06:59 lr: 0.000124 grad: 0.0716 (0.0781) loss: 0.8507 (0.8525) time: 0.1531 data: 0.0704 max mem: 9377 +Train: [10] [4100/6250] eta: 0:06:39 lr: 0.000124 grad: 0.0751 (0.0781) loss: 0.8546 (0.8524) time: 0.1456 data: 0.0561 max mem: 9377 +Train: [10] [4200/6250] eta: 0:06:18 lr: 0.000124 grad: 0.0780 (0.0781) loss: 0.8520 (0.8524) time: 0.1543 data: 0.0599 max mem: 9377 +Train: [10] [4300/6250] eta: 0:05:59 lr: 0.000124 grad: 0.0783 (0.0781) loss: 0.8535 (0.8524) time: 0.2002 data: 0.1104 max mem: 9377 +Train: [10] [4400/6250] eta: 0:05:39 lr: 0.000124 grad: 0.0767 (0.0781) loss: 0.8497 (0.8523) time: 0.1455 data: 0.0661 max mem: 9377 +Train: [10] [4500/6250] eta: 0:05:20 lr: 0.000124 grad: 0.0731 (0.0780) loss: 0.8480 (0.8523) time: 0.1106 data: 0.0251 max mem: 9377 +Train: [10] [4600/6250] eta: 0:05:00 lr: 0.000124 grad: 0.0726 (0.0780) loss: 0.8487 (0.8522) time: 0.1307 data: 0.0401 max mem: 9377 +Train: [10] [4700/6250] eta: 0:04:41 lr: 0.000124 grad: 0.0804 (0.0780) loss: 0.8496 (0.8521) time: 0.1482 data: 0.0603 max mem: 9377 +Train: [10] [4800/6250] eta: 0:04:22 lr: 0.000124 grad: 0.0767 (0.0781) loss: 0.8520 (0.8522) time: 0.1418 data: 0.0653 max mem: 9377 +Train: [10] [4900/6250] eta: 0:04:03 lr: 0.000124 grad: 0.0745 (0.0781) loss: 0.8537 (0.8521) time: 0.1733 data: 0.0855 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:45 lr: 0.000124 grad: 0.0791 (0.0781) loss: 0.8490 (0.8521) time: 0.1275 data: 0.0306 max mem: 9377 +Train: [10] [5100/6250] eta: 0:03:26 lr: 0.000124 grad: 0.0758 (0.0781) loss: 0.8505 (0.8521) time: 0.1403 data: 0.0601 max mem: 9377 +Train: [10] [5200/6250] eta: 0:03:08 lr: 0.000124 grad: 0.0765 (0.0781) loss: 0.8517 (0.8520) time: 0.2075 data: 0.1247 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:50 lr: 0.000124 grad: 0.0723 (0.0780) loss: 0.8506 (0.8520) time: 0.1394 data: 0.0439 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:31 lr: 0.000124 grad: 0.0811 (0.0780) loss: 0.8492 (0.8520) time: 0.1357 data: 0.0457 max mem: 9377 +Train: [10] [5500/6250] eta: 0:02:13 lr: 0.000124 grad: 0.0723 (0.0780) loss: 0.8510 (0.8519) time: 0.1797 data: 0.0967 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:55 lr: 0.000124 grad: 0.0780 (0.0780) loss: 0.8460 (0.8519) time: 0.1403 data: 0.0462 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:37 lr: 0.000124 grad: 0.0737 (0.0780) loss: 0.8539 (0.8518) time: 0.1265 data: 0.0394 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:19 lr: 0.000124 grad: 0.0732 (0.0780) loss: 0.8499 (0.8518) time: 0.1327 data: 0.0529 max mem: 9377 +Train: [10] [5900/6250] eta: 0:01:01 lr: 0.000124 grad: 0.0781 (0.0780) loss: 0.8447 (0.8517) time: 0.1413 data: 0.0648 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:44 lr: 0.000124 grad: 0.0805 (0.0780) loss: 0.8485 (0.8516) time: 0.1829 data: 0.1002 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:26 lr: 0.000124 grad: 0.0799 (0.0781) loss: 0.8522 (0.8516) time: 0.2128 data: 0.0004 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0793 (0.0781) loss: 0.8511 (0.8515) time: 0.1121 data: 0.0055 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0790 (0.0781) loss: 0.8523 (0.8515) time: 0.2952 data: 0.1799 max mem: 9377 +Train: [10] Total time: 0:18:49 (0.1808 s / it) +Averaged stats: lr: 0.000124 grad: 0.0790 (0.0781) loss: 0.8523 (0.8515) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:04:56 loss: 0.8501 (0.8501) time: 4.7808 data: 4.7397 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8483 (0.8497) time: 0.1251 data: 0.1001 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-train-subset): loss: 0.8483 (0.8497) +Eval (hcp-val): [10] [ 0/62] eta: 0:04:46 loss: 0.8487 (0.8487) time: 4.6182 data: 4.5683 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8511 (0.8512) time: 0.1349 data: 0.1078 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (hcp-val): loss: 0.8511 (0.8512) +Eval (nsd-val): [10] [ 0/62] eta: 0:05:24 loss: 0.8099 (0.8099) time: 5.2372 data: 5.2066 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8209 (0.8207) time: 0.1380 data: 0.1112 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8207) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 20:28:11 lr: 0.000124 grad: 0.1009 (0.1009) loss: 0.8740 (0.8740) time: 11.7907 data: 11.1398 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:37:51 lr: 0.000124 grad: 0.0816 (0.0849) loss: 0.8512 (0.8595) time: 0.1402 data: 0.0007 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:31:13 lr: 0.000124 grad: 0.0748 (0.0809) loss: 0.8567 (0.8566) time: 0.1898 data: 0.0162 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:26:46 lr: 0.000124 grad: 0.0781 (0.0797) loss: 0.8540 (0.8534) time: 0.2252 data: 0.1218 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:26:26 lr: 0.000124 grad: 0.0733 (0.0788) loss: 0.8536 (0.8530) time: 0.5319 data: 0.4230 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:23:38 lr: 0.000124 grad: 0.0775 (0.0783) loss: 0.8483 (0.8525) time: 0.1596 data: 0.0522 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:21:56 lr: 0.000124 grad: 0.0762 (0.0783) loss: 0.8495 (0.8517) time: 0.1764 data: 0.0766 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:21:14 lr: 0.000124 grad: 0.0773 (0.0782) loss: 0.8464 (0.8509) time: 0.1581 data: 0.0582 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:20:29 lr: 0.000124 grad: 0.0759 (0.0779) loss: 0.8438 (0.8504) time: 0.2023 data: 0.1152 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:19:29 lr: 0.000124 grad: 0.0696 (0.0775) loss: 0.8486 (0.8500) time: 0.1366 data: 0.0510 max mem: 9377 +Train: [11] [1000/6250] eta: 0:18:39 lr: 0.000124 grad: 0.0715 (0.0771) loss: 0.8498 (0.8500) time: 0.1675 data: 0.0806 max mem: 9377 +Train: [11] [1100/6250] eta: 0:17:59 lr: 0.000124 grad: 0.0746 (0.0771) loss: 0.8475 (0.8496) time: 0.1741 data: 0.0980 max mem: 9377 +Train: [11] [1200/6250] eta: 0:17:19 lr: 0.000124 grad: 0.0756 (0.0771) loss: 0.8483 (0.8494) time: 0.1879 data: 0.1073 max mem: 9377 +Train: [11] [1300/6250] eta: 0:16:39 lr: 0.000124 grad: 0.0768 (0.0770) loss: 0.8427 (0.8492) time: 0.1505 data: 0.0525 max mem: 9377 +Train: [11] [1400/6250] eta: 0:16:04 lr: 0.000124 grad: 0.0742 (0.0769) loss: 0.8487 (0.8492) time: 0.1710 data: 0.0884 max mem: 9377 +Train: [11] [1500/6250] eta: 0:15:37 lr: 0.000124 grad: 0.0746 (0.0769) loss: 0.8462 (0.8490) time: 0.1892 data: 0.1005 max mem: 9377 +Train: [11] [1600/6250] eta: 0:15:07 lr: 0.000124 grad: 0.0732 (0.0768) loss: 0.8465 (0.8489) time: 0.1560 data: 0.0712 max mem: 9377 +Train: [11] [1700/6250] eta: 0:14:35 lr: 0.000124 grad: 0.0780 (0.0769) loss: 0.8491 (0.8488) time: 0.1380 data: 0.0484 max mem: 9377 +Train: [11] [1800/6250] eta: 0:14:07 lr: 0.000124 grad: 0.0777 (0.0771) loss: 0.8478 (0.8487) time: 0.1736 data: 0.0894 max mem: 9377 +Train: [11] [1900/6250] eta: 0:13:43 lr: 0.000124 grad: 0.0722 (0.0771) loss: 0.8480 (0.8486) time: 0.1574 data: 0.0748 max mem: 9377 +Train: [11] [2000/6250] eta: 0:13:21 lr: 0.000124 grad: 0.0734 (0.0771) loss: 0.8487 (0.8486) time: 0.2075 data: 0.1194 max mem: 9377 +Train: [11] [2100/6250] eta: 0:13:06 lr: 0.000124 grad: 0.0736 (0.0770) loss: 0.8440 (0.8486) time: 0.3551 data: 0.2638 max mem: 9377 +Train: [11] [2200/6250] eta: 0:12:48 lr: 0.000124 grad: 0.0769 (0.0770) loss: 0.8479 (0.8485) time: 0.1023 data: 0.0003 max mem: 9377 +Train: [11] [2300/6250] eta: 0:12:26 lr: 0.000124 grad: 0.0751 (0.0771) loss: 0.8455 (0.8484) time: 0.1713 data: 0.0792 max mem: 9377 +Train: [11] [2400/6250] eta: 0:12:10 lr: 0.000124 grad: 0.0785 (0.0771) loss: 0.8415 (0.8484) time: 0.1941 data: 0.0878 max mem: 9377 +Train: [11] [2500/6250] eta: 0:11:51 lr: 0.000124 grad: 0.0754 (0.0771) loss: 0.8461 (0.8484) time: 0.1625 data: 0.0775 max mem: 9377 +Train: [11] [2600/6250] eta: 0:11:29 lr: 0.000124 grad: 0.0697 (0.0770) loss: 0.8476 (0.8483) time: 0.1914 data: 0.1083 max mem: 9377 +Train: [11] [2700/6250] eta: 0:11:06 lr: 0.000124 grad: 0.0776 (0.0770) loss: 0.8472 (0.8483) time: 0.1761 data: 0.0908 max mem: 9377 +Train: [11] [2800/6250] eta: 0:10:43 lr: 0.000124 grad: 0.0768 (0.0770) loss: 0.8481 (0.8483) time: 0.1691 data: 0.0793 max mem: 9377 +Train: [11] [2900/6250] eta: 0:10:22 lr: 0.000124 grad: 0.0748 (0.0771) loss: 0.8504 (0.8483) time: 0.1552 data: 0.0753 max mem: 9377 +Train: [11] [3000/6250] eta: 0:10:01 lr: 0.000124 grad: 0.0730 (0.0771) loss: 0.8460 (0.8482) time: 0.1728 data: 0.0876 max mem: 9377 +Train: [11] [3100/6250] eta: 0:09:40 lr: 0.000124 grad: 0.0762 (0.0771) loss: 0.8492 (0.8481) time: 0.1651 data: 0.0839 max mem: 9377 +Train: [11] [3200/6250] eta: 0:09:18 lr: 0.000124 grad: 0.0736 (0.0771) loss: 0.8476 (0.8481) time: 0.1179 data: 0.0224 max mem: 9377 +Train: [11] [3300/6250] eta: 0:08:58 lr: 0.000124 grad: 0.0748 (0.0771) loss: 0.8495 (0.8480) time: 0.1587 data: 0.0797 max mem: 9377 +Train: [11] [3400/6250] eta: 0:08:37 lr: 0.000124 grad: 0.0818 (0.0772) loss: 0.8464 (0.8480) time: 0.1432 data: 0.0582 max mem: 9377 +Train: [11] [3500/6250] eta: 0:08:19 lr: 0.000124 grad: 0.0764 (0.0772) loss: 0.8467 (0.8479) time: 0.2469 data: 0.1744 max mem: 9377 +Train: [11] [3600/6250] eta: 0:07:58 lr: 0.000124 grad: 0.0771 (0.0774) loss: 0.8483 (0.8478) time: 0.1734 data: 0.0947 max mem: 9377 +Train: [11] [3700/6250] eta: 0:07:39 lr: 0.000124 grad: 0.0759 (0.0775) loss: 0.8499 (0.8478) time: 0.1770 data: 0.0941 max mem: 9377 +Train: [11] [3800/6250] eta: 0:07:21 lr: 0.000124 grad: 0.0750 (0.0775) loss: 0.8474 (0.8478) time: 0.1132 data: 0.0003 max mem: 9377 +Train: [11] [3900/6250] eta: 0:07:02 lr: 0.000124 grad: 0.0794 (0.0776) loss: 0.8436 (0.8478) time: 0.1999 data: 0.1160 max mem: 9377 +Train: [11] [4000/6250] eta: 0:06:44 lr: 0.000123 grad: 0.0752 (0.0776) loss: 0.8512 (0.8478) time: 0.1307 data: 0.0003 max mem: 9377 +Train: [11] [4100/6250] eta: 0:06:37 lr: 0.000123 grad: 0.0740 (0.0776) loss: 0.8480 (0.8477) time: 0.1787 data: 0.0798 max mem: 9377 +Train: [11] [4200/6250] eta: 0:06:16 lr: 0.000123 grad: 0.0747 (0.0776) loss: 0.8466 (0.8477) time: 0.1709 data: 0.0857 max mem: 9377 +Train: [11] [4300/6250] eta: 0:06:05 lr: 0.000123 grad: 0.0795 (0.0777) loss: 0.8465 (0.8476) time: 0.2724 data: 0.1618 max mem: 9377 +Train: [11] [4400/6250] eta: 0:05:45 lr: 0.000123 grad: 0.0735 (0.0777) loss: 0.8480 (0.8475) time: 0.1435 data: 0.0352 max mem: 9377 +Train: [11] [4500/6250] eta: 0:05:30 lr: 0.000123 grad: 0.0841 (0.0778) loss: 0.8476 (0.8475) time: 0.4642 data: 0.3309 max mem: 9377 +Train: [11] [4600/6250] eta: 0:05:10 lr: 0.000123 grad: 0.0704 (0.0777) loss: 0.8467 (0.8475) time: 0.1379 data: 0.0226 max mem: 9377 +Train: [11] [4700/6250] eta: 0:04:53 lr: 0.000123 grad: 0.0803 (0.0778) loss: 0.8450 (0.8475) time: 0.1590 data: 0.0548 max mem: 9377 +Train: [11] [4800/6250] eta: 0:04:33 lr: 0.000123 grad: 0.0786 (0.0778) loss: 0.8468 (0.8474) time: 0.1467 data: 0.0544 max mem: 9377 +Train: [11] [4900/6250] eta: 0:04:13 lr: 0.000123 grad: 0.0770 (0.0779) loss: 0.8475 (0.8473) time: 0.1526 data: 0.0635 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:53 lr: 0.000123 grad: 0.0737 (0.0779) loss: 0.8457 (0.8473) time: 0.1942 data: 0.1101 max mem: 9377 +Train: [11] [5100/6250] eta: 0:03:34 lr: 0.000123 grad: 0.0747 (0.0779) loss: 0.8491 (0.8472) time: 0.0935 data: 0.0004 max mem: 9377 +Train: [11] [5200/6250] eta: 0:03:19 lr: 0.000123 grad: 0.0723 (0.0779) loss: 0.8464 (0.8472) time: 0.2086 data: 0.1097 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:59 lr: 0.000123 grad: 0.0725 (0.0779) loss: 0.8454 (0.8471) time: 0.1066 data: 0.0138 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:39 lr: 0.000123 grad: 0.0714 (0.0780) loss: 0.8382 (0.8470) time: 0.1680 data: 0.0839 max mem: 9377 +Train: [11] [5500/6250] eta: 0:02:20 lr: 0.000123 grad: 0.0818 (0.0779) loss: 0.8349 (0.8469) time: 0.2643 data: 0.1724 max mem: 9377 +Train: [11] [5600/6250] eta: 0:02:01 lr: 0.000123 grad: 0.0748 (0.0779) loss: 0.8455 (0.8468) time: 0.1372 data: 0.0529 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:42 lr: 0.000123 grad: 0.0792 (0.0780) loss: 0.8425 (0.8468) time: 0.1485 data: 0.0623 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:23 lr: 0.000123 grad: 0.0724 (0.0780) loss: 0.8471 (0.8467) time: 0.2137 data: 0.1341 max mem: 9377 +Train: [11] [5900/6250] eta: 0:01:04 lr: 0.000123 grad: 0.0750 (0.0780) loss: 0.8440 (0.8467) time: 0.1695 data: 0.0803 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:46 lr: 0.000123 grad: 0.0906 (0.0781) loss: 0.8473 (0.8466) time: 0.2039 data: 0.1234 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:27 lr: 0.000123 grad: 0.0777 (0.0781) loss: 0.8479 (0.8466) time: 0.1432 data: 0.0607 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:09 lr: 0.000123 grad: 0.0760 (0.0782) loss: 0.8400 (0.8465) time: 0.1963 data: 0.1090 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0798 (0.0782) loss: 0.8423 (0.8465) time: 0.1657 data: 0.0765 max mem: 9377 +Train: [11] Total time: 0:19:15 (0.1849 s / it) +Averaged stats: lr: 0.000123 grad: 0.0798 (0.0782) loss: 0.8423 (0.8465) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:05:41 loss: 0.8489 (0.8489) time: 5.5148 data: 5.4811 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8448 (0.8486) time: 0.1213 data: 0.0964 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:13 (0.2188 s / it) +Averaged stats (hcp-train-subset): loss: 0.8448 (0.8486) +Eval (hcp-val): [11] [ 0/62] eta: 0:04:39 loss: 0.8497 (0.8497) time: 4.5131 data: 4.4503 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8477 (0.8492) time: 0.1411 data: 0.1157 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:13 (0.2178 s / it) +Averaged stats (hcp-val): loss: 0.8477 (0.8492) +Eval (nsd-val): [11] [ 0/62] eta: 0:05:43 loss: 0.8100 (0.8100) time: 5.5425 data: 5.5123 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8173 (0.8198) time: 0.1297 data: 0.1026 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (nsd-val): loss: 0.8173 (0.8198) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 10:40:31 lr: 0.000123 grad: 0.0872 (0.0872) loss: 0.8598 (0.8598) time: 6.1491 data: 6.0265 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:24:02 lr: 0.000123 grad: 0.0827 (0.0942) loss: 0.8466 (0.8477) time: 0.1004 data: 0.0002 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:20:05 lr: 0.000123 grad: 0.0770 (0.0876) loss: 0.8412 (0.8445) time: 0.1477 data: 0.0546 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:19:01 lr: 0.000123 grad: 0.0716 (0.0848) loss: 0.8437 (0.8445) time: 0.1629 data: 0.0669 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:17:58 lr: 0.000123 grad: 0.0789 (0.0854) loss: 0.8404 (0.8439) time: 0.1849 data: 0.0824 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:17:08 lr: 0.000123 grad: 0.0767 (0.0844) loss: 0.8407 (0.8434) time: 0.1203 data: 0.0209 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:16:31 lr: 0.000123 grad: 0.0747 (0.0839) loss: 0.8419 (0.8429) time: 0.1629 data: 0.0666 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:15:50 lr: 0.000123 grad: 0.0789 (0.0830) loss: 0.8440 (0.8430) time: 0.1445 data: 0.0511 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:15:20 lr: 0.000123 grad: 0.0790 (0.0827) loss: 0.8513 (0.8434) time: 0.1350 data: 0.0370 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:15:03 lr: 0.000123 grad: 0.0761 (0.0821) loss: 0.8436 (0.8437) time: 0.2078 data: 0.1242 max mem: 9377 +Train: [12] [1000/6250] eta: 0:14:36 lr: 0.000123 grad: 0.0706 (0.0816) loss: 0.8460 (0.8439) time: 0.1306 data: 0.0440 max mem: 9377 +Train: [12] [1100/6250] eta: 0:14:16 lr: 0.000123 grad: 0.0751 (0.0814) loss: 0.8417 (0.8439) time: 0.1361 data: 0.0506 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:57 lr: 0.000123 grad: 0.0772 (0.0812) loss: 0.8379 (0.8436) time: 0.1784 data: 0.0841 max mem: 9377 +Train: [12] [1300/6250] eta: 0:13:33 lr: 0.000123 grad: 0.0738 (0.0809) loss: 0.8430 (0.8434) time: 0.1549 data: 0.0732 max mem: 9377 +Train: [12] [1400/6250] eta: 0:13:13 lr: 0.000123 grad: 0.0824 (0.0808) loss: 0.8377 (0.8433) time: 0.1770 data: 0.0890 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:53 lr: 0.000123 grad: 0.0783 (0.0808) loss: 0.8373 (0.8431) time: 0.1814 data: 0.0958 max mem: 9377 +Train: [12] [1600/6250] eta: 0:12:37 lr: 0.000123 grad: 0.0785 (0.0807) loss: 0.8392 (0.8429) time: 0.2024 data: 0.1146 max mem: 9377 +Train: [12] [1700/6250] eta: 0:12:23 lr: 0.000123 grad: 0.0780 (0.0807) loss: 0.8390 (0.8427) time: 0.2031 data: 0.1291 max mem: 9377 +Train: [12] [1800/6250] eta: 0:12:05 lr: 0.000123 grad: 0.0812 (0.0808) loss: 0.8348 (0.8425) time: 0.1877 data: 0.1053 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:51 lr: 0.000123 grad: 0.0803 (0.0808) loss: 0.8357 (0.8422) time: 0.2035 data: 0.1179 max mem: 9377 +Train: [12] [2000/6250] eta: 0:11:31 lr: 0.000123 grad: 0.0767 (0.0808) loss: 0.8382 (0.8420) time: 0.1603 data: 0.0671 max mem: 9377 +Train: [12] [2100/6250] eta: 0:11:16 lr: 0.000123 grad: 0.0736 (0.0807) loss: 0.8388 (0.8419) time: 0.1687 data: 0.0816 max mem: 9377 +Train: [12] [2200/6250] eta: 0:11:01 lr: 0.000123 grad: 0.0800 (0.0807) loss: 0.8373 (0.8417) time: 0.1454 data: 0.0609 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:48 lr: 0.000123 grad: 0.0798 (0.0807) loss: 0.8331 (0.8416) time: 0.2514 data: 0.1619 max mem: 9377 +Train: [12] [2400/6250] eta: 0:10:28 lr: 0.000123 grad: 0.0764 (0.0806) loss: 0.8356 (0.8414) time: 0.1758 data: 0.0820 max mem: 9377 +Train: [12] [2500/6250] eta: 0:10:10 lr: 0.000123 grad: 0.0760 (0.0807) loss: 0.8383 (0.8413) time: 0.1635 data: 0.0790 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:51 lr: 0.000123 grad: 0.0790 (0.0806) loss: 0.8411 (0.8413) time: 0.1480 data: 0.0627 max mem: 9377 +Train: [12] [2700/6250] eta: 0:09:33 lr: 0.000123 grad: 0.0767 (0.0805) loss: 0.8370 (0.8412) time: 0.1569 data: 0.0654 max mem: 9377 +Train: [12] [2800/6250] eta: 0:09:19 lr: 0.000123 grad: 0.0784 (0.0805) loss: 0.8447 (0.8411) time: 0.1629 data: 0.0834 max mem: 9377 +Train: [12] [2900/6250] eta: 0:09:02 lr: 0.000123 grad: 0.0740 (0.0804) loss: 0.8419 (0.8411) time: 0.1321 data: 0.0372 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:45 lr: 0.000123 grad: 0.0771 (0.0804) loss: 0.8422 (0.8410) time: 0.1443 data: 0.0542 max mem: 9377 +Train: [12] [3100/6250] eta: 0:08:31 lr: 0.000123 grad: 0.0770 (0.0803) loss: 0.8393 (0.8410) time: 0.2335 data: 0.1493 max mem: 9377 +Train: [12] [3200/6250] eta: 0:08:13 lr: 0.000123 grad: 0.0794 (0.0802) loss: 0.8441 (0.8411) time: 0.1821 data: 0.0912 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:56 lr: 0.000123 grad: 0.0706 (0.0801) loss: 0.8432 (0.8412) time: 0.1414 data: 0.0482 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:39 lr: 0.000123 grad: 0.0813 (0.0800) loss: 0.8357 (0.8412) time: 0.1326 data: 0.0429 max mem: 9377 +Train: [12] [3500/6250] eta: 0:07:23 lr: 0.000123 grad: 0.0783 (0.0800) loss: 0.8372 (0.8413) time: 0.1524 data: 0.0679 max mem: 9377 +Train: [12] [3600/6250] eta: 0:07:06 lr: 0.000123 grad: 0.0789 (0.0799) loss: 0.8454 (0.8413) time: 0.1088 data: 0.0307 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:50 lr: 0.000123 grad: 0.0775 (0.0798) loss: 0.8443 (0.8414) time: 0.1133 data: 0.0191 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:34 lr: 0.000123 grad: 0.0718 (0.0798) loss: 0.8479 (0.8415) time: 0.1586 data: 0.0681 max mem: 9377 +Train: [12] [3900/6250] eta: 0:06:19 lr: 0.000123 grad: 0.0740 (0.0798) loss: 0.8440 (0.8416) time: 0.1192 data: 0.0186 max mem: 9377 +Train: [12] [4000/6250] eta: 0:06:03 lr: 0.000123 grad: 0.0767 (0.0797) loss: 0.8414 (0.8416) time: 0.2202 data: 0.1246 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:47 lr: 0.000123 grad: 0.0777 (0.0797) loss: 0.8451 (0.8417) time: 0.1516 data: 0.0564 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:31 lr: 0.000123 grad: 0.0822 (0.0797) loss: 0.8399 (0.8417) time: 0.1816 data: 0.0999 max mem: 9377 +Train: [12] [4300/6250] eta: 0:05:14 lr: 0.000123 grad: 0.0746 (0.0797) loss: 0.8460 (0.8417) time: 0.1526 data: 0.0539 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:59 lr: 0.000123 grad: 0.0792 (0.0798) loss: 0.8425 (0.8417) time: 0.0884 data: 0.0002 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:45 lr: 0.000123 grad: 0.0721 (0.0798) loss: 0.8450 (0.8417) time: 0.1362 data: 0.0439 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:29 lr: 0.000123 grad: 0.0747 (0.0798) loss: 0.8419 (0.8417) time: 0.2086 data: 0.1270 max mem: 9377 +Train: [12] [4700/6250] eta: 0:04:14 lr: 0.000123 grad: 0.0723 (0.0799) loss: 0.8445 (0.8416) time: 0.3100 data: 0.2051 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:57 lr: 0.000123 grad: 0.0793 (0.0799) loss: 0.8473 (0.8417) time: 0.2033 data: 0.1214 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:40 lr: 0.000123 grad: 0.0721 (0.0799) loss: 0.8477 (0.8417) time: 0.2377 data: 0.1437 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:24 lr: 0.000123 grad: 0.0739 (0.0798) loss: 0.8430 (0.8417) time: 0.1863 data: 0.1012 max mem: 9377 +Train: [12] [5100/6250] eta: 0:03:09 lr: 0.000123 grad: 0.0737 (0.0798) loss: 0.8451 (0.8418) time: 0.3405 data: 0.2272 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:52 lr: 0.000123 grad: 0.0778 (0.0798) loss: 0.8437 (0.8418) time: 0.1326 data: 0.0472 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:36 lr: 0.000123 grad: 0.0748 (0.0798) loss: 0.8391 (0.8419) time: 0.1373 data: 0.0392 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:19 lr: 0.000123 grad: 0.0730 (0.0798) loss: 0.8426 (0.8419) time: 0.1549 data: 0.0595 max mem: 9377 +Train: [12] [5500/6250] eta: 0:02:02 lr: 0.000123 grad: 0.0708 (0.0797) loss: 0.8494 (0.8419) time: 0.1641 data: 0.0794 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:46 lr: 0.000123 grad: 0.0790 (0.0798) loss: 0.8367 (0.8419) time: 0.1614 data: 0.0723 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:30 lr: 0.000123 grad: 0.0771 (0.0797) loss: 0.8438 (0.8419) time: 0.1579 data: 0.0724 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:13 lr: 0.000123 grad: 0.0754 (0.0797) loss: 0.8459 (0.8419) time: 0.1185 data: 0.0161 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:57 lr: 0.000123 grad: 0.0751 (0.0797) loss: 0.8406 (0.8419) time: 0.2064 data: 0.1226 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:40 lr: 0.000123 grad: 0.0726 (0.0796) loss: 0.8420 (0.8420) time: 0.1901 data: 0.1126 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:24 lr: 0.000123 grad: 0.0729 (0.0795) loss: 0.8410 (0.8420) time: 0.1381 data: 0.0505 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:08 lr: 0.000123 grad: 0.0802 (0.0796) loss: 0.8411 (0.8419) time: 0.0990 data: 0.0187 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0702 (0.0796) loss: 0.8416 (0.8419) time: 0.1583 data: 0.0726 max mem: 9377 +Train: [12] Total time: 0:17:04 (0.1640 s / it) +Averaged stats: lr: 0.000123 grad: 0.0702 (0.0796) loss: 0.8416 (0.8419) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:05:20 loss: 0.8473 (0.8473) time: 5.1678 data: 5.1362 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8441 (0.8473) time: 0.1329 data: 0.1076 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (hcp-train-subset): loss: 0.8441 (0.8473) +Eval (hcp-val): [12] [ 0/62] eta: 0:05:24 loss: 0.8457 (0.8457) time: 5.2346 data: 5.2035 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8484 (0.8490) time: 0.1280 data: 0.1031 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:13 (0.2100 s / it) +Averaged stats (hcp-val): loss: 0.8484 (0.8490) +Eval (nsd-val): [12] [ 0/62] eta: 0:05:52 loss: 0.8135 (0.8135) time: 5.6820 data: 5.6515 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8190 (0.8207) time: 0.1476 data: 0.1197 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:14 (0.2266 s / it) +Averaged stats (nsd-val): loss: 0.8190 (0.8207) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 10:32:29 lr: 0.000123 grad: nan (nan) loss: 0.8399 (0.8399) time: 6.0719 data: 5.8282 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:24:50 lr: 0.000123 grad: 0.0755 (0.0949) loss: 0.8460 (0.8440) time: 0.2034 data: 0.1029 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:21:02 lr: 0.000123 grad: 0.0836 (0.0921) loss: 0.8417 (0.8426) time: 0.1752 data: 0.0787 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:19:18 lr: 0.000123 grad: 0.0772 (0.0892) loss: 0.8446 (0.8424) time: 0.1882 data: 0.1016 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:18:02 lr: 0.000123 grad: 0.0721 (0.0863) loss: 0.8459 (0.8434) time: 0.1643 data: 0.0687 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:17:02 lr: 0.000123 grad: 0.0747 (0.0845) loss: 0.8489 (0.8443) time: 0.1570 data: 0.0491 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:16:17 lr: 0.000123 grad: 0.0757 (0.0833) loss: 0.8483 (0.8446) time: 0.1456 data: 0.0619 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:15:44 lr: 0.000123 grad: 0.0722 (0.0824) loss: 0.8454 (0.8449) time: 0.1720 data: 0.0853 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:15:15 lr: 0.000123 grad: 0.0766 (0.0816) loss: 0.8417 (0.8450) time: 0.1793 data: 0.0876 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:14:54 lr: 0.000123 grad: 0.0735 (0.0811) loss: 0.8413 (0.8448) time: 0.1845 data: 0.0874 max mem: 9377 +Train: [13] [1000/6250] eta: 0:14:32 lr: 0.000123 grad: 0.0778 (0.0809) loss: 0.8370 (0.8444) time: 0.1562 data: 0.0789 max mem: 9377 +Train: [13] [1100/6250] eta: 0:14:20 lr: 0.000123 grad: 0.0780 (0.0807) loss: 0.8447 (0.8444) time: 0.1543 data: 0.0599 max mem: 9377 +Train: [13] [1200/6250] eta: 0:14:04 lr: 0.000123 grad: 0.0785 (0.0806) loss: 0.8364 (0.8441) time: 0.1754 data: 0.0820 max mem: 9377 +Train: [13] [1300/6250] eta: 0:13:42 lr: 0.000123 grad: 0.0785 (0.0805) loss: 0.8394 (0.8438) time: 0.1166 data: 0.0261 max mem: 9377 +Train: [13] [1400/6250] eta: 0:13:21 lr: 0.000123 grad: 0.0743 (0.0804) loss: 0.8494 (0.8438) time: 0.1795 data: 0.0959 max mem: 9377 +Train: [13] [1500/6250] eta: 0:13:05 lr: 0.000123 grad: 0.0748 (0.0802) loss: 0.8466 (0.8438) time: 0.2113 data: 0.1330 max mem: 9377 +Train: [13] [1600/6250] eta: 0:12:42 lr: 0.000123 grad: 0.0738 (0.0799) loss: 0.8423 (0.8438) time: 0.1030 data: 0.0121 max mem: 9377 +Train: [13] [1700/6250] eta: 0:12:26 lr: 0.000123 grad: 0.0752 (0.0798) loss: 0.8400 (0.8437) time: 0.1108 data: 0.0204 max mem: 9377 +Train: [13] [1800/6250] eta: 0:12:11 lr: 0.000123 grad: 0.0764 (0.0798) loss: 0.8426 (0.8436) time: 0.2009 data: 0.1123 max mem: 9377 +Train: [13] [1900/6250] eta: 0:11:57 lr: 0.000123 grad: 0.0743 (0.0798) loss: 0.8396 (0.8434) time: 0.1032 data: 0.0003 max mem: 9377 +Train: [13] [2000/6250] eta: 0:11:37 lr: 0.000123 grad: 0.0765 (0.0798) loss: 0.8432 (0.8432) time: 0.1273 data: 0.0414 max mem: 9377 +Train: [13] [2100/6250] eta: 0:11:21 lr: 0.000123 grad: 0.0733 (0.0798) loss: 0.8482 (0.8431) time: 0.1708 data: 0.0899 max mem: 9377 +Train: [13] [2200/6250] eta: 0:11:03 lr: 0.000123 grad: 0.0785 (0.0799) loss: 0.8416 (0.8430) time: 0.1676 data: 0.0857 max mem: 9377 +Train: [13] [2300/6250] eta: 0:10:48 lr: 0.000123 grad: 0.0751 (0.0798) loss: 0.8400 (0.8429) time: 0.1286 data: 0.0421 max mem: 9377 +Train: [13] [2400/6250] eta: 0:10:32 lr: 0.000123 grad: 0.0771 (0.0798) loss: 0.8361 (0.8428) time: 0.1594 data: 0.0703 max mem: 9377 +Train: [13] [2500/6250] eta: 0:10:14 lr: 0.000123 grad: 0.0780 (0.0798) loss: 0.8404 (0.8427) time: 0.1567 data: 0.0673 max mem: 9377 +Train: [13] [2600/6250] eta: 0:09:56 lr: 0.000123 grad: 0.0786 (0.0798) loss: 0.8444 (0.8427) time: 0.1304 data: 0.0406 max mem: 9377 +Train: [13] [2700/6250] eta: 0:09:39 lr: 0.000123 grad: 0.0762 (0.0797) loss: 0.8423 (0.8426) time: 0.1441 data: 0.0524 max mem: 9377 +Train: [13] [2800/6250] eta: 0:09:23 lr: 0.000123 grad: 0.0762 (0.0797) loss: 0.8428 (0.8425) time: 0.2247 data: 0.1359 max mem: 9377 +Train: [13] [2900/6250] eta: 0:09:05 lr: 0.000123 grad: 0.0769 (0.0796) loss: 0.8354 (0.8425) time: 0.1496 data: 0.0681 max mem: 9377 +Train: [13] [3000/6250] eta: 0:08:48 lr: 0.000123 grad: 0.0739 (0.0796) loss: 0.8460 (0.8425) time: 0.1445 data: 0.0589 max mem: 9377 +Train: [13] [3100/6250] eta: 0:08:34 lr: 0.000123 grad: 0.0792 (0.0795) loss: 0.8429 (0.8425) time: 0.2025 data: 0.1267 max mem: 9377 +Train: [13] [3200/6250] eta: 0:08:16 lr: 0.000123 grad: 0.0733 (0.0795) loss: 0.8434 (0.8425) time: 0.1479 data: 0.0664 max mem: 9377 +Train: [13] [3300/6250] eta: 0:08:00 lr: 0.000123 grad: 0.0746 (0.0794) loss: 0.8430 (0.8425) time: 0.1587 data: 0.0765 max mem: 9377 +Train: [13] [3400/6250] eta: 0:07:45 lr: 0.000123 grad: 0.0764 (0.0793) loss: 0.8394 (0.8426) time: 0.2034 data: 0.1203 max mem: 9377 +Train: [13] [3500/6250] eta: 0:07:29 lr: 0.000123 grad: 0.0719 (0.0793) loss: 0.8444 (0.8426) time: 0.1224 data: 0.0312 max mem: 9377 +Train: [13] [3600/6250] eta: 0:07:12 lr: 0.000123 grad: 0.0782 (0.0793) loss: 0.8361 (0.8426) time: 0.1357 data: 0.0360 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:55 lr: 0.000122 grad: 0.0743 (0.0793) loss: 0.8373 (0.8425) time: 0.1203 data: 0.0198 max mem: 9377 +Train: [13] [3800/6250] eta: 0:06:39 lr: 0.000122 grad: 0.0743 (0.0793) loss: 0.8440 (0.8425) time: 0.1787 data: 0.0668 max mem: 9377 +Train: [13] [3900/6250] eta: 0:06:25 lr: 0.000122 grad: 0.0752 (0.0793) loss: 0.8405 (0.8424) time: 0.1498 data: 0.0340 max mem: 9377 +Train: [13] [4000/6250] eta: 0:06:09 lr: 0.000122 grad: 0.0776 (0.0792) loss: 0.8405 (0.8423) time: 0.0796 data: 0.0002 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:53 lr: 0.000122 grad: 0.0822 (0.0793) loss: 0.8364 (0.8423) time: 0.1233 data: 0.0157 max mem: 9377 +Train: [13] [4200/6250] eta: 0:05:36 lr: 0.000122 grad: 0.0712 (0.0792) loss: 0.8434 (0.8422) time: 0.1479 data: 0.0589 max mem: 9377 +Train: [13] [4300/6250] eta: 0:05:20 lr: 0.000122 grad: 0.0796 (0.0792) loss: 0.8403 (0.8422) time: 0.1419 data: 0.0506 max mem: 9377 +Train: [13] [4400/6250] eta: 0:05:07 lr: 0.000122 grad: 0.0819 (0.0793) loss: 0.8386 (0.8421) time: 0.5608 data: 0.4508 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:50 lr: 0.000122 grad: 0.0730 (0.0793) loss: 0.8406 (0.8421) time: 0.1025 data: 0.0110 max mem: 9377 +Train: [13] [4600/6250] eta: 0:04:33 lr: 0.000122 grad: 0.0824 (0.0793) loss: 0.8403 (0.8421) time: 0.1344 data: 0.0464 max mem: 9377 +Train: [13] [4700/6250] eta: 0:04:15 lr: 0.000122 grad: 0.0729 (0.0794) loss: 0.8419 (0.8420) time: 0.1225 data: 0.0402 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:59 lr: 0.000122 grad: 0.0751 (0.0794) loss: 0.8410 (0.8420) time: 0.1155 data: 0.0283 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:42 lr: 0.000122 grad: 0.0782 (0.0794) loss: 0.8385 (0.8419) time: 0.1329 data: 0.0354 max mem: 9377 +Train: [13] [5000/6250] eta: 0:03:26 lr: 0.000122 grad: 0.0794 (0.0795) loss: 0.8409 (0.8419) time: 0.1569 data: 0.0700 max mem: 9377 +Train: [13] [5100/6250] eta: 0:03:09 lr: 0.000122 grad: 0.0768 (0.0795) loss: 0.8441 (0.8419) time: 0.1698 data: 0.0773 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:53 lr: 0.000122 grad: 0.0798 (0.0795) loss: 0.8396 (0.8418) time: 0.2312 data: 0.1210 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:36 lr: 0.000122 grad: 0.0821 (0.0795) loss: 0.8376 (0.8418) time: 0.1000 data: 0.0002 max mem: 9377 +Train: [13] [5400/6250] eta: 0:02:20 lr: 0.000122 grad: 0.0727 (0.0795) loss: 0.8328 (0.8417) time: 0.1265 data: 0.0348 max mem: 9377 +Train: [13] [5500/6250] eta: 0:02:03 lr: 0.000122 grad: 0.0739 (0.0795) loss: 0.8415 (0.8416) time: 0.1479 data: 0.0664 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:47 lr: 0.000122 grad: 0.0772 (0.0795) loss: 0.8415 (0.8416) time: 0.1901 data: 0.0980 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:30 lr: 0.000122 grad: 0.0729 (0.0795) loss: 0.8375 (0.8415) time: 0.1615 data: 0.0736 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:14 lr: 0.000122 grad: 0.0810 (0.0795) loss: 0.8349 (0.8414) time: 0.1524 data: 0.0711 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:57 lr: 0.000122 grad: 0.0817 (0.0795) loss: 0.8349 (0.8413) time: 0.1473 data: 0.0623 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:41 lr: 0.000122 grad: 0.0809 (0.0795) loss: 0.8355 (0.8413) time: 0.1042 data: 0.0168 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:24 lr: 0.000122 grad: 0.0743 (0.0795) loss: 0.8395 (0.8412) time: 0.1276 data: 0.0384 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.0804 (0.0795) loss: 0.8341 (0.8412) time: 0.1534 data: 0.0708 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0718 (0.0795) loss: 0.8352 (0.8411) time: 0.1205 data: 0.0432 max mem: 9377 +Train: [13] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000122 grad: 0.0718 (0.0795) loss: 0.8352 (0.8411) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:05:24 loss: 0.8454 (0.8454) time: 5.2287 data: 5.1813 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8426 (0.8455) time: 0.1150 data: 0.0901 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (hcp-train-subset): loss: 0.8426 (0.8455) +Eval (hcp-val): [13] [ 0/62] eta: 0:05:47 loss: 0.8452 (0.8452) time: 5.6018 data: 5.5724 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8471 (0.8478) time: 0.1224 data: 0.0976 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:14 (0.2270 s / it) +Averaged stats (hcp-val): loss: 0.8471 (0.8478) +Eval (nsd-val): [13] [ 0/62] eta: 0:06:09 loss: 0.8079 (0.8079) time: 5.9657 data: 5.9309 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8155 (0.8170) time: 0.1150 data: 0.0895 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (nsd-val): loss: 0.8155 (0.8170) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [14] [ 0/6250] eta: 10:29:51 lr: 0.000122 grad: 0.1036 (0.1036) loss: 0.8738 (0.8738) time: 6.0467 data: 5.8955 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:24:27 lr: 0.000122 grad: 0.0824 (0.0988) loss: 0.8431 (0.8454) time: 0.2082 data: 0.1180 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:20:37 lr: 0.000122 grad: 0.0822 (0.0915) loss: 0.8453 (0.8429) time: 0.1595 data: 0.0613 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:19:25 lr: 0.000122 grad: 0.0860 (0.0907) loss: 0.8475 (0.8420) time: 0.1382 data: 0.0413 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:18:22 lr: 0.000122 grad: 0.0745 (0.0890) loss: 0.8509 (0.8423) time: 0.1820 data: 0.0913 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:17:25 lr: 0.000122 grad: 0.0774 (0.0871) loss: 0.8436 (0.8425) time: 0.1499 data: 0.0646 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:16:48 lr: 0.000122 grad: 0.0751 (0.0865) loss: 0.8387 (0.8425) time: 0.1787 data: 0.0926 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:16:08 lr: 0.000122 grad: 0.0820 (0.0854) loss: 0.8400 (0.8423) time: 0.1443 data: 0.0543 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:15:46 lr: 0.000122 grad: 0.0779 (0.0849) loss: 0.8341 (0.8421) time: 0.1467 data: 0.0497 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:15:22 lr: 0.000122 grad: 0.0739 (0.0843) loss: 0.8461 (0.8422) time: 0.1673 data: 0.0827 max mem: 9377 +Train: [14] [1000/6250] eta: 0:15:11 lr: 0.000122 grad: 0.0736 (0.0838) loss: 0.8405 (0.8417) time: 0.1425 data: 0.0595 max mem: 9377 +Train: [14] [1100/6250] eta: 0:14:48 lr: 0.000122 grad: 0.0824 (0.0837) loss: 0.8452 (0.8412) time: 0.1710 data: 0.0872 max mem: 9377 +Train: [14] [1200/6250] eta: 0:14:19 lr: 0.000122 grad: 0.0830 (0.0837) loss: 0.8373 (0.8408) time: 0.1544 data: 0.0688 max mem: 9377 +Train: [14] [1300/6250] eta: 0:13:58 lr: 0.000122 grad: 0.0768 (0.0834) loss: 0.8372 (0.8405) time: 0.1604 data: 0.0758 max mem: 9377 +Train: [14] [1400/6250] eta: 0:13:38 lr: 0.000122 grad: 0.0755 (0.0833) loss: 0.8395 (0.8402) time: 0.2033 data: 0.1235 max mem: 9377 +Train: [14] [1500/6250] eta: 0:13:16 lr: 0.000122 grad: 0.0764 (0.0830) loss: 0.8431 (0.8401) time: 0.1590 data: 0.0727 max mem: 9377 +Train: [14] [1600/6250] eta: 0:12:55 lr: 0.000122 grad: 0.0738 (0.0829) loss: 0.8445 (0.8401) time: 0.1088 data: 0.0161 max mem: 9377 +Train: [14] [1700/6250] eta: 0:12:35 lr: 0.000122 grad: 0.0797 (0.0827) loss: 0.8374 (0.8400) time: 0.1670 data: 0.0832 max mem: 9377 +Train: [14] [1800/6250] eta: 0:12:16 lr: 0.000122 grad: 0.0853 (0.0827) loss: 0.8363 (0.8400) time: 0.1741 data: 0.0929 max mem: 9377 +Train: [14] [1900/6250] eta: 0:11:56 lr: 0.000122 grad: 0.0744 (0.0826) loss: 0.8444 (0.8400) time: 0.1322 data: 0.0518 max mem: 9377 +Train: [14] [2000/6250] eta: 0:11:38 lr: 0.000122 grad: 0.0767 (0.0823) loss: 0.8409 (0.8399) time: 0.1540 data: 0.0649 max mem: 9377 +Train: [14] [2100/6250] eta: 0:11:22 lr: 0.000122 grad: 0.0793 (0.0823) loss: 0.8451 (0.8398) time: 0.1910 data: 0.1074 max mem: 9377 +Train: [14] [2200/6250] eta: 0:11:05 lr: 0.000122 grad: 0.0741 (0.0823) loss: 0.8422 (0.8397) time: 0.1442 data: 0.0557 max mem: 9377 +Train: [14] [2300/6250] eta: 0:10:46 lr: 0.000122 grad: 0.0733 (0.0822) loss: 0.8424 (0.8397) time: 0.1683 data: 0.0826 max mem: 9377 +Train: [14] [2400/6250] eta: 0:11:08 lr: 0.000122 grad: 0.0803 (0.0821) loss: 0.8395 (0.8398) time: 0.1236 data: 0.0003 max mem: 9377 +Train: [14] [2500/6250] eta: 0:10:53 lr: 0.000122 grad: 0.0719 (0.0819) loss: 0.8422 (0.8397) time: 0.2700 data: 0.1728 max mem: 9377 +Train: [14] [2600/6250] eta: 0:11:00 lr: 0.000122 grad: 0.0738 (0.0817) loss: 0.8451 (0.8398) time: 0.1191 data: 0.0003 max mem: 9377 +Train: [14] [2700/6250] eta: 0:10:40 lr: 0.000122 grad: 0.0753 (0.0816) loss: 0.8486 (0.8400) time: 0.1292 data: 0.0004 max mem: 9377 +Train: [14] [2800/6250] eta: 0:10:43 lr: 0.000122 grad: 0.0743 (0.0814) loss: 0.8440 (0.8401) time: 0.7544 data: 0.6580 max mem: 9377 +Train: [14] [2900/6250] eta: 0:10:34 lr: 0.000122 grad: 0.0743 (0.0812) loss: 0.8446 (0.8403) time: 0.1015 data: 0.0002 max mem: 9377 +Train: [14] [3000/6250] eta: 0:10:10 lr: 0.000122 grad: 0.0768 (0.0811) loss: 0.8404 (0.8403) time: 0.1523 data: 0.0554 max mem: 9377 +Train: [14] [3100/6250] eta: 0:09:48 lr: 0.000122 grad: 0.0763 (0.0809) loss: 0.8453 (0.8405) time: 0.0977 data: 0.0002 max mem: 9377 +Train: [14] [3200/6250] eta: 0:09:40 lr: 0.000122 grad: 0.0744 (0.0807) loss: 0.8443 (0.8406) time: 0.2013 data: 0.1053 max mem: 9377 +Train: [14] [3300/6250] eta: 0:09:19 lr: 0.000122 grad: 0.0764 (0.0806) loss: 0.8386 (0.8407) time: 0.2075 data: 0.1235 max mem: 9377 +Train: [14] [3400/6250] eta: 0:08:59 lr: 0.000122 grad: 0.0773 (0.0805) loss: 0.8452 (0.8408) time: 0.2406 data: 0.1522 max mem: 9377 +Train: [14] [3500/6250] eta: 0:08:37 lr: 0.000122 grad: 0.0722 (0.0804) loss: 0.8473 (0.8409) time: 0.2093 data: 0.1294 max mem: 9377 +Train: [14] [3600/6250] eta: 0:08:16 lr: 0.000122 grad: 0.0736 (0.0802) loss: 0.8443 (0.8409) time: 0.1398 data: 0.0607 max mem: 9377 +Train: [14] [3700/6250] eta: 0:07:56 lr: 0.000122 grad: 0.0728 (0.0801) loss: 0.8372 (0.8410) time: 0.1649 data: 0.0720 max mem: 9377 +Train: [14] [3800/6250] eta: 0:07:35 lr: 0.000122 grad: 0.0747 (0.0800) loss: 0.8418 (0.8410) time: 0.1812 data: 0.0992 max mem: 9377 +Train: [14] [3900/6250] eta: 0:07:16 lr: 0.000122 grad: 0.0732 (0.0800) loss: 0.8425 (0.8410) time: 0.1860 data: 0.1024 max mem: 9377 +Train: [14] [4000/6250] eta: 0:06:56 lr: 0.000122 grad: 0.0776 (0.0799) loss: 0.8432 (0.8410) time: 0.1758 data: 0.0728 max mem: 9377 +Train: [14] [4100/6250] eta: 0:06:38 lr: 0.000122 grad: 0.0725 (0.0799) loss: 0.8420 (0.8410) time: 0.3190 data: 0.2383 max mem: 9377 +Train: [14] [4200/6250] eta: 0:06:17 lr: 0.000122 grad: 0.0771 (0.0799) loss: 0.8400 (0.8410) time: 0.1529 data: 0.0608 max mem: 9377 +Train: [14] [4300/6250] eta: 0:06:01 lr: 0.000122 grad: 0.0831 (0.0798) loss: 0.8431 (0.8410) time: 0.0961 data: 0.0002 max mem: 9377 +Train: [14] [4400/6250] eta: 0:05:42 lr: 0.000122 grad: 0.0788 (0.0798) loss: 0.8368 (0.8410) time: 0.2040 data: 0.1133 max mem: 9377 +Train: [14] [4500/6250] eta: 0:05:22 lr: 0.000122 grad: 0.0735 (0.0798) loss: 0.8400 (0.8410) time: 0.1575 data: 0.0617 max mem: 9377 +Train: [14] [4600/6250] eta: 0:05:03 lr: 0.000122 grad: 0.0803 (0.0798) loss: 0.8375 (0.8410) time: 0.2457 data: 0.1405 max mem: 9377 +Train: [14] [4700/6250] eta: 0:04:45 lr: 0.000122 grad: 0.0815 (0.0798) loss: 0.8405 (0.8410) time: 0.1437 data: 0.0490 max mem: 9377 +Train: [14] [4800/6250] eta: 0:04:26 lr: 0.000122 grad: 0.0834 (0.0797) loss: 0.8390 (0.8410) time: 0.2303 data: 0.1406 max mem: 9377 +Train: [14] [4900/6250] eta: 0:04:06 lr: 0.000122 grad: 0.0775 (0.0798) loss: 0.8403 (0.8410) time: 0.1494 data: 0.0646 max mem: 9377 +Train: [14] [5000/6250] eta: 0:03:47 lr: 0.000122 grad: 0.0761 (0.0797) loss: 0.8392 (0.8409) time: 0.1584 data: 0.0761 max mem: 9377 +Train: [14] [5100/6250] eta: 0:03:29 lr: 0.000122 grad: 0.0751 (0.0797) loss: 0.8430 (0.8409) time: 0.1356 data: 0.0471 max mem: 9377 +Train: [14] [5200/6250] eta: 0:03:10 lr: 0.000122 grad: 0.0775 (0.0797) loss: 0.8441 (0.8410) time: 0.1784 data: 0.1011 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:51 lr: 0.000122 grad: 0.0763 (0.0797) loss: 0.8402 (0.8410) time: 0.1644 data: 0.0798 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:33 lr: 0.000122 grad: 0.0732 (0.0797) loss: 0.8441 (0.8409) time: 0.1523 data: 0.0688 max mem: 9377 +Train: [14] [5500/6250] eta: 0:02:15 lr: 0.000122 grad: 0.0770 (0.0796) loss: 0.8431 (0.8410) time: 0.1860 data: 0.0959 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:56 lr: 0.000122 grad: 0.0752 (0.0796) loss: 0.8414 (0.8410) time: 0.1451 data: 0.0617 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:38 lr: 0.000122 grad: 0.0764 (0.0796) loss: 0.8386 (0.8410) time: 0.1280 data: 0.0309 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:20 lr: 0.000122 grad: 0.0799 (0.0796) loss: 0.8347 (0.8410) time: 0.1532 data: 0.0654 max mem: 9377 +Train: [14] [5900/6250] eta: 0:01:02 lr: 0.000122 grad: 0.0794 (0.0797) loss: 0.8352 (0.8410) time: 0.1059 data: 0.0160 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:44 lr: 0.000122 grad: 0.0790 (0.0798) loss: 0.8379 (0.8409) time: 0.2475 data: 0.1615 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:26 lr: 0.000122 grad: 0.0827 (0.0798) loss: 0.8364 (0.8409) time: 0.1569 data: 0.0694 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.0790 (0.0799) loss: 0.8322 (0.8408) time: 0.1688 data: 0.0825 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0807 (0.0799) loss: 0.8402 (0.8408) time: 0.1404 data: 0.0577 max mem: 9377 +Train: [14] Total time: 0:18:33 (0.1782 s / it) +Averaged stats: lr: 0.000122 grad: 0.0807 (0.0799) loss: 0.8402 (0.8408) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:05:31 loss: 0.8425 (0.8425) time: 5.3515 data: 5.3127 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8427 (0.8444) time: 0.1445 data: 0.1188 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (hcp-train-subset): loss: 0.8427 (0.8444) +Making plots (hcp-train-subset): example=34 +Eval (hcp-val): [14] [ 0/62] eta: 0:05:34 loss: 0.8446 (0.8446) time: 5.3973 data: 5.3643 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8464 (0.8471) time: 0.2186 data: 0.1932 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:15 (0.2540 s / it) +Averaged stats (hcp-val): loss: 0.8464 (0.8471) +Making plots (hcp-val): example=4 +Eval (nsd-val): [14] [ 0/62] eta: 0:04:01 loss: 0.8108 (0.8108) time: 3.8927 data: 3.8027 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8178 (0.8185) time: 0.1144 data: 0.0887 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:13 (0.2150 s / it) +Averaged stats (nsd-val): loss: 0.8178 (0.8185) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 10:55:03 lr: 0.000122 grad: 0.0761 (0.0761) loss: 0.8857 (0.8857) time: 6.2885 data: 5.9875 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:23:25 lr: 0.000122 grad: 0.0707 (0.0842) loss: 0.8499 (0.8459) time: 0.1859 data: 0.0848 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:21:18 lr: 0.000122 grad: 0.0761 (0.0809) loss: 0.8375 (0.8428) time: 0.2816 data: 0.1797 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:19:45 lr: 0.000122 grad: 0.0904 (0.0814) loss: 0.8264 (0.8393) time: 0.2078 data: 0.1261 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:18:46 lr: 0.000122 grad: 0.0806 (0.0829) loss: 0.8300 (0.8374) time: 0.1937 data: 0.0877 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:18:03 lr: 0.000122 grad: 0.0818 (0.0830) loss: 0.8312 (0.8365) time: 0.1300 data: 0.0396 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:17:12 lr: 0.000122 grad: 0.0784 (0.0829) loss: 0.8398 (0.8364) time: 0.1425 data: 0.0424 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:16:40 lr: 0.000122 grad: 0.0893 (0.0831) loss: 0.8287 (0.8363) time: 0.2094 data: 0.1169 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:16:02 lr: 0.000122 grad: 0.0818 (0.0828) loss: 0.8374 (0.8364) time: 0.1348 data: 0.0320 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:15:31 lr: 0.000122 grad: 0.0741 (0.0826) loss: 0.8408 (0.8366) time: 0.1389 data: 0.0466 max mem: 9377 +Train: [15] [1000/6250] eta: 0:15:03 lr: 0.000122 grad: 0.0726 (0.0823) loss: 0.8385 (0.8367) time: 0.1612 data: 0.0750 max mem: 9377 +Train: [15] [1100/6250] eta: 0:14:36 lr: 0.000121 grad: 0.0799 (0.0823) loss: 0.8364 (0.8366) time: 0.1474 data: 0.0473 max mem: 9377 +Train: [15] [1200/6250] eta: 0:14:10 lr: 0.000121 grad: 0.0777 (0.0824) loss: 0.8444 (0.8366) time: 0.1139 data: 0.0236 max mem: 9377 +Train: [15] [1300/6250] eta: 0:14:20 lr: 0.000121 grad: 0.0819 (0.0822) loss: 0.8320 (0.8368) time: 0.1034 data: 0.0002 max mem: 9377 +Train: [15] [1400/6250] eta: 0:14:01 lr: 0.000121 grad: 0.0805 (0.0822) loss: 0.8285 (0.8367) time: 0.1577 data: 0.0525 max mem: 9377 +Train: [15] [1500/6250] eta: 0:13:45 lr: 0.000121 grad: 0.0890 (0.0825) loss: 0.8253 (0.8365) time: 0.1053 data: 0.0054 max mem: 9377 +Train: [15] [1600/6250] eta: 0:13:24 lr: 0.000121 grad: 0.0808 (0.0826) loss: 0.8330 (0.8364) time: 0.1640 data: 0.0615 max mem: 9377 +Train: [15] [1700/6250] eta: 0:13:04 lr: 0.000121 grad: 0.0791 (0.0828) loss: 0.8322 (0.8363) time: 0.1407 data: 0.0499 max mem: 9377 +Train: [15] [1800/6250] eta: 0:13:21 lr: 0.000121 grad: 0.0855 (0.0828) loss: 0.8350 (0.8362) time: 0.6333 data: 0.5235 max mem: 9377 +Train: [15] [1900/6250] eta: 0:12:54 lr: 0.000121 grad: 0.0832 (0.0828) loss: 0.8368 (0.8361) time: 0.1331 data: 0.0450 max mem: 9377 +Train: [15] [2000/6250] eta: 0:12:31 lr: 0.000121 grad: 0.0838 (0.0828) loss: 0.8364 (0.8361) time: 0.1489 data: 0.0588 max mem: 9377 +Train: [15] [2100/6250] eta: 0:12:09 lr: 0.000121 grad: 0.0834 (0.0829) loss: 0.8421 (0.8361) time: 0.1682 data: 0.0889 max mem: 9377 +Train: [15] [2200/6250] eta: 0:11:47 lr: 0.000121 grad: 0.0800 (0.0829) loss: 0.8305 (0.8361) time: 0.1293 data: 0.0355 max mem: 9377 +Train: [15] [2300/6250] eta: 0:11:26 lr: 0.000121 grad: 0.0879 (0.0831) loss: 0.8374 (0.8361) time: 0.1518 data: 0.0722 max mem: 9377 +Train: [15] [2400/6250] eta: 0:11:11 lr: 0.000121 grad: 0.0842 (0.0832) loss: 0.8314 (0.8360) time: 0.2585 data: 0.1757 max mem: 9377 +Train: [15] [2500/6250] eta: 0:10:50 lr: 0.000121 grad: 0.0767 (0.0833) loss: 0.8386 (0.8361) time: 0.2038 data: 0.1214 max mem: 9377 +Train: [15] [2600/6250] eta: 0:10:30 lr: 0.000121 grad: 0.0871 (0.0833) loss: 0.8284 (0.8359) time: 0.1280 data: 0.0370 max mem: 9377 +Train: [15] [2700/6250] eta: 0:10:12 lr: 0.000121 grad: 0.0784 (0.0833) loss: 0.8359 (0.8358) time: 0.1799 data: 0.0948 max mem: 9377 +Train: [15] [2800/6250] eta: 0:09:54 lr: 0.000121 grad: 0.0891 (0.0835) loss: 0.8344 (0.8356) time: 0.1524 data: 0.0723 max mem: 9377 +Train: [15] [2900/6250] eta: 0:09:41 lr: 0.000121 grad: 0.0823 (0.0835) loss: 0.8368 (0.8355) time: 0.1521 data: 0.0688 max mem: 9377 +Train: [15] [3000/6250] eta: 0:09:22 lr: 0.000121 grad: 0.0800 (0.0835) loss: 0.8343 (0.8355) time: 0.1752 data: 0.0923 max mem: 9377 +Train: [15] [3100/6250] eta: 0:09:06 lr: 0.000121 grad: 0.0831 (0.0835) loss: 0.8341 (0.8355) time: 0.2987 data: 0.2081 max mem: 9377 +Train: [15] [3200/6250] eta: 0:08:45 lr: 0.000121 grad: 0.0808 (0.0834) loss: 0.8344 (0.8355) time: 0.1696 data: 0.0839 max mem: 9377 +Train: [15] [3300/6250] eta: 0:08:29 lr: 0.000121 grad: 0.0826 (0.0834) loss: 0.8331 (0.8355) time: 0.2186 data: 0.1292 max mem: 9377 +Train: [15] [3400/6250] eta: 0:08:10 lr: 0.000121 grad: 0.0806 (0.0833) loss: 0.8342 (0.8355) time: 0.1425 data: 0.0466 max mem: 9377 +Train: [15] [3500/6250] eta: 0:07:51 lr: 0.000121 grad: 0.0754 (0.0833) loss: 0.8333 (0.8354) time: 0.1542 data: 0.0689 max mem: 9377 +Train: [15] [3600/6250] eta: 0:07:35 lr: 0.000121 grad: 0.0786 (0.0832) loss: 0.8311 (0.8353) time: 0.1598 data: 0.0733 max mem: 9377 +Train: [15] [3700/6250] eta: 0:07:17 lr: 0.000121 grad: 0.0852 (0.0833) loss: 0.8333 (0.8352) time: 0.1987 data: 0.1161 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:58 lr: 0.000121 grad: 0.0757 (0.0833) loss: 0.8381 (0.8351) time: 0.1566 data: 0.0722 max mem: 9377 +Train: [15] [3900/6250] eta: 0:06:42 lr: 0.000121 grad: 0.0810 (0.0832) loss: 0.8345 (0.8351) time: 0.0893 data: 0.0002 max mem: 9377 +Train: [15] [4000/6250] eta: 0:06:24 lr: 0.000121 grad: 0.0803 (0.0832) loss: 0.8356 (0.8351) time: 0.1545 data: 0.0698 max mem: 9377 +Train: [15] [4100/6250] eta: 0:06:07 lr: 0.000121 grad: 0.0735 (0.0831) loss: 0.8357 (0.8350) time: 0.1775 data: 0.0902 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:49 lr: 0.000121 grad: 0.0720 (0.0830) loss: 0.8370 (0.8350) time: 0.1441 data: 0.0378 max mem: 9377 +Train: [15] [4300/6250] eta: 0:05:31 lr: 0.000121 grad: 0.0751 (0.0829) loss: 0.8338 (0.8350) time: 0.1727 data: 0.0814 max mem: 9377 +Train: [15] [4400/6250] eta: 0:05:13 lr: 0.000121 grad: 0.0806 (0.0829) loss: 0.8319 (0.8350) time: 0.1771 data: 0.0779 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:56 lr: 0.000121 grad: 0.0820 (0.0828) loss: 0.8308 (0.8349) time: 0.1369 data: 0.0487 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:39 lr: 0.000121 grad: 0.0799 (0.0828) loss: 0.8338 (0.8348) time: 0.1868 data: 0.1033 max mem: 9377 +Train: [15] [4700/6250] eta: 0:04:21 lr: 0.000121 grad: 0.0810 (0.0828) loss: 0.8354 (0.8348) time: 0.1452 data: 0.0614 max mem: 9377 +Train: [15] [4800/6250] eta: 0:04:04 lr: 0.000121 grad: 0.0777 (0.0828) loss: 0.8359 (0.8348) time: 0.1381 data: 0.0472 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:47 lr: 0.000121 grad: 0.0798 (0.0828) loss: 0.8360 (0.8348) time: 0.1538 data: 0.0770 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:30 lr: 0.000121 grad: 0.0801 (0.0828) loss: 0.8375 (0.8348) time: 0.1976 data: 0.1112 max mem: 9377 +Train: [15] [5100/6250] eta: 0:03:12 lr: 0.000121 grad: 0.0816 (0.0828) loss: 0.8334 (0.8348) time: 0.1618 data: 0.0797 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:55 lr: 0.000121 grad: 0.0839 (0.0829) loss: 0.8318 (0.8347) time: 0.1584 data: 0.0724 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:38 lr: 0.000121 grad: 0.0851 (0.0829) loss: 0.8301 (0.8346) time: 0.1506 data: 0.0755 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:21 lr: 0.000121 grad: 0.0819 (0.0829) loss: 0.8338 (0.8346) time: 0.1790 data: 0.0997 max mem: 9377 +Train: [15] [5500/6250] eta: 0:02:04 lr: 0.000121 grad: 0.0843 (0.0830) loss: 0.8275 (0.8344) time: 0.1391 data: 0.0589 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:48 lr: 0.000121 grad: 0.0867 (0.0831) loss: 0.8319 (0.8343) time: 0.1715 data: 0.0730 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:31 lr: 0.000121 grad: 0.0845 (0.0831) loss: 0.8260 (0.8342) time: 0.1298 data: 0.0398 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:15 lr: 0.000121 grad: 0.0854 (0.0832) loss: 0.8242 (0.8341) time: 0.0974 data: 0.0002 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:58 lr: 0.000121 grad: 0.0882 (0.0833) loss: 0.8261 (0.8340) time: 0.2680 data: 0.1718 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:42 lr: 0.000121 grad: 0.0853 (0.0833) loss: 0.8209 (0.8338) time: 0.1552 data: 0.0574 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:25 lr: 0.000121 grad: 0.0812 (0.0834) loss: 0.8265 (0.8337) time: 0.2017 data: 0.1201 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:08 lr: 0.000121 grad: 0.0814 (0.0834) loss: 0.8212 (0.8336) time: 0.1973 data: 0.1042 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.0868 (0.0834) loss: 0.8255 (0.8336) time: 0.1662 data: 0.0753 max mem: 9377 +Train: [15] Total time: 0:17:37 (0.1693 s / it) +Averaged stats: lr: 0.000121 grad: 0.0868 (0.0834) loss: 0.8255 (0.8336) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:05:00 loss: 0.8459 (0.8459) time: 4.8445 data: 4.8112 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8426 (0.8446) time: 0.1068 data: 0.0801 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:13 (0.2114 s / it) +Averaged stats (hcp-train-subset): loss: 0.8426 (0.8446) +Eval (hcp-val): [15] [ 0/62] eta: 0:06:24 loss: 0.8458 (0.8458) time: 6.2011 data: 6.1704 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8477 (0.8478) time: 0.1177 data: 0.0922 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:14 (0.2304 s / it) +Averaged stats (hcp-val): loss: 0.8477 (0.8478) +Eval (nsd-val): [15] [ 0/62] eta: 0:04:09 loss: 0.8065 (0.8065) time: 4.0318 data: 3.9366 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8156 (0.8179) time: 0.1358 data: 0.1101 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:13 (0.2182 s / it) +Averaged stats (nsd-val): loss: 0.8156 (0.8179) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [16] [ 0/6250] eta: 11:45:37 lr: 0.000121 grad: 0.1463 (0.1463) loss: 0.8341 (0.8341) time: 6.7741 data: 6.6525 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:23:19 lr: 0.000121 grad: 0.0917 (0.0969) loss: 0.8396 (0.8404) time: 0.2094 data: 0.1147 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:20:14 lr: 0.000121 grad: 0.0765 (0.0913) loss: 0.8437 (0.8387) time: 0.2005 data: 0.1094 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:18:38 lr: 0.000121 grad: 0.0823 (0.0893) loss: 0.8301 (0.8374) time: 0.1428 data: 0.0479 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:17:48 lr: 0.000121 grad: 0.0784 (0.0883) loss: 0.8324 (0.8361) time: 0.1742 data: 0.0842 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:17:09 lr: 0.000121 grad: 0.0905 (0.0877) loss: 0.8339 (0.8356) time: 0.1630 data: 0.0775 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:16:41 lr: 0.000121 grad: 0.0854 (0.0876) loss: 0.8373 (0.8355) time: 0.1775 data: 0.0881 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:16:20 lr: 0.000121 grad: 0.0840 (0.0873) loss: 0.8312 (0.8352) time: 0.1799 data: 0.0780 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:16:49 lr: 0.000121 grad: 0.0841 (0.0870) loss: 0.8328 (0.8350) time: 0.2837 data: 0.1900 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:17:57 lr: 0.000121 grad: 0.0801 (0.0867) loss: 0.8379 (0.8348) time: 0.5634 data: 0.4671 max mem: 9377 +Train: [16] [1000/6250] eta: 0:17:46 lr: 0.000121 grad: 0.0822 (0.0865) loss: 0.8329 (0.8349) time: 0.4495 data: 0.3689 max mem: 9377 +Train: [16] [1100/6250] eta: 0:17:06 lr: 0.000121 grad: 0.0785 (0.0860) loss: 0.8347 (0.8348) time: 0.1917 data: 0.1067 max mem: 9377 +Train: [16] [1200/6250] eta: 0:16:25 lr: 0.000121 grad: 0.0847 (0.0858) loss: 0.8295 (0.8347) time: 0.1437 data: 0.0620 max mem: 9377 +Train: [16] [1300/6250] eta: 0:16:13 lr: 0.000121 grad: 0.0783 (0.0854) loss: 0.8373 (0.8347) time: 0.0809 data: 0.0002 max mem: 9377 +Train: [16] [1400/6250] eta: 0:15:38 lr: 0.000121 grad: 0.0770 (0.0853) loss: 0.8360 (0.8345) time: 0.1171 data: 0.0141 max mem: 9377 +Train: [16] [1500/6250] eta: 0:15:10 lr: 0.000121 grad: 0.0870 (0.0854) loss: 0.8325 (0.8344) time: 0.1557 data: 0.0656 max mem: 9377 +Train: [16] [1600/6250] eta: 0:14:44 lr: 0.000121 grad: 0.0802 (0.0852) loss: 0.8396 (0.8345) time: 0.1622 data: 0.0652 max mem: 9377 +Train: [16] [1700/6250] eta: 0:14:23 lr: 0.000121 grad: 0.0795 (0.0851) loss: 0.8381 (0.8345) time: 0.0932 data: 0.0002 max mem: 9377 +Train: [16] [1800/6250] eta: 0:13:55 lr: 0.000121 grad: 0.0830 (0.0850) loss: 0.8330 (0.8345) time: 0.1653 data: 0.0904 max mem: 9377 +Train: [16] [1900/6250] eta: 0:13:29 lr: 0.000121 grad: 0.0781 (0.0852) loss: 0.8342 (0.8344) time: 0.1875 data: 0.1017 max mem: 9377 +Train: [16] [2000/6250] eta: 0:13:05 lr: 0.000121 grad: 0.0755 (0.0850) loss: 0.8396 (0.8343) time: 0.1678 data: 0.0854 max mem: 9377 +Train: [16] [2100/6250] eta: 0:12:43 lr: 0.000121 grad: 0.0814 (0.0850) loss: 0.8380 (0.8343) time: 0.1978 data: 0.1149 max mem: 9377 +Train: [16] [2200/6250] eta: 0:12:19 lr: 0.000121 grad: 0.0843 (0.0851) loss: 0.8283 (0.8343) time: 0.1472 data: 0.0518 max mem: 9377 +Train: [16] [2300/6250] eta: 0:11:57 lr: 0.000121 grad: 0.0759 (0.0851) loss: 0.8374 (0.8342) time: 0.1185 data: 0.0366 max mem: 9377 +Train: [16] [2400/6250] eta: 0:11:36 lr: 0.000121 grad: 0.0749 (0.0850) loss: 0.8388 (0.8343) time: 0.1146 data: 0.0342 max mem: 9377 +Train: [16] [2500/6250] eta: 0:11:14 lr: 0.000121 grad: 0.0829 (0.0849) loss: 0.8339 (0.8343) time: 0.1628 data: 0.0778 max mem: 9377 +Train: [16] [2600/6250] eta: 0:10:58 lr: 0.000121 grad: 0.0797 (0.0849) loss: 0.8313 (0.8342) time: 0.2908 data: 0.2161 max mem: 9377 +Train: [16] [2700/6250] eta: 0:10:36 lr: 0.000121 grad: 0.0846 (0.0848) loss: 0.8352 (0.8342) time: 0.1679 data: 0.0853 max mem: 9377 +Train: [16] [2800/6250] eta: 0:10:16 lr: 0.000121 grad: 0.0830 (0.0848) loss: 0.8309 (0.8341) time: 0.1955 data: 0.1143 max mem: 9377 +Train: [16] [2900/6250] eta: 0:09:55 lr: 0.000121 grad: 0.0791 (0.0848) loss: 0.8319 (0.8339) time: 0.1364 data: 0.0584 max mem: 9377 +Train: [16] [3000/6250] eta: 0:09:36 lr: 0.000121 grad: 0.0857 (0.0848) loss: 0.8277 (0.8337) time: 0.1770 data: 0.0874 max mem: 9377 +Train: [16] [3100/6250] eta: 0:09:17 lr: 0.000121 grad: 0.0780 (0.0849) loss: 0.8338 (0.8335) time: 0.1982 data: 0.1131 max mem: 9377 +Train: [16] [3200/6250] eta: 0:09:01 lr: 0.000121 grad: 0.0837 (0.0848) loss: 0.8295 (0.8334) time: 0.2978 data: 0.2157 max mem: 9377 +Train: [16] [3300/6250] eta: 0:08:39 lr: 0.000121 grad: 0.0784 (0.0849) loss: 0.8329 (0.8333) time: 0.1385 data: 0.0489 max mem: 9377 +Train: [16] [3400/6250] eta: 0:08:21 lr: 0.000121 grad: 0.0856 (0.0849) loss: 0.8269 (0.8332) time: 0.1644 data: 0.0707 max mem: 9377 +Train: [16] [3500/6250] eta: 0:08:01 lr: 0.000120 grad: 0.0843 (0.0850) loss: 0.8299 (0.8330) time: 0.1457 data: 0.0508 max mem: 9377 +Train: [16] [3600/6250] eta: 0:07:43 lr: 0.000120 grad: 0.0822 (0.0851) loss: 0.8311 (0.8330) time: 0.1929 data: 0.1082 max mem: 9377 +Train: [16] [3700/6250] eta: 0:07:24 lr: 0.000120 grad: 0.0841 (0.0851) loss: 0.8301 (0.8329) time: 0.1128 data: 0.0269 max mem: 9377 +Train: [16] [3800/6250] eta: 0:07:06 lr: 0.000120 grad: 0.0880 (0.0851) loss: 0.8313 (0.8328) time: 0.1214 data: 0.0374 max mem: 9377 +Train: [16] [3900/6250] eta: 0:06:49 lr: 0.000120 grad: 0.0778 (0.0851) loss: 0.8268 (0.8327) time: 0.2088 data: 0.1322 max mem: 9377 +Train: [16] [4000/6250] eta: 0:06:31 lr: 0.000120 grad: 0.0833 (0.0851) loss: 0.8339 (0.8327) time: 0.1285 data: 0.0365 max mem: 9377 +Train: [16] [4100/6250] eta: 0:06:12 lr: 0.000120 grad: 0.0827 (0.0852) loss: 0.8305 (0.8326) time: 0.1523 data: 0.0683 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:54 lr: 0.000120 grad: 0.0864 (0.0852) loss: 0.8323 (0.8325) time: 0.1585 data: 0.0728 max mem: 9377 +Train: [16] [4300/6250] eta: 0:05:36 lr: 0.000120 grad: 0.0820 (0.0853) loss: 0.8318 (0.8325) time: 0.1842 data: 0.0969 max mem: 9377 +Train: [16] [4400/6250] eta: 0:05:19 lr: 0.000120 grad: 0.0863 (0.0854) loss: 0.8240 (0.8323) time: 0.1707 data: 0.0749 max mem: 9377 +Train: [16] [4500/6250] eta: 0:05:01 lr: 0.000120 grad: 0.0864 (0.0856) loss: 0.8205 (0.8321) time: 0.1591 data: 0.0748 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:43 lr: 0.000120 grad: 0.0894 (0.0857) loss: 0.8239 (0.8320) time: 0.1653 data: 0.0821 max mem: 9377 +Train: [16] [4700/6250] eta: 0:04:27 lr: 0.000120 grad: 0.0900 (0.0859) loss: 0.8229 (0.8318) time: 0.1030 data: 0.0098 max mem: 9377 +Train: [16] [4800/6250] eta: 0:04:10 lr: 0.000120 grad: 0.0904 (0.0860) loss: 0.8178 (0.8317) time: 0.1176 data: 0.0348 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:52 lr: 0.000120 grad: 0.0905 (0.0861) loss: 0.8161 (0.8315) time: 0.1657 data: 0.0818 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:35 lr: 0.000120 grad: 0.0852 (0.0861) loss: 0.8273 (0.8314) time: 0.2240 data: 0.1369 max mem: 9377 +Train: [16] [5100/6250] eta: 0:03:17 lr: 0.000120 grad: 0.0843 (0.0862) loss: 0.8293 (0.8312) time: 0.1579 data: 0.0703 max mem: 9377 +Train: [16] [5200/6250] eta: 0:03:00 lr: 0.000120 grad: 0.0749 (0.0862) loss: 0.8310 (0.8311) time: 0.1130 data: 0.0210 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:43 lr: 0.000120 grad: 0.0836 (0.0863) loss: 0.8287 (0.8310) time: 0.1796 data: 0.0898 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:25 lr: 0.000120 grad: 0.0836 (0.0863) loss: 0.8266 (0.8309) time: 0.2075 data: 0.1243 max mem: 9377 +Train: [16] [5500/6250] eta: 0:02:08 lr: 0.000120 grad: 0.0885 (0.0863) loss: 0.8333 (0.8309) time: 0.2117 data: 0.1301 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:51 lr: 0.000120 grad: 0.0905 (0.0864) loss: 0.8241 (0.8308) time: 0.1485 data: 0.0559 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:33 lr: 0.000120 grad: 0.0822 (0.0864) loss: 0.8312 (0.8308) time: 0.1415 data: 0.0623 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:16 lr: 0.000120 grad: 0.0839 (0.0864) loss: 0.8252 (0.8307) time: 0.1541 data: 0.0739 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:59 lr: 0.000120 grad: 0.0847 (0.0865) loss: 0.8278 (0.8306) time: 0.1376 data: 0.0567 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:42 lr: 0.000120 grad: 0.0876 (0.0864) loss: 0.8351 (0.8306) time: 0.1311 data: 0.0512 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:25 lr: 0.000120 grad: 0.0788 (0.0864) loss: 0.8252 (0.8305) time: 0.1511 data: 0.0679 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:08 lr: 0.000120 grad: 0.0877 (0.0865) loss: 0.8323 (0.8305) time: 0.1636 data: 0.0883 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.0903 (0.0865) loss: 0.8289 (0.8305) time: 0.2307 data: 0.1459 max mem: 9377 +Train: [16] Total time: 0:17:48 (0.1709 s / it) +Averaged stats: lr: 0.000120 grad: 0.0903 (0.0865) loss: 0.8289 (0.8305) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:04:56 loss: 0.8430 (0.8430) time: 4.7859 data: 4.7556 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8440 (0.8434) time: 0.1289 data: 0.1041 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:13 (0.2207 s / it) +Averaged stats (hcp-train-subset): loss: 0.8440 (0.8434) +Eval (hcp-val): [16] [ 0/62] eta: 0:06:00 loss: 0.8458 (0.8458) time: 5.8185 data: 5.7882 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8480 (0.8484) time: 0.1221 data: 0.0943 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:13 (0.2219 s / it) +Averaged stats (hcp-val): loss: 0.8480 (0.8484) +Eval (nsd-val): [16] [ 0/62] eta: 0:05:01 loss: 0.8093 (0.8093) time: 4.8661 data: 4.8263 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8205 (0.8212) time: 0.1127 data: 0.0876 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:12 (0.2097 s / it) +Averaged stats (nsd-val): loss: 0.8205 (0.8212) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [17] [ 0/6250] eta: 11:18:48 lr: 0.000120 grad: 0.0922 (0.0922) loss: 0.8317 (0.8317) time: 6.5165 data: 6.3997 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:54:00 lr: 0.000120 grad: 0.0801 (0.0898) loss: 0.8423 (0.8519) time: 0.3344 data: 0.2080 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:36:05 lr: 0.000120 grad: 0.0736 (0.0858) loss: 0.8378 (0.8488) time: 0.1126 data: 0.0003 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:29:26 lr: 0.000120 grad: 0.0777 (0.0845) loss: 0.8452 (0.8477) time: 0.1180 data: 0.0343 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:27:55 lr: 0.000120 grad: 0.0753 (0.0838) loss: 0.8431 (0.8460) time: 0.6086 data: 0.5058 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:25:33 lr: 0.000120 grad: 0.0794 (0.0831) loss: 0.8422 (0.8453) time: 0.2219 data: 0.1063 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:23:46 lr: 0.000120 grad: 0.0761 (0.0824) loss: 0.8373 (0.8446) time: 0.2716 data: 0.1720 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:23:03 lr: 0.000120 grad: 0.0814 (0.0822) loss: 0.8342 (0.8441) time: 0.4538 data: 0.3085 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:24:47 lr: 0.000120 grad: 0.0746 (0.0824) loss: 0.8366 (0.8434) time: 0.1089 data: 0.0099 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:24:23 lr: 0.000120 grad: 0.0798 (0.0823) loss: 0.8327 (0.8427) time: 0.2838 data: 0.0544 max mem: 9377 +Train: [17] [1000/6250] eta: 0:24:38 lr: 0.000120 grad: 0.0733 (0.0821) loss: 0.8396 (0.8422) time: 0.6659 data: 0.5504 max mem: 9377 +Train: [17] [1100/6250] eta: 0:23:04 lr: 0.000120 grad: 0.0768 (0.0820) loss: 0.8398 (0.8419) time: 0.1490 data: 0.0539 max mem: 9377 +Train: [17] [1200/6250] eta: 0:21:52 lr: 0.000120 grad: 0.0813 (0.0821) loss: 0.8331 (0.8414) time: 0.1557 data: 0.0816 max mem: 9377 +Train: [17] [1300/6250] eta: 0:20:49 lr: 0.000120 grad: 0.0816 (0.0823) loss: 0.8354 (0.8409) time: 0.1968 data: 0.1150 max mem: 9377 +Train: [17] [1400/6250] eta: 0:20:23 lr: 0.000120 grad: 0.0793 (0.0824) loss: 0.8325 (0.8403) time: 0.1355 data: 0.0316 max mem: 9377 +Train: [17] [1500/6250] eta: 0:19:28 lr: 0.000120 grad: 0.0840 (0.0827) loss: 0.8305 (0.8399) time: 0.1763 data: 0.0897 max mem: 9377 +Train: [17] [1600/6250] eta: 0:18:37 lr: 0.000120 grad: 0.0769 (0.0826) loss: 0.8326 (0.8395) time: 0.1596 data: 0.0711 max mem: 9377 +Train: [17] [1700/6250] eta: 0:17:53 lr: 0.000120 grad: 0.0848 (0.0826) loss: 0.8331 (0.8392) time: 0.1896 data: 0.0994 max mem: 9377 +Train: [17] [1800/6250] eta: 0:17:22 lr: 0.000120 grad: 0.0863 (0.0827) loss: 0.8344 (0.8390) time: 0.3027 data: 0.2184 max mem: 9377 +Train: [17] [1900/6250] eta: 0:16:41 lr: 0.000120 grad: 0.0820 (0.0829) loss: 0.8289 (0.8386) time: 0.2142 data: 0.1161 max mem: 9377 +Train: [17] [2000/6250] eta: 0:16:13 lr: 0.000120 grad: 0.0839 (0.0830) loss: 0.8390 (0.8383) time: 0.1313 data: 0.0329 max mem: 9377 +Train: [17] [2100/6250] eta: 0:15:35 lr: 0.000120 grad: 0.0799 (0.0832) loss: 0.8296 (0.8380) time: 0.1412 data: 0.0532 max mem: 9377 +Train: [17] [2200/6250] eta: 0:15:01 lr: 0.000120 grad: 0.0795 (0.0832) loss: 0.8340 (0.8377) time: 0.1329 data: 0.0401 max mem: 9377 +Train: [17] [2300/6250] eta: 0:14:29 lr: 0.000120 grad: 0.0898 (0.0833) loss: 0.8316 (0.8374) time: 0.1683 data: 0.0767 max mem: 9377 +Train: [17] [2400/6250] eta: 0:13:58 lr: 0.000120 grad: 0.0804 (0.0835) loss: 0.8329 (0.8371) time: 0.2059 data: 0.1275 max mem: 9377 +Train: [17] [2500/6250] eta: 0:13:27 lr: 0.000120 grad: 0.0858 (0.0836) loss: 0.8292 (0.8369) time: 0.1801 data: 0.0953 max mem: 9377 +Train: [17] [2600/6250] eta: 0:12:58 lr: 0.000120 grad: 0.0882 (0.0837) loss: 0.8264 (0.8367) time: 0.1673 data: 0.0852 max mem: 9377 +Train: [17] [2700/6250] eta: 0:12:29 lr: 0.000120 grad: 0.0875 (0.0838) loss: 0.8277 (0.8365) time: 0.1141 data: 0.0246 max mem: 9377 +Train: [17] [2800/6250] eta: 0:12:03 lr: 0.000120 grad: 0.0809 (0.0839) loss: 0.8351 (0.8363) time: 0.1693 data: 0.0845 max mem: 9377 +Train: [17] [2900/6250] eta: 0:11:37 lr: 0.000120 grad: 0.0760 (0.0839) loss: 0.8330 (0.8362) time: 0.1261 data: 0.0443 max mem: 9377 +Train: [17] [3000/6250] eta: 0:11:10 lr: 0.000120 grad: 0.0788 (0.0840) loss: 0.8374 (0.8360) time: 0.1774 data: 0.0959 max mem: 9377 +Train: [17] [3100/6250] eta: 0:10:46 lr: 0.000120 grad: 0.0910 (0.0841) loss: 0.8230 (0.8358) time: 0.1638 data: 0.0769 max mem: 9377 +Train: [17] [3200/6250] eta: 0:10:23 lr: 0.000120 grad: 0.0857 (0.0842) loss: 0.8303 (0.8356) time: 0.1331 data: 0.0361 max mem: 9377 +Train: [17] [3300/6250] eta: 0:09:59 lr: 0.000120 grad: 0.0830 (0.0843) loss: 0.8339 (0.8354) time: 0.1744 data: 0.0761 max mem: 9377 +Train: [17] [3400/6250] eta: 0:09:35 lr: 0.000120 grad: 0.0834 (0.0844) loss: 0.8231 (0.8352) time: 0.1521 data: 0.0596 max mem: 9377 +Train: [17] [3500/6250] eta: 0:09:11 lr: 0.000120 grad: 0.0840 (0.0845) loss: 0.8294 (0.8350) time: 0.1343 data: 0.0527 max mem: 9377 +Train: [17] [3600/6250] eta: 0:08:47 lr: 0.000120 grad: 0.0839 (0.0846) loss: 0.8238 (0.8348) time: 0.1686 data: 0.0805 max mem: 9377 +Train: [17] [3700/6250] eta: 0:08:23 lr: 0.000120 grad: 0.0837 (0.0847) loss: 0.8256 (0.8346) time: 0.1339 data: 0.0524 max mem: 9377 +Train: [17] [3800/6250] eta: 0:08:02 lr: 0.000120 grad: 0.0830 (0.0847) loss: 0.8280 (0.8344) time: 0.1999 data: 0.1136 max mem: 9377 +Train: [17] [3900/6250] eta: 0:07:39 lr: 0.000120 grad: 0.0872 (0.0849) loss: 0.8201 (0.8342) time: 0.1347 data: 0.0259 max mem: 9377 +Train: [17] [4000/6250] eta: 0:07:18 lr: 0.000120 grad: 0.0868 (0.0850) loss: 0.8249 (0.8340) time: 0.1587 data: 0.0747 max mem: 9377 +Train: [17] [4100/6250] eta: 0:06:56 lr: 0.000120 grad: 0.0846 (0.0851) loss: 0.8238 (0.8337) time: 0.1365 data: 0.0596 max mem: 9377 +Train: [17] [4200/6250] eta: 0:06:35 lr: 0.000120 grad: 0.0852 (0.0852) loss: 0.8258 (0.8334) time: 0.1542 data: 0.0709 max mem: 9377 +Train: [17] [4300/6250] eta: 0:06:15 lr: 0.000120 grad: 0.0870 (0.0853) loss: 0.8248 (0.8332) time: 0.1961 data: 0.1095 max mem: 9377 +Train: [17] [4400/6250] eta: 0:05:54 lr: 0.000120 grad: 0.0947 (0.0854) loss: 0.8256 (0.8330) time: 0.1906 data: 0.1020 max mem: 9377 +Train: [17] [4500/6250] eta: 0:05:34 lr: 0.000120 grad: 0.0892 (0.0855) loss: 0.8211 (0.8327) time: 0.1182 data: 0.0241 max mem: 9377 +Train: [17] [4600/6250] eta: 0:05:14 lr: 0.000120 grad: 0.0848 (0.0856) loss: 0.8245 (0.8326) time: 0.2364 data: 0.1480 max mem: 9377 +Train: [17] [4700/6250] eta: 0:04:54 lr: 0.000120 grad: 0.0861 (0.0857) loss: 0.8259 (0.8324) time: 0.1485 data: 0.0531 max mem: 9377 +Train: [17] [4800/6250] eta: 0:04:34 lr: 0.000120 grad: 0.0801 (0.0857) loss: 0.8301 (0.8323) time: 0.1727 data: 0.0832 max mem: 9377 +Train: [17] [4900/6250] eta: 0:04:14 lr: 0.000119 grad: 0.0873 (0.0858) loss: 0.8244 (0.8321) time: 0.1334 data: 0.0260 max mem: 9377 +Train: [17] [5000/6250] eta: 0:03:54 lr: 0.000119 grad: 0.0874 (0.0859) loss: 0.8261 (0.8320) time: 0.0926 data: 0.0002 max mem: 9377 +Train: [17] [5100/6250] eta: 0:03:35 lr: 0.000119 grad: 0.0828 (0.0859) loss: 0.8271 (0.8319) time: 0.1273 data: 0.0363 max mem: 9377 +Train: [17] [5200/6250] eta: 0:03:15 lr: 0.000119 grad: 0.0853 (0.0861) loss: 0.8299 (0.8318) time: 0.1339 data: 0.0359 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:56 lr: 0.000119 grad: 0.0976 (0.0862) loss: 0.8240 (0.8317) time: 0.1231 data: 0.0355 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:37 lr: 0.000119 grad: 0.0947 (0.0864) loss: 0.8272 (0.8316) time: 0.2098 data: 0.1267 max mem: 9377 +Train: [17] [5500/6250] eta: 0:02:18 lr: 0.000119 grad: 0.0893 (0.0864) loss: 0.8359 (0.8315) time: 0.1169 data: 0.0251 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:59 lr: 0.000119 grad: 0.0850 (0.0865) loss: 0.8269 (0.8315) time: 0.1469 data: 0.0627 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:41 lr: 0.000119 grad: 0.0859 (0.0865) loss: 0.8336 (0.8314) time: 0.2000 data: 0.1204 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:22 lr: 0.000119 grad: 0.0814 (0.0865) loss: 0.8324 (0.8314) time: 0.1806 data: 0.0950 max mem: 9377 +Train: [17] [5900/6250] eta: 0:01:04 lr: 0.000119 grad: 0.0795 (0.0865) loss: 0.8325 (0.8314) time: 0.0945 data: 0.0002 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:45 lr: 0.000119 grad: 0.0848 (0.0865) loss: 0.8339 (0.8314) time: 0.1317 data: 0.0392 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:27 lr: 0.000119 grad: 0.0932 (0.0866) loss: 0.8222 (0.8314) time: 0.2590 data: 0.1657 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:09 lr: 0.000119 grad: 0.0863 (0.0866) loss: 0.8288 (0.8314) time: 0.2141 data: 0.1302 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.0884 (0.0866) loss: 0.8332 (0.8314) time: 0.1713 data: 0.0664 max mem: 9377 +Train: [17] Total time: 0:19:15 (0.1849 s / it) +Averaged stats: lr: 0.000119 grad: 0.0884 (0.0866) loss: 0.8332 (0.8314) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:05:14 loss: 0.8390 (0.8390) time: 5.0773 data: 5.0461 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8430 (0.8420) time: 0.1237 data: 0.0945 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:12 (0.2090 s / it) +Averaged stats (hcp-train-subset): loss: 0.8430 (0.8420) +Eval (hcp-val): [17] [ 0/62] eta: 0:06:08 loss: 0.8440 (0.8440) time: 5.9447 data: 5.9143 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8462 (0.8472) time: 0.1350 data: 0.1097 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-val): loss: 0.8462 (0.8472) +Eval (nsd-val): [17] [ 0/62] eta: 0:04:50 loss: 0.8090 (0.8090) time: 4.6860 data: 4.6537 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8158 (0.8182) time: 0.1335 data: 0.1079 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (nsd-val): loss: 0.8158 (0.8182) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [18] [ 0/6250] eta: 6:39:19 lr: 0.000119 grad: 0.0621 (0.0621) loss: 0.8797 (0.8797) time: 3.8335 data: 3.6313 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:23:45 lr: 0.000119 grad: 0.0746 (0.1185) loss: 0.8384 (0.8342) time: 0.1787 data: 0.0744 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:19:54 lr: 0.000119 grad: 0.0905 (0.1054) loss: 0.8286 (0.8339) time: 0.1542 data: 0.0636 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:18:53 lr: 0.000119 grad: 0.0817 (0.0994) loss: 0.8338 (0.8333) time: 0.2008 data: 0.1099 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:18:16 lr: 0.000119 grad: 0.0833 (0.0974) loss: 0.8345 (0.8313) time: 0.2144 data: 0.1220 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:17:14 lr: 0.000119 grad: 0.0776 (0.0957) loss: 0.8404 (0.8309) time: 0.1212 data: 0.0224 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:16:43 lr: 0.000119 grad: 0.0792 (0.0941) loss: 0.8313 (0.8305) time: 0.1741 data: 0.0679 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:17:12 lr: 0.000119 grad: 0.0798 (0.0925) loss: 0.8371 (0.8312) time: 0.1445 data: 0.0501 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:16:42 lr: 0.000119 grad: 0.0918 (0.0913) loss: 0.8367 (0.8314) time: 0.1534 data: 0.0607 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:16:25 lr: 0.000119 grad: 0.0784 (0.0904) loss: 0.8368 (0.8316) time: 0.2119 data: 0.1147 max mem: 9377 +Train: [18] [1000/6250] eta: 0:15:51 lr: 0.000119 grad: 0.0765 (0.0895) loss: 0.8261 (0.8314) time: 0.1631 data: 0.0829 max mem: 9377 +Train: [18] [1100/6250] eta: 0:15:17 lr: 0.000119 grad: 0.0837 (0.0890) loss: 0.8324 (0.8315) time: 0.1577 data: 0.0764 max mem: 9377 +Train: [18] [1200/6250] eta: 0:14:54 lr: 0.000119 grad: 0.0806 (0.0884) loss: 0.8300 (0.8313) time: 0.2134 data: 0.1301 max mem: 9377 +Train: [18] [1300/6250] eta: 0:14:30 lr: 0.000119 grad: 0.0851 (0.0881) loss: 0.8251 (0.8309) time: 0.2006 data: 0.1147 max mem: 9377 +Train: [18] [1400/6250] eta: 0:14:28 lr: 0.000119 grad: 0.0816 (0.0878) loss: 0.8303 (0.8307) time: 0.1023 data: 0.0002 max mem: 9377 +Train: [18] [1500/6250] eta: 0:14:08 lr: 0.000119 grad: 0.0869 (0.0875) loss: 0.8273 (0.8306) time: 0.2886 data: 0.1975 max mem: 9377 +Train: [18] [1600/6250] eta: 0:13:40 lr: 0.000119 grad: 0.0791 (0.0873) loss: 0.8332 (0.8305) time: 0.1666 data: 0.0793 max mem: 9377 +Train: [18] [1700/6250] eta: 0:13:14 lr: 0.000119 grad: 0.0808 (0.0871) loss: 0.8315 (0.8305) time: 0.1422 data: 0.0551 max mem: 9377 +Train: [18] [1800/6250] eta: 0:12:52 lr: 0.000119 grad: 0.0841 (0.0870) loss: 0.8306 (0.8306) time: 0.1565 data: 0.0664 max mem: 9377 +Train: [18] [1900/6250] eta: 0:12:32 lr: 0.000119 grad: 0.0817 (0.0869) loss: 0.8324 (0.8308) time: 0.1849 data: 0.1007 max mem: 9377 +Train: [18] [2000/6250] eta: 0:12:08 lr: 0.000119 grad: 0.0878 (0.0869) loss: 0.8298 (0.8307) time: 0.1230 data: 0.0358 max mem: 9377 +Train: [18] [2100/6250] eta: 0:11:49 lr: 0.000119 grad: 0.0848 (0.0868) loss: 0.8307 (0.8308) time: 0.1609 data: 0.0799 max mem: 9377 +Train: [18] [2200/6250] eta: 0:11:31 lr: 0.000119 grad: 0.0821 (0.0868) loss: 0.8305 (0.8308) time: 0.1326 data: 0.0507 max mem: 9377 +Train: [18] [2300/6250] eta: 0:11:13 lr: 0.000119 grad: 0.0851 (0.0867) loss: 0.8360 (0.8308) time: 0.1713 data: 0.0896 max mem: 9377 +Train: [18] [2400/6250] eta: 0:10:53 lr: 0.000119 grad: 0.0876 (0.0867) loss: 0.8287 (0.8308) time: 0.1458 data: 0.0566 max mem: 9377 +Train: [18] [2500/6250] eta: 0:10:34 lr: 0.000119 grad: 0.0804 (0.0865) loss: 0.8305 (0.8308) time: 0.1759 data: 0.0869 max mem: 9377 +Train: [18] [2600/6250] eta: 0:10:15 lr: 0.000119 grad: 0.0845 (0.0864) loss: 0.8322 (0.8309) time: 0.1600 data: 0.0739 max mem: 9377 +Train: [18] [2700/6250] eta: 0:09:56 lr: 0.000119 grad: 0.0779 (0.0863) loss: 0.8388 (0.8310) time: 0.1699 data: 0.0840 max mem: 9377 +Train: [18] [2800/6250] eta: 0:09:39 lr: 0.000119 grad: 0.0829 (0.0862) loss: 0.8330 (0.8310) time: 0.1017 data: 0.0130 max mem: 9377 +Train: [18] [2900/6250] eta: 0:09:21 lr: 0.000119 grad: 0.0785 (0.0862) loss: 0.8304 (0.8310) time: 0.1788 data: 0.1047 max mem: 9377 +Train: [18] [3000/6250] eta: 0:09:05 lr: 0.000119 grad: 0.0871 (0.0863) loss: 0.8309 (0.8310) time: 0.2034 data: 0.1231 max mem: 9377 +Train: [18] [3100/6250] eta: 0:08:46 lr: 0.000119 grad: 0.0817 (0.0862) loss: 0.8358 (0.8310) time: 0.1639 data: 0.0888 max mem: 9377 +Train: [18] [3200/6250] eta: 0:08:29 lr: 0.000119 grad: 0.0833 (0.0861) loss: 0.8324 (0.8312) time: 0.1897 data: 0.1015 max mem: 9377 +Train: [18] [3300/6250] eta: 0:08:12 lr: 0.000119 grad: 0.0874 (0.0863) loss: 0.8154 (0.8311) time: 0.1566 data: 0.0734 max mem: 9377 +Train: [18] [3400/6250] eta: 0:07:55 lr: 0.000119 grad: 0.0879 (0.0863) loss: 0.8219 (0.8310) time: 0.1496 data: 0.0671 max mem: 9377 +Train: [18] [3500/6250] eta: 0:07:39 lr: 0.000119 grad: 0.0853 (0.0863) loss: 0.8315 (0.8310) time: 0.1778 data: 0.0942 max mem: 9377 +Train: [18] [3600/6250] eta: 0:07:22 lr: 0.000119 grad: 0.0825 (0.0863) loss: 0.8314 (0.8310) time: 0.1602 data: 0.0739 max mem: 9377 +Train: [18] [3700/6250] eta: 0:07:06 lr: 0.000119 grad: 0.0861 (0.0864) loss: 0.8301 (0.8309) time: 0.2399 data: 0.1551 max mem: 9377 +Train: [18] [3800/6250] eta: 0:06:48 lr: 0.000119 grad: 0.0791 (0.0864) loss: 0.8340 (0.8309) time: 0.1255 data: 0.0401 max mem: 9377 +Train: [18] [3900/6250] eta: 0:06:30 lr: 0.000119 grad: 0.0790 (0.0864) loss: 0.8304 (0.8309) time: 0.1505 data: 0.0566 max mem: 9377 +Train: [18] [4000/6250] eta: 0:06:13 lr: 0.000119 grad: 0.0814 (0.0864) loss: 0.8325 (0.8309) time: 0.1593 data: 0.0636 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:57 lr: 0.000119 grad: 0.0840 (0.0864) loss: 0.8320 (0.8309) time: 0.2271 data: 0.1412 max mem: 9377 +Train: [18] [4200/6250] eta: 0:05:39 lr: 0.000119 grad: 0.0811 (0.0864) loss: 0.8317 (0.8309) time: 0.1650 data: 0.0834 max mem: 9377 +Train: [18] [4300/6250] eta: 0:05:22 lr: 0.000119 grad: 0.0869 (0.0864) loss: 0.8298 (0.8308) time: 0.1517 data: 0.0706 max mem: 9377 +Train: [18] [4400/6250] eta: 0:05:06 lr: 0.000119 grad: 0.0824 (0.0864) loss: 0.8316 (0.8308) time: 0.1755 data: 0.0938 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:49 lr: 0.000119 grad: 0.0801 (0.0864) loss: 0.8251 (0.8308) time: 0.1717 data: 0.0887 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:32 lr: 0.000119 grad: 0.0841 (0.0864) loss: 0.8316 (0.8308) time: 0.2236 data: 0.1376 max mem: 9377 +Train: [18] [4700/6250] eta: 0:04:17 lr: 0.000119 grad: 0.0833 (0.0864) loss: 0.8284 (0.8308) time: 0.0986 data: 0.0002 max mem: 9377 +Train: [18] [4800/6250] eta: 0:04:03 lr: 0.000119 grad: 0.0864 (0.0864) loss: 0.8283 (0.8307) time: 0.2849 data: 0.1691 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:49 lr: 0.000119 grad: 0.0818 (0.0864) loss: 0.8302 (0.8307) time: 0.3399 data: 0.2415 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:32 lr: 0.000119 grad: 0.0840 (0.0865) loss: 0.8293 (0.8307) time: 0.1112 data: 0.0002 max mem: 9377 +Train: [18] [5100/6250] eta: 0:03:17 lr: 0.000119 grad: 0.0798 (0.0864) loss: 0.8367 (0.8307) time: 0.4847 data: 0.3839 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:59 lr: 0.000119 grad: 0.0810 (0.0864) loss: 0.8243 (0.8306) time: 0.1745 data: 0.0915 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:43 lr: 0.000119 grad: 0.0809 (0.0864) loss: 0.8319 (0.8306) time: 0.2037 data: 0.0948 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:25 lr: 0.000119 grad: 0.0928 (0.0864) loss: 0.8247 (0.8306) time: 0.1675 data: 0.0845 max mem: 9377 +Train: [18] [5500/6250] eta: 0:02:09 lr: 0.000119 grad: 0.0803 (0.0864) loss: 0.8311 (0.8306) time: 0.4451 data: 0.3250 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:52 lr: 0.000119 grad: 0.0898 (0.0865) loss: 0.8237 (0.8306) time: 0.1581 data: 0.0713 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:35 lr: 0.000119 grad: 0.0883 (0.0866) loss: 0.8299 (0.8305) time: 0.2044 data: 0.1150 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:17 lr: 0.000118 grad: 0.0892 (0.0866) loss: 0.8282 (0.8304) time: 0.1444 data: 0.0584 max mem: 9377 +Train: [18] [5900/6250] eta: 0:01:00 lr: 0.000118 grad: 0.0871 (0.0867) loss: 0.8279 (0.8304) time: 0.1939 data: 0.1026 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:43 lr: 0.000118 grad: 0.0885 (0.0867) loss: 0.8234 (0.8303) time: 0.1128 data: 0.0006 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:25 lr: 0.000118 grad: 0.0927 (0.0868) loss: 0.8275 (0.8303) time: 0.1711 data: 0.0888 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:08 lr: 0.000118 grad: 0.0854 (0.0868) loss: 0.8265 (0.8303) time: 0.2024 data: 0.1146 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0860 (0.0868) loss: 0.8266 (0.8302) time: 0.1272 data: 0.0403 max mem: 9377 +Train: [18] Total time: 0:18:06 (0.1738 s / it) +Averaged stats: lr: 0.000118 grad: 0.0860 (0.0868) loss: 0.8266 (0.8302) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:06:08 loss: 0.8409 (0.8409) time: 5.9506 data: 5.9183 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8389 (0.8414) time: 0.1014 data: 0.0765 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:14 (0.2258 s / it) +Averaged stats (hcp-train-subset): loss: 0.8389 (0.8414) +Eval (hcp-val): [18] [ 0/62] eta: 0:06:03 loss: 0.8452 (0.8452) time: 5.8702 data: 5.8393 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8456 (0.8471) time: 0.1068 data: 0.0801 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:14 (0.2316 s / it) +Averaged stats (hcp-val): loss: 0.8456 (0.8471) +Eval (nsd-val): [18] [ 0/62] eta: 0:05:01 loss: 0.8086 (0.8086) time: 4.8690 data: 4.8216 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8169 (0.8188) time: 0.1279 data: 0.1025 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (nsd-val): loss: 0.8169 (0.8188) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [19] [ 0/6250] eta: 11:03:48 lr: 0.000118 grad: 0.3363 (0.3363) loss: 0.8363 (0.8363) time: 6.3726 data: 6.2358 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:24:23 lr: 0.000118 grad: 0.0851 (0.0967) loss: 0.8363 (0.8480) time: 0.1591 data: 0.0605 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:20:45 lr: 0.000118 grad: 0.0784 (0.0955) loss: 0.8247 (0.8381) time: 0.1645 data: 0.0602 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:19:20 lr: 0.000118 grad: 0.0912 (0.0948) loss: 0.8274 (0.8333) time: 0.2131 data: 0.1224 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:18:00 lr: 0.000118 grad: 0.0859 (0.0947) loss: 0.8232 (0.8300) time: 0.1445 data: 0.0538 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:17:07 lr: 0.000118 grad: 0.0889 (0.0939) loss: 0.8351 (0.8287) time: 0.1730 data: 0.0702 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:16:38 lr: 0.000118 grad: 0.0812 (0.0931) loss: 0.8261 (0.8278) time: 0.1644 data: 0.0707 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:16:10 lr: 0.000118 grad: 0.0873 (0.0929) loss: 0.8250 (0.8274) time: 0.1303 data: 0.0501 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:16:03 lr: 0.000118 grad: 0.0907 (0.0925) loss: 0.8218 (0.8267) time: 0.2814 data: 0.1927 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:15:26 lr: 0.000118 grad: 0.0890 (0.0925) loss: 0.8201 (0.8259) time: 0.1501 data: 0.0664 max mem: 9377 +Train: [19] [1000/6250] eta: 0:15:00 lr: 0.000118 grad: 0.0907 (0.0924) loss: 0.8169 (0.8254) time: 0.1300 data: 0.0305 max mem: 9377 +Train: [19] [1100/6250] eta: 0:14:43 lr: 0.000118 grad: 0.0851 (0.0922) loss: 0.8211 (0.8247) time: 0.1765 data: 0.0895 max mem: 9377 +Train: [19] [1200/6250] eta: 0:14:20 lr: 0.000118 grad: 0.0892 (0.0922) loss: 0.8281 (0.8241) time: 0.1417 data: 0.0553 max mem: 9377 +Train: [19] [1300/6250] eta: 0:13:54 lr: 0.000118 grad: 0.0850 (0.0921) loss: 0.8206 (0.8238) time: 0.1357 data: 0.0417 max mem: 9377 +Train: [19] [1400/6250] eta: 0:13:31 lr: 0.000118 grad: 0.0905 (0.0921) loss: 0.8228 (0.8235) time: 0.1559 data: 0.0605 max mem: 9377 +Train: [19] [1500/6250] eta: 0:13:07 lr: 0.000118 grad: 0.0887 (0.0921) loss: 0.8209 (0.8232) time: 0.1398 data: 0.0528 max mem: 9377 +Train: [19] [1600/6250] eta: 0:12:46 lr: 0.000118 grad: 0.0937 (0.0922) loss: 0.8241 (0.8231) time: 0.1520 data: 0.0626 max mem: 9377 +Train: [19] [1700/6250] eta: 0:12:29 lr: 0.000118 grad: 0.0867 (0.0921) loss: 0.8278 (0.8228) time: 0.1581 data: 0.0751 max mem: 9377 +Train: [19] [1800/6250] eta: 0:12:08 lr: 0.000118 grad: 0.0919 (0.0919) loss: 0.8136 (0.8228) time: 0.1388 data: 0.0460 max mem: 9377 +Train: [19] [1900/6250] eta: 0:11:53 lr: 0.000118 grad: 0.0890 (0.0920) loss: 0.8162 (0.8227) time: 0.1971 data: 0.1088 max mem: 9377 +Train: [19] [2000/6250] eta: 0:11:33 lr: 0.000118 grad: 0.0906 (0.0919) loss: 0.8209 (0.8226) time: 0.1462 data: 0.0591 max mem: 9377 +Train: [19] [2100/6250] eta: 0:11:15 lr: 0.000118 grad: 0.0840 (0.0919) loss: 0.8240 (0.8226) time: 0.1295 data: 0.0420 max mem: 9377 +Train: [19] [2200/6250] eta: 0:10:56 lr: 0.000118 grad: 0.0840 (0.0918) loss: 0.8307 (0.8227) time: 0.1390 data: 0.0460 max mem: 9377 +Train: [19] [2300/6250] eta: 0:10:40 lr: 0.000118 grad: 0.0860 (0.0918) loss: 0.8191 (0.8227) time: 0.1844 data: 0.0957 max mem: 9377 +Train: [19] [2400/6250] eta: 0:10:23 lr: 0.000118 grad: 0.0899 (0.0917) loss: 0.8244 (0.8228) time: 0.1656 data: 0.0824 max mem: 9377 +Train: [19] [2500/6250] eta: 0:10:14 lr: 0.000118 grad: 0.0847 (0.0915) loss: 0.8223 (0.8228) time: 0.2571 data: 0.1597 max mem: 9377 +Train: [19] [2600/6250] eta: 0:10:00 lr: 0.000118 grad: 0.0852 (0.0914) loss: 0.8193 (0.8228) time: 0.2783 data: 0.1815 max mem: 9377 +Train: [19] [2700/6250] eta: 0:09:44 lr: 0.000118 grad: 0.0846 (0.0913) loss: 0.8152 (0.8228) time: 0.2662 data: 0.1805 max mem: 9377 +Train: [19] [2800/6250] eta: 0:09:43 lr: 0.000118 grad: 0.0915 (0.0912) loss: 0.8263 (0.8229) time: 0.1047 data: 0.0002 max mem: 9377 +Train: [19] [2900/6250] eta: 0:09:24 lr: 0.000118 grad: 0.0824 (0.0912) loss: 0.8239 (0.8230) time: 0.1314 data: 0.0528 max mem: 9377 +Train: [19] [3000/6250] eta: 0:09:05 lr: 0.000118 grad: 0.0841 (0.0911) loss: 0.8240 (0.8230) time: 0.1456 data: 0.0594 max mem: 9377 +Train: [19] [3100/6250] eta: 0:08:48 lr: 0.000118 grad: 0.0893 (0.0910) loss: 0.8266 (0.8231) time: 0.1657 data: 0.0817 max mem: 9377 +Train: [19] [3200/6250] eta: 0:08:30 lr: 0.000118 grad: 0.0853 (0.0910) loss: 0.8342 (0.8232) time: 0.1324 data: 0.0436 max mem: 9377 +Train: [19] [3300/6250] eta: 0:08:15 lr: 0.000118 grad: 0.0837 (0.0909) loss: 0.8292 (0.8234) time: 0.2282 data: 0.1453 max mem: 9377 +Train: [19] [3400/6250] eta: 0:07:55 lr: 0.000118 grad: 0.0804 (0.0908) loss: 0.8361 (0.8236) time: 0.1837 data: 0.0959 max mem: 9377 +Train: [19] [3500/6250] eta: 0:07:39 lr: 0.000118 grad: 0.0853 (0.0907) loss: 0.8263 (0.8237) time: 0.1968 data: 0.1123 max mem: 9377 +Train: [19] [3600/6250] eta: 0:07:22 lr: 0.000118 grad: 0.0875 (0.0907) loss: 0.8265 (0.8239) time: 0.1582 data: 0.0660 max mem: 9377 +Train: [19] [3700/6250] eta: 0:07:05 lr: 0.000118 grad: 0.0862 (0.0905) loss: 0.8314 (0.8240) time: 0.1247 data: 0.0235 max mem: 9377 +Train: [19] [3800/6250] eta: 0:06:48 lr: 0.000118 grad: 0.0917 (0.0905) loss: 0.8254 (0.8241) time: 0.1452 data: 0.0551 max mem: 9377 +Train: [19] [3900/6250] eta: 0:06:31 lr: 0.000118 grad: 0.0863 (0.0904) loss: 0.8270 (0.8242) time: 0.1424 data: 0.0521 max mem: 9377 +Train: [19] [4000/6250] eta: 0:06:14 lr: 0.000118 grad: 0.0916 (0.0903) loss: 0.8258 (0.8243) time: 0.1232 data: 0.0228 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:57 lr: 0.000118 grad: 0.0802 (0.0902) loss: 0.8246 (0.8244) time: 0.1911 data: 0.1045 max mem: 9377 +Train: [19] [4200/6250] eta: 0:05:40 lr: 0.000118 grad: 0.0847 (0.0902) loss: 0.8242 (0.8245) time: 0.1854 data: 0.0997 max mem: 9377 +Train: [19] [4300/6250] eta: 0:05:23 lr: 0.000118 grad: 0.0851 (0.0901) loss: 0.8315 (0.8246) time: 0.1659 data: 0.0832 max mem: 9377 +Train: [19] [4400/6250] eta: 0:05:06 lr: 0.000118 grad: 0.0907 (0.0901) loss: 0.8260 (0.8246) time: 0.1758 data: 0.0954 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:51 lr: 0.000118 grad: 0.0885 (0.0901) loss: 0.8274 (0.8246) time: 0.0966 data: 0.0002 max mem: 9377 +Train: [19] [4600/6250] eta: 0:04:35 lr: 0.000118 grad: 0.0892 (0.0901) loss: 0.8301 (0.8246) time: 0.2213 data: 0.1498 max mem: 9377 +Train: [19] [4700/6250] eta: 0:04:18 lr: 0.000118 grad: 0.0854 (0.0902) loss: 0.8238 (0.8246) time: 0.1425 data: 0.0586 max mem: 9377 +Train: [19] [4800/6250] eta: 0:04:01 lr: 0.000118 grad: 0.0898 (0.0903) loss: 0.8204 (0.8246) time: 0.2153 data: 0.1308 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:44 lr: 0.000118 grad: 0.0938 (0.0903) loss: 0.8226 (0.8245) time: 0.1612 data: 0.0797 max mem: 9377 +Train: [19] [5000/6250] eta: 0:03:27 lr: 0.000118 grad: 0.0883 (0.0903) loss: 0.8205 (0.8245) time: 0.1360 data: 0.0569 max mem: 9377 +Train: [19] [5100/6250] eta: 0:03:11 lr: 0.000118 grad: 0.0864 (0.0903) loss: 0.8199 (0.8245) time: 0.1495 data: 0.0589 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:54 lr: 0.000118 grad: 0.0843 (0.0902) loss: 0.8225 (0.8245) time: 0.1525 data: 0.0620 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:37 lr: 0.000118 grad: 0.0900 (0.0903) loss: 0.8178 (0.8245) time: 0.1177 data: 0.0192 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:21 lr: 0.000118 grad: 0.0850 (0.0903) loss: 0.8234 (0.8244) time: 0.1221 data: 0.0420 max mem: 9377 +Train: [19] [5500/6250] eta: 0:02:04 lr: 0.000118 grad: 0.0953 (0.0903) loss: 0.8191 (0.8243) time: 0.1464 data: 0.0584 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:47 lr: 0.000118 grad: 0.0861 (0.0903) loss: 0.8233 (0.8243) time: 0.1371 data: 0.0003 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:31 lr: 0.000118 grad: 0.0867 (0.0903) loss: 0.8196 (0.8243) time: 0.2180 data: 0.1306 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:15 lr: 0.000118 grad: 0.0807 (0.0904) loss: 0.8288 (0.8243) time: 0.6817 data: 0.5752 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:58 lr: 0.000118 grad: 0.0898 (0.0904) loss: 0.8263 (0.8242) time: 0.0968 data: 0.0002 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:41 lr: 0.000118 grad: 0.0864 (0.0904) loss: 0.8317 (0.8243) time: 0.1241 data: 0.0384 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:25 lr: 0.000117 grad: 0.0932 (0.0904) loss: 0.8175 (0.8243) time: 0.1848 data: 0.0998 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:08 lr: 0.000117 grad: 0.0842 (0.0905) loss: 0.8294 (0.8243) time: 0.1317 data: 0.0461 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.0888 (0.0905) loss: 0.8267 (0.8243) time: 0.1359 data: 0.0520 max mem: 9377 +Train: [19] Total time: 0:17:33 (0.1685 s / it) +Averaged stats: lr: 0.000117 grad: 0.0888 (0.0905) loss: 0.8267 (0.8243) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:04:41 loss: 0.8471 (0.8471) time: 4.5339 data: 4.5001 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8388 (0.8412) time: 0.1464 data: 0.1215 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:16 (0.2612 s / it) +Averaged stats (hcp-train-subset): loss: 0.8388 (0.8412) +Making plots (hcp-train-subset): example=12 +Eval (hcp-val): [19] [ 0/62] eta: 0:04:54 loss: 0.8458 (0.8458) time: 4.7506 data: 4.6707 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8460 (0.8474) time: 0.1095 data: 0.0847 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:13 (0.2200 s / it) +Averaged stats (hcp-val): loss: 0.8460 (0.8474) +Making plots (hcp-val): example=2 +Eval (nsd-val): [19] [ 0/62] eta: 0:03:37 loss: 0.8082 (0.8082) time: 3.5091 data: 3.4249 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8158 (0.8162) time: 0.1107 data: 0.0841 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:13 (0.2255 s / it) +Averaged stats (nsd-val): loss: 0.8158 (0.8162) +Making plots (nsd-val): example=55 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 13:22:17 lr: 0.000117 grad: 0.1656 (0.1656) loss: 0.8549 (0.8549) time: 7.7020 data: 7.5729 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:24:14 lr: 0.000117 grad: 0.1234 (0.1491) loss: 0.8113 (0.8258) time: 0.1639 data: 0.0557 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:20:49 lr: 0.000117 grad: 0.0873 (0.1274) loss: 0.8218 (0.8197) time: 0.1704 data: 0.0744 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:19:20 lr: 0.000117 grad: 0.0899 (0.1165) loss: 0.8242 (0.8203) time: 0.1282 data: 0.0408 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:18:32 lr: 0.000117 grad: 0.0797 (0.1095) loss: 0.8371 (0.8220) time: 0.1620 data: 0.0535 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:17:33 lr: 0.000117 grad: 0.0854 (0.1050) loss: 0.8337 (0.8231) time: 0.1354 data: 0.0417 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:16:50 lr: 0.000117 grad: 0.0788 (0.1018) loss: 0.8317 (0.8244) time: 0.1631 data: 0.0758 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:16:32 lr: 0.000117 grad: 0.0806 (0.0992) loss: 0.8326 (0.8256) time: 0.0955 data: 0.0002 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:15:57 lr: 0.000117 grad: 0.0842 (0.0977) loss: 0.8331 (0.8261) time: 0.1356 data: 0.0383 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:15:31 lr: 0.000117 grad: 0.0790 (0.0966) loss: 0.8327 (0.8266) time: 0.1446 data: 0.0522 max mem: 9377 +Train: [20] [1000/6250] eta: 0:15:07 lr: 0.000117 grad: 0.0792 (0.0957) loss: 0.8248 (0.8265) time: 0.1002 data: 0.0092 max mem: 9377 +Train: [20] [1100/6250] eta: 0:14:39 lr: 0.000117 grad: 0.0846 (0.0951) loss: 0.8239 (0.8263) time: 0.1259 data: 0.0415 max mem: 9377 +Train: [20] [1200/6250] eta: 0:14:14 lr: 0.000117 grad: 0.0942 (0.0947) loss: 0.8198 (0.8260) time: 0.1635 data: 0.0736 max mem: 9377 +Train: [20] [1300/6250] eta: 0:13:53 lr: 0.000117 grad: 0.0849 (0.0944) loss: 0.8143 (0.8257) time: 0.2212 data: 0.1375 max mem: 9377 +Train: [20] [1400/6250] eta: 0:13:29 lr: 0.000117 grad: 0.0797 (0.0939) loss: 0.8221 (0.8255) time: 0.1901 data: 0.1007 max mem: 9377 +Train: [20] [1500/6250] eta: 0:13:06 lr: 0.000117 grad: 0.0882 (0.0938) loss: 0.8247 (0.8252) time: 0.1527 data: 0.0602 max mem: 9377 +Train: [20] [1600/6250] eta: 0:12:50 lr: 0.000117 grad: 0.0890 (0.0937) loss: 0.8199 (0.8252) time: 0.1662 data: 0.0755 max mem: 9377 +Train: [20] [1700/6250] eta: 0:12:35 lr: 0.000117 grad: 0.0906 (0.0936) loss: 0.8180 (0.8251) time: 0.1901 data: 0.0843 max mem: 9377 +Train: [20] [1800/6250] eta: 0:12:21 lr: 0.000117 grad: 0.0966 (0.0936) loss: 0.8221 (0.8251) time: 0.2324 data: 0.1471 max mem: 9377 +Train: [20] [1900/6250] eta: 0:12:23 lr: 0.000117 grad: 0.0873 (0.0934) loss: 0.8210 (0.8249) time: 0.5160 data: 0.4104 max mem: 9377 +Train: [20] [2000/6250] eta: 0:12:02 lr: 0.000117 grad: 0.0885 (0.0932) loss: 0.8294 (0.8251) time: 0.1536 data: 0.0609 max mem: 9377 +Train: [20] [2100/6250] eta: 0:11:45 lr: 0.000117 grad: 0.0853 (0.0931) loss: 0.8225 (0.8250) time: 0.1788 data: 0.0780 max mem: 9377 +Train: [20] [2200/6250] eta: 0:11:25 lr: 0.000117 grad: 0.0838 (0.0930) loss: 0.8304 (0.8250) time: 0.1386 data: 0.0521 max mem: 9377 +Train: [20] [2300/6250] eta: 0:11:07 lr: 0.000117 grad: 0.0869 (0.0928) loss: 0.8343 (0.8252) time: 0.1341 data: 0.0556 max mem: 9377 +Train: [20] [2400/6250] eta: 0:10:47 lr: 0.000117 grad: 0.0919 (0.0928) loss: 0.8218 (0.8253) time: 0.1110 data: 0.0270 max mem: 9377 +Train: [20] [2500/6250] eta: 0:10:28 lr: 0.000117 grad: 0.0873 (0.0926) loss: 0.8262 (0.8254) time: 0.1485 data: 0.0616 max mem: 9377 +Train: [20] [2600/6250] eta: 0:10:10 lr: 0.000117 grad: 0.0833 (0.0925) loss: 0.8273 (0.8255) time: 0.1307 data: 0.0345 max mem: 9377 +Train: [20] [2700/6250] eta: 0:09:52 lr: 0.000117 grad: 0.0954 (0.0926) loss: 0.8252 (0.8255) time: 0.1622 data: 0.0805 max mem: 9377 +Train: [20] [2800/6250] eta: 0:09:35 lr: 0.000117 grad: 0.0885 (0.0926) loss: 0.8252 (0.8254) time: 0.1705 data: 0.0888 max mem: 9377 +Train: [20] [2900/6250] eta: 0:09:17 lr: 0.000117 grad: 0.0937 (0.0927) loss: 0.8269 (0.8253) time: 0.1446 data: 0.0591 max mem: 9377 +Train: [20] [3000/6250] eta: 0:08:59 lr: 0.000117 grad: 0.0913 (0.0926) loss: 0.8231 (0.8253) time: 0.1423 data: 0.0533 max mem: 9377 +Train: [20] [3100/6250] eta: 0:08:44 lr: 0.000117 grad: 0.0942 (0.0927) loss: 0.8179 (0.8252) time: 0.2716 data: 0.1865 max mem: 9377 +Train: [20] [3200/6250] eta: 0:08:29 lr: 0.000117 grad: 0.0879 (0.0926) loss: 0.8269 (0.8252) time: 0.2927 data: 0.2062 max mem: 9377 +Train: [20] [3300/6250] eta: 0:08:11 lr: 0.000117 grad: 0.0870 (0.0926) loss: 0.8183 (0.8251) time: 0.1622 data: 0.0796 max mem: 9377 +Train: [20] [3400/6250] eta: 0:07:54 lr: 0.000117 grad: 0.0886 (0.0926) loss: 0.8259 (0.8250) time: 0.1389 data: 0.0551 max mem: 9377 +Train: [20] [3500/6250] eta: 0:07:38 lr: 0.000117 grad: 0.0854 (0.0925) loss: 0.8303 (0.8249) time: 0.1808 data: 0.0960 max mem: 9377 +Train: [20] [3600/6250] eta: 0:07:34 lr: 0.000117 grad: 0.0905 (0.0925) loss: 0.8245 (0.8247) time: 0.4052 data: 0.2082 max mem: 9377 +Train: [20] [3700/6250] eta: 0:07:15 lr: 0.000117 grad: 0.0893 (0.0925) loss: 0.8218 (0.8246) time: 0.1194 data: 0.0266 max mem: 9377 +Train: [20] [3800/6250] eta: 0:07:09 lr: 0.000117 grad: 0.0854 (0.0925) loss: 0.8200 (0.8245) time: 0.1821 data: 0.0652 max mem: 9377 +Train: [20] [3900/6250] eta: 0:06:52 lr: 0.000117 grad: 0.0896 (0.0926) loss: 0.8255 (0.8245) time: 0.1626 data: 0.0515 max mem: 9377 +Train: [20] [4000/6250] eta: 0:06:35 lr: 0.000117 grad: 0.0848 (0.0925) loss: 0.8262 (0.8244) time: 0.2917 data: 0.1692 max mem: 9377 +Train: [20] [4100/6250] eta: 0:06:17 lr: 0.000117 grad: 0.0856 (0.0925) loss: 0.8284 (0.8244) time: 0.1220 data: 0.0009 max mem: 9377 +Train: [20] [4200/6250] eta: 0:06:02 lr: 0.000117 grad: 0.0839 (0.0924) loss: 0.8228 (0.8244) time: 0.1110 data: 0.0003 max mem: 9377 +Train: [20] [4300/6250] eta: 0:05:49 lr: 0.000117 grad: 0.0905 (0.0924) loss: 0.8258 (0.8244) time: 0.8411 data: 0.7299 max mem: 9377 +Train: [20] [4400/6250] eta: 0:05:30 lr: 0.000117 grad: 0.0913 (0.0925) loss: 0.8304 (0.8244) time: 0.1836 data: 0.1042 max mem: 9377 +Train: [20] [4500/6250] eta: 0:05:11 lr: 0.000117 grad: 0.0851 (0.0925) loss: 0.8282 (0.8244) time: 0.1964 data: 0.1091 max mem: 9377 +Train: [20] [4600/6250] eta: 0:04:53 lr: 0.000117 grad: 0.0856 (0.0925) loss: 0.8252 (0.8244) time: 0.1471 data: 0.0601 max mem: 9377 +Train: [20] [4700/6250] eta: 0:04:36 lr: 0.000117 grad: 0.0849 (0.0925) loss: 0.8282 (0.8244) time: 0.2047 data: 0.1076 max mem: 9377 +Train: [20] [4800/6250] eta: 0:04:18 lr: 0.000117 grad: 0.0896 (0.0925) loss: 0.8287 (0.8244) time: 0.1284 data: 0.0365 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:59 lr: 0.000117 grad: 0.0912 (0.0925) loss: 0.8217 (0.8244) time: 0.0986 data: 0.0025 max mem: 9377 +Train: [20] [5000/6250] eta: 0:03:41 lr: 0.000117 grad: 0.0873 (0.0925) loss: 0.8321 (0.8245) time: 0.1358 data: 0.0575 max mem: 9377 +Train: [20] [5100/6250] eta: 0:03:23 lr: 0.000117 grad: 0.0912 (0.0924) loss: 0.8318 (0.8246) time: 0.1474 data: 0.0556 max mem: 9377 +Train: [20] [5200/6250] eta: 0:03:05 lr: 0.000117 grad: 0.0847 (0.0924) loss: 0.8348 (0.8247) time: 0.1532 data: 0.0723 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:47 lr: 0.000117 grad: 0.0881 (0.0923) loss: 0.8312 (0.8247) time: 0.1898 data: 0.1068 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:30 lr: 0.000117 grad: 0.0841 (0.0923) loss: 0.8320 (0.8248) time: 0.1385 data: 0.0496 max mem: 9377 +Train: [20] [5500/6250] eta: 0:02:12 lr: 0.000117 grad: 0.0919 (0.0922) loss: 0.8286 (0.8248) time: 0.2024 data: 0.1231 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:54 lr: 0.000117 grad: 0.0883 (0.0922) loss: 0.8236 (0.8248) time: 0.1248 data: 0.0465 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:36 lr: 0.000117 grad: 0.0886 (0.0922) loss: 0.8288 (0.8248) time: 0.2230 data: 0.1289 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:18 lr: 0.000117 grad: 0.0889 (0.0921) loss: 0.8250 (0.8248) time: 0.1644 data: 0.0826 max mem: 9377 +Train: [20] [5900/6250] eta: 0:01:01 lr: 0.000117 grad: 0.0894 (0.0921) loss: 0.8203 (0.8248) time: 0.1221 data: 0.0292 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:43 lr: 0.000116 grad: 0.0910 (0.0921) loss: 0.8297 (0.8248) time: 0.1350 data: 0.0559 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:26 lr: 0.000116 grad: 0.0902 (0.0921) loss: 0.8255 (0.8248) time: 0.1252 data: 0.0340 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:08 lr: 0.000116 grad: 0.0872 (0.0921) loss: 0.8307 (0.8249) time: 0.1635 data: 0.0804 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.0876 (0.0921) loss: 0.8266 (0.8249) time: 0.1713 data: 0.0856 max mem: 9377 +Train: [20] Total time: 0:18:15 (0.1753 s / it) +Averaged stats: lr: 0.000116 grad: 0.0876 (0.0921) loss: 0.8266 (0.8249) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:04:52 loss: 0.8453 (0.8453) time: 4.7237 data: 4.6853 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8395 (0.8407) time: 0.1182 data: 0.0848 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (hcp-train-subset): loss: 0.8395 (0.8407) +Eval (hcp-val): [20] [ 0/62] eta: 0:04:14 loss: 0.8422 (0.8422) time: 4.1125 data: 4.0293 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8450 (0.8468) time: 0.1210 data: 0.0959 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:13 (0.2151 s / it) +Averaged stats (hcp-val): loss: 0.8450 (0.8468) +Eval (nsd-val): [20] [ 0/62] eta: 0:03:19 loss: 0.8050 (0.8050) time: 3.2202 data: 3.1388 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8148 (0.8164) time: 0.1308 data: 0.1058 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:13 (0.2190 s / it) +Averaged stats (nsd-val): loss: 0.8148 (0.8164) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [21] [ 0/6250] eta: 10:38:49 lr: 0.000116 grad: 0.0585 (0.0585) loss: 0.8630 (0.8630) time: 6.1327 data: 5.9808 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:24:09 lr: 0.000116 grad: 0.0808 (0.1086) loss: 0.8426 (0.8438) time: 0.1646 data: 0.0612 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:20:28 lr: 0.000116 grad: 0.0975 (0.1031) loss: 0.8391 (0.8394) time: 0.1736 data: 0.0906 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:19:01 lr: 0.000116 grad: 0.0922 (0.1014) loss: 0.8319 (0.8373) time: 0.1603 data: 0.0728 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:18:15 lr: 0.000116 grad: 0.0813 (0.0986) loss: 0.8338 (0.8348) time: 0.1573 data: 0.0670 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:17:35 lr: 0.000116 grad: 0.0937 (0.0968) loss: 0.8345 (0.8340) time: 0.2000 data: 0.0969 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:16:48 lr: 0.000116 grad: 0.0825 (0.0954) loss: 0.8306 (0.8331) time: 0.1348 data: 0.0386 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:16:11 lr: 0.000116 grad: 0.0840 (0.0946) loss: 0.8278 (0.8323) time: 0.1571 data: 0.0676 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:15:46 lr: 0.000116 grad: 0.0844 (0.0940) loss: 0.8246 (0.8314) time: 0.1955 data: 0.1106 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:15:21 lr: 0.000116 grad: 0.0857 (0.0934) loss: 0.8250 (0.8308) time: 0.1578 data: 0.0807 max mem: 9377 +Train: [21] [1000/6250] eta: 0:14:58 lr: 0.000116 grad: 0.0826 (0.0927) loss: 0.8250 (0.8303) time: 0.1211 data: 0.0284 max mem: 9377 +Train: [21] [1100/6250] eta: 0:14:43 lr: 0.000116 grad: 0.0845 (0.0924) loss: 0.8219 (0.8299) time: 0.1515 data: 0.0585 max mem: 9377 +Train: [21] [1200/6250] eta: 0:14:29 lr: 0.000116 grad: 0.0832 (0.0919) loss: 0.8159 (0.8295) time: 0.1170 data: 0.0298 max mem: 9377 +Train: [21] [1300/6250] eta: 0:14:30 lr: 0.000116 grad: 0.0840 (0.0916) loss: 0.8275 (0.8293) time: 0.4002 data: 0.2681 max mem: 9377 +Train: [21] [1400/6250] eta: 0:14:24 lr: 0.000116 grad: 0.0876 (0.0912) loss: 0.8230 (0.8291) time: 0.1590 data: 0.0263 max mem: 9377 +Train: [21] [1500/6250] eta: 0:14:03 lr: 0.000116 grad: 0.0842 (0.0911) loss: 0.8222 (0.8289) time: 0.1476 data: 0.0466 max mem: 9377 +Train: [21] [1600/6250] eta: 0:13:44 lr: 0.000116 grad: 0.0896 (0.0908) loss: 0.8230 (0.8288) time: 0.2253 data: 0.1409 max mem: 9377 +Train: [21] [1700/6250] eta: 0:13:16 lr: 0.000116 grad: 0.0881 (0.0905) loss: 0.8166 (0.8286) time: 0.1642 data: 0.0784 max mem: 9377 +Train: [21] [1800/6250] eta: 0:12:53 lr: 0.000116 grad: 0.0852 (0.0904) loss: 0.8217 (0.8282) time: 0.1575 data: 0.0699 max mem: 9377 +Train: [21] [1900/6250] eta: 0:12:30 lr: 0.000116 grad: 0.0888 (0.0904) loss: 0.8177 (0.8278) time: 0.1522 data: 0.0666 max mem: 9377 +Train: [21] [2000/6250] eta: 0:12:12 lr: 0.000116 grad: 0.0861 (0.0903) loss: 0.8225 (0.8275) time: 0.1800 data: 0.0984 max mem: 9377 +Train: [21] [2100/6250] eta: 0:11:50 lr: 0.000116 grad: 0.0971 (0.0904) loss: 0.8203 (0.8271) time: 0.1761 data: 0.0827 max mem: 9377 +Train: [21] [2200/6250] eta: 0:11:29 lr: 0.000116 grad: 0.0808 (0.0904) loss: 0.8288 (0.8268) time: 0.1248 data: 0.0358 max mem: 9377 +Train: [21] [2300/6250] eta: 0:11:10 lr: 0.000116 grad: 0.0861 (0.0903) loss: 0.8189 (0.8265) time: 0.1428 data: 0.0538 max mem: 9377 +Train: [21] [2400/6250] eta: 0:10:53 lr: 0.000116 grad: 0.0965 (0.0904) loss: 0.8228 (0.8263) time: 0.2210 data: 0.1319 max mem: 9377 +Train: [21] [2500/6250] eta: 0:10:35 lr: 0.000116 grad: 0.0853 (0.0904) loss: 0.8214 (0.8262) time: 0.2018 data: 0.1162 max mem: 9377 +Train: [21] [2600/6250] eta: 0:10:34 lr: 0.000116 grad: 0.0954 (0.0905) loss: 0.8232 (0.8260) time: 0.1144 data: 0.0175 max mem: 9377 +Train: [21] [2700/6250] eta: 0:10:14 lr: 0.000116 grad: 0.0847 (0.0905) loss: 0.8199 (0.8258) time: 0.1303 data: 0.0471 max mem: 9377 +Train: [21] [2800/6250] eta: 0:09:56 lr: 0.000116 grad: 0.0943 (0.0906) loss: 0.8189 (0.8257) time: 0.1271 data: 0.0375 max mem: 9377 +Train: [21] [2900/6250] eta: 0:09:37 lr: 0.000116 grad: 0.0890 (0.0907) loss: 0.8190 (0.8254) time: 0.1614 data: 0.0770 max mem: 9377 +Train: [21] [3000/6250] eta: 0:09:19 lr: 0.000116 grad: 0.0892 (0.0907) loss: 0.8207 (0.8252) time: 0.1509 data: 0.0520 max mem: 9377 +Train: [21] [3100/6250] eta: 0:09:00 lr: 0.000116 grad: 0.0893 (0.0907) loss: 0.8164 (0.8251) time: 0.1898 data: 0.1011 max mem: 9377 +Train: [21] [3200/6250] eta: 0:08:46 lr: 0.000116 grad: 0.0935 (0.0908) loss: 0.8190 (0.8249) time: 0.1469 data: 0.0439 max mem: 9377 +Train: [21] [3300/6250] eta: 0:08:26 lr: 0.000116 grad: 0.0962 (0.0910) loss: 0.8163 (0.8246) time: 0.1455 data: 0.0574 max mem: 9377 +Train: [21] [3400/6250] eta: 0:08:15 lr: 0.000116 grad: 0.0977 (0.0911) loss: 0.8139 (0.8244) time: 0.1135 data: 0.0166 max mem: 9377 +Train: [21] [3500/6250] eta: 0:07:56 lr: 0.000116 grad: 0.0977 (0.0912) loss: 0.8165 (0.8242) time: 0.1257 data: 0.0436 max mem: 9377 +Train: [21] [3600/6250] eta: 0:07:39 lr: 0.000116 grad: 0.0917 (0.0914) loss: 0.8236 (0.8240) time: 0.1152 data: 0.0262 max mem: 9377 +Train: [21] [3700/6250] eta: 0:07:21 lr: 0.000116 grad: 0.0988 (0.0916) loss: 0.8186 (0.8238) time: 0.1274 data: 0.0442 max mem: 9377 +Train: [21] [3800/6250] eta: 0:07:02 lr: 0.000116 grad: 0.0985 (0.0918) loss: 0.8218 (0.8236) time: 0.1560 data: 0.0642 max mem: 9377 +Train: [21] [3900/6250] eta: 0:06:45 lr: 0.000116 grad: 0.0919 (0.0919) loss: 0.8168 (0.8234) time: 0.1677 data: 0.0774 max mem: 9377 +Train: [21] [4000/6250] eta: 0:06:27 lr: 0.000116 grad: 0.0865 (0.0920) loss: 0.8123 (0.8232) time: 0.1181 data: 0.0333 max mem: 9377 +Train: [21] [4100/6250] eta: 0:06:09 lr: 0.000116 grad: 0.0939 (0.0921) loss: 0.8164 (0.8230) time: 0.1579 data: 0.0723 max mem: 9377 +Train: [21] [4200/6250] eta: 0:05:52 lr: 0.000116 grad: 0.0927 (0.0921) loss: 0.8097 (0.8228) time: 0.1706 data: 0.0817 max mem: 9377 +Train: [21] [4300/6250] eta: 0:05:34 lr: 0.000116 grad: 0.1007 (0.0922) loss: 0.8093 (0.8226) time: 0.1631 data: 0.0712 max mem: 9377 +Train: [21] [4400/6250] eta: 0:05:16 lr: 0.000116 grad: 0.0956 (0.0923) loss: 0.8152 (0.8224) time: 0.1811 data: 0.0887 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:59 lr: 0.000116 grad: 0.0961 (0.0925) loss: 0.8141 (0.8222) time: 0.1542 data: 0.0593 max mem: 9377 +Train: [21] [4600/6250] eta: 0:04:42 lr: 0.000116 grad: 0.0979 (0.0926) loss: 0.8136 (0.8220) time: 0.2487 data: 0.1571 max mem: 9377 +Train: [21] [4700/6250] eta: 0:04:24 lr: 0.000116 grad: 0.0902 (0.0927) loss: 0.8075 (0.8219) time: 0.1557 data: 0.0592 max mem: 9377 +Train: [21] [4800/6250] eta: 0:04:07 lr: 0.000116 grad: 0.0892 (0.0928) loss: 0.8165 (0.8218) time: 0.1235 data: 0.0182 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:52 lr: 0.000116 grad: 0.0901 (0.0928) loss: 0.8101 (0.8217) time: 0.1211 data: 0.0126 max mem: 9377 +Train: [21] [5000/6250] eta: 0:03:35 lr: 0.000116 grad: 0.0936 (0.0929) loss: 0.8161 (0.8216) time: 0.1549 data: 0.0661 max mem: 9377 +Train: [21] [5100/6250] eta: 0:03:18 lr: 0.000116 grad: 0.0846 (0.0929) loss: 0.8307 (0.8216) time: 0.1546 data: 0.0771 max mem: 9377 +Train: [21] [5200/6250] eta: 0:03:00 lr: 0.000116 grad: 0.0882 (0.0929) loss: 0.8192 (0.8216) time: 0.1040 data: 0.0213 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:43 lr: 0.000116 grad: 0.0969 (0.0930) loss: 0.8130 (0.8215) time: 0.1932 data: 0.1111 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:25 lr: 0.000116 grad: 0.0964 (0.0931) loss: 0.8179 (0.8215) time: 0.1360 data: 0.0546 max mem: 9377 +Train: [21] [5500/6250] eta: 0:02:08 lr: 0.000116 grad: 0.0975 (0.0931) loss: 0.8191 (0.8215) time: 0.1344 data: 0.0517 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:51 lr: 0.000115 grad: 0.0904 (0.0933) loss: 0.8234 (0.8215) time: 0.1571 data: 0.0688 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:34 lr: 0.000115 grad: 0.0932 (0.0932) loss: 0.8279 (0.8215) time: 0.1882 data: 0.0889 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:16 lr: 0.000115 grad: 0.0947 (0.0933) loss: 0.8132 (0.8214) time: 0.1335 data: 0.0471 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:59 lr: 0.000115 grad: 0.0859 (0.0933) loss: 0.8255 (0.8214) time: 0.1772 data: 0.0896 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:42 lr: 0.000115 grad: 0.0914 (0.0933) loss: 0.8189 (0.8214) time: 0.1659 data: 0.0782 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:25 lr: 0.000115 grad: 0.0874 (0.0933) loss: 0.8206 (0.8214) time: 0.1461 data: 0.0603 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:08 lr: 0.000115 grad: 0.0875 (0.0932) loss: 0.8203 (0.8214) time: 0.1450 data: 0.0599 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.0855 (0.0932) loss: 0.8252 (0.8214) time: 0.1489 data: 0.0593 max mem: 9377 +Train: [21] Total time: 0:17:46 (0.1707 s / it) +Averaged stats: lr: 0.000115 grad: 0.0855 (0.0932) loss: 0.8252 (0.8214) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:05:10 loss: 0.8427 (0.8427) time: 5.0100 data: 4.9778 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8396 (0.8396) time: 0.1471 data: 0.1220 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:14 (0.2268 s / it) +Averaged stats (hcp-train-subset): loss: 0.8396 (0.8396) +Eval (hcp-val): [21] [ 0/62] eta: 0:05:29 loss: 0.8460 (0.8460) time: 5.3216 data: 5.2853 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8452 (0.8467) time: 0.1490 data: 0.1239 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (hcp-val): loss: 0.8452 (0.8467) +Eval (nsd-val): [21] [ 0/62] eta: 0:06:01 loss: 0.8085 (0.8085) time: 5.8305 data: 5.7995 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8181 (0.8194) time: 0.1106 data: 0.0853 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:14 (0.2290 s / it) +Averaged stats (nsd-val): loss: 0.8181 (0.8194) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [22] [ 0/6250] eta: 14:37:58 lr: 0.000115 grad: 0.0627 (0.0627) loss: 0.8462 (0.8462) time: 8.4285 data: 8.2773 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:26:03 lr: 0.000115 grad: 0.0899 (0.1122) loss: 0.8377 (0.8326) time: 0.2305 data: 0.1438 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:21:08 lr: 0.000115 grad: 0.0921 (0.1030) loss: 0.8255 (0.8313) time: 0.1241 data: 0.0182 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:19:29 lr: 0.000115 grad: 0.0813 (0.0984) loss: 0.8286 (0.8307) time: 0.1582 data: 0.0600 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:18:16 lr: 0.000115 grad: 0.0784 (0.0953) loss: 0.8332 (0.8308) time: 0.1624 data: 0.0704 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:17:49 lr: 0.000115 grad: 0.0810 (0.0935) loss: 0.8301 (0.8308) time: 0.1711 data: 0.0622 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:17:17 lr: 0.000115 grad: 0.0853 (0.0931) loss: 0.8246 (0.8297) time: 0.1576 data: 0.0586 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:16:55 lr: 0.000115 grad: 0.0924 (0.0934) loss: 0.8210 (0.8282) time: 0.1796 data: 0.0954 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:16:22 lr: 0.000115 grad: 0.0867 (0.0927) loss: 0.8272 (0.8278) time: 0.1192 data: 0.0216 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:15:58 lr: 0.000115 grad: 0.0821 (0.0922) loss: 0.8323 (0.8275) time: 0.1584 data: 0.0717 max mem: 9377 +Train: [22] [1000/6250] eta: 0:15:30 lr: 0.000115 grad: 0.0818 (0.0916) loss: 0.8272 (0.8274) time: 0.2091 data: 0.1231 max mem: 9377 +Train: [22] [1100/6250] eta: 0:15:06 lr: 0.000115 grad: 0.0963 (0.0915) loss: 0.8282 (0.8272) time: 0.2633 data: 0.1726 max mem: 9377 +Train: [22] [1200/6250] eta: 0:14:38 lr: 0.000115 grad: 0.0889 (0.0914) loss: 0.8202 (0.8269) time: 0.1954 data: 0.1041 max mem: 9377 +Train: [22] [1300/6250] eta: 0:14:15 lr: 0.000115 grad: 0.0904 (0.0913) loss: 0.8259 (0.8269) time: 0.2028 data: 0.1188 max mem: 9377 +Train: [22] [1400/6250] eta: 0:13:51 lr: 0.000115 grad: 0.0875 (0.0913) loss: 0.8268 (0.8266) time: 0.1575 data: 0.0591 max mem: 9377 +Train: [22] [1500/6250] eta: 0:13:29 lr: 0.000115 grad: 0.0903 (0.0915) loss: 0.8249 (0.8262) time: 0.1800 data: 0.0979 max mem: 9377 +Train: [22] [1600/6250] eta: 0:13:09 lr: 0.000115 grad: 0.1001 (0.0918) loss: 0.8204 (0.8259) time: 0.1549 data: 0.0610 max mem: 9377 +Train: [22] [1700/6250] eta: 0:12:53 lr: 0.000115 grad: 0.0951 (0.0920) loss: 0.8179 (0.8256) time: 0.1937 data: 0.1116 max mem: 9377 +Train: [22] [1800/6250] eta: 0:13:00 lr: 0.000115 grad: 0.0920 (0.0922) loss: 0.8182 (0.8251) time: 0.2478 data: 0.1479 max mem: 9377 +Train: [22] [1900/6250] eta: 0:12:37 lr: 0.000115 grad: 0.0909 (0.0924) loss: 0.8164 (0.8246) time: 0.1681 data: 0.0708 max mem: 9377 +Train: [22] [2000/6250] eta: 0:12:47 lr: 0.000115 grad: 0.0927 (0.0926) loss: 0.8187 (0.8242) time: 0.1416 data: 0.0003 max mem: 9377 +Train: [22] [2100/6250] eta: 0:12:38 lr: 0.000115 grad: 0.0929 (0.0929) loss: 0.8138 (0.8238) time: 0.1410 data: 0.0455 max mem: 9377 +Train: [22] [2200/6250] eta: 0:12:34 lr: 0.000115 grad: 0.0897 (0.0929) loss: 0.8137 (0.8233) time: 0.1295 data: 0.0240 max mem: 9377 +Train: [22] [2300/6250] eta: 0:12:08 lr: 0.000115 grad: 0.0931 (0.0929) loss: 0.8211 (0.8231) time: 0.1374 data: 0.0500 max mem: 9377 +Train: [22] [2400/6250] eta: 0:11:46 lr: 0.000115 grad: 0.0957 (0.0929) loss: 0.8203 (0.8229) time: 0.1597 data: 0.0800 max mem: 9377 +Train: [22] [2500/6250] eta: 0:11:35 lr: 0.000115 grad: 0.0966 (0.0930) loss: 0.8163 (0.8227) time: 0.2832 data: 0.1764 max mem: 9377 +Train: [22] [2600/6250] eta: 0:11:08 lr: 0.000115 grad: 0.0939 (0.0932) loss: 0.8232 (0.8226) time: 0.1531 data: 0.0655 max mem: 9377 +Train: [22] [2700/6250] eta: 0:10:51 lr: 0.000115 grad: 0.0970 (0.0933) loss: 0.8148 (0.8225) time: 0.2255 data: 0.1439 max mem: 9377 +Train: [22] [2800/6250] eta: 0:10:34 lr: 0.000115 grad: 0.0923 (0.0935) loss: 0.8182 (0.8223) time: 0.0975 data: 0.0006 max mem: 9377 +Train: [22] [2900/6250] eta: 0:10:13 lr: 0.000115 grad: 0.0917 (0.0937) loss: 0.8176 (0.8221) time: 0.1692 data: 0.0763 max mem: 9377 +Train: [22] [3000/6250] eta: 0:09:53 lr: 0.000115 grad: 0.0907 (0.0939) loss: 0.8204 (0.8220) time: 0.1746 data: 0.0836 max mem: 9377 +Train: [22] [3100/6250] eta: 0:09:42 lr: 0.000115 grad: 0.0928 (0.0940) loss: 0.8134 (0.8218) time: 0.6191 data: 0.5246 max mem: 9377 +Train: [22] [3200/6250] eta: 0:09:20 lr: 0.000115 grad: 0.1009 (0.0941) loss: 0.8198 (0.8217) time: 0.1283 data: 0.0077 max mem: 9377 +Train: [22] [3300/6250] eta: 0:08:59 lr: 0.000115 grad: 0.0924 (0.0943) loss: 0.8152 (0.8215) time: 0.1446 data: 0.0440 max mem: 9377 +Train: [22] [3400/6250] eta: 0:08:38 lr: 0.000115 grad: 0.0962 (0.0944) loss: 0.8103 (0.8215) time: 0.1631 data: 0.0632 max mem: 9377 +Train: [22] [3500/6250] eta: 0:08:18 lr: 0.000115 grad: 0.0916 (0.0944) loss: 0.8208 (0.8213) time: 0.1181 data: 0.0154 max mem: 9377 +Train: [22] [3600/6250] eta: 0:07:59 lr: 0.000115 grad: 0.0932 (0.0945) loss: 0.8195 (0.8213) time: 0.1551 data: 0.0647 max mem: 9377 +Train: [22] [3700/6250] eta: 0:07:39 lr: 0.000115 grad: 0.0975 (0.0946) loss: 0.8200 (0.8212) time: 0.1174 data: 0.0305 max mem: 9377 +Train: [22] [3800/6250] eta: 0:07:20 lr: 0.000115 grad: 0.0972 (0.0948) loss: 0.8183 (0.8210) time: 0.1397 data: 0.0655 max mem: 9377 +Train: [22] [3900/6250] eta: 0:07:01 lr: 0.000115 grad: 0.0984 (0.0949) loss: 0.8098 (0.8208) time: 0.1501 data: 0.0676 max mem: 9377 +Train: [22] [4000/6250] eta: 0:06:41 lr: 0.000115 grad: 0.0978 (0.0950) loss: 0.8179 (0.8208) time: 0.1378 data: 0.0537 max mem: 9377 +Train: [22] [4100/6250] eta: 0:06:23 lr: 0.000115 grad: 0.0943 (0.0951) loss: 0.8214 (0.8207) time: 0.1971 data: 0.1080 max mem: 9377 +Train: [22] [4200/6250] eta: 0:06:05 lr: 0.000115 grad: 0.0917 (0.0951) loss: 0.8139 (0.8206) time: 0.2028 data: 0.1154 max mem: 9377 +Train: [22] [4300/6250] eta: 0:05:46 lr: 0.000115 grad: 0.0940 (0.0952) loss: 0.8195 (0.8205) time: 0.1821 data: 0.1006 max mem: 9377 +Train: [22] [4400/6250] eta: 0:05:28 lr: 0.000115 grad: 0.0965 (0.0953) loss: 0.8207 (0.8205) time: 0.1472 data: 0.0588 max mem: 9377 +Train: [22] [4500/6250] eta: 0:05:09 lr: 0.000115 grad: 0.0936 (0.0954) loss: 0.8127 (0.8204) time: 0.1719 data: 0.0878 max mem: 9377 +Train: [22] [4600/6250] eta: 0:04:51 lr: 0.000115 grad: 0.0995 (0.0955) loss: 0.8108 (0.8203) time: 0.1594 data: 0.0695 max mem: 9377 +Train: [22] [4700/6250] eta: 0:04:32 lr: 0.000115 grad: 0.0957 (0.0956) loss: 0.8204 (0.8203) time: 0.1973 data: 0.1039 max mem: 9377 +Train: [22] [4800/6250] eta: 0:04:14 lr: 0.000115 grad: 0.1081 (0.0957) loss: 0.8120 (0.8202) time: 0.1433 data: 0.0480 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:56 lr: 0.000114 grad: 0.0954 (0.0958) loss: 0.8104 (0.8200) time: 0.2020 data: 0.1126 max mem: 9377 +Train: [22] [5000/6250] eta: 0:03:38 lr: 0.000114 grad: 0.1009 (0.0959) loss: 0.8136 (0.8199) time: 0.1260 data: 0.0255 max mem: 9377 +Train: [22] [5100/6250] eta: 0:03:20 lr: 0.000114 grad: 0.1004 (0.0960) loss: 0.8105 (0.8197) time: 0.1418 data: 0.0540 max mem: 9377 +Train: [22] [5200/6250] eta: 0:03:02 lr: 0.000114 grad: 0.0974 (0.0961) loss: 0.8068 (0.8196) time: 0.1449 data: 0.0574 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:44 lr: 0.000114 grad: 0.1023 (0.0962) loss: 0.8141 (0.8194) time: 0.2127 data: 0.1398 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:27 lr: 0.000114 grad: 0.0953 (0.0963) loss: 0.8136 (0.8193) time: 0.1186 data: 0.0441 max mem: 9377 +Train: [22] [5500/6250] eta: 0:02:09 lr: 0.000114 grad: 0.1049 (0.0964) loss: 0.8132 (0.8192) time: 0.1570 data: 0.0810 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:52 lr: 0.000114 grad: 0.1018 (0.0964) loss: 0.8158 (0.8191) time: 0.1913 data: 0.1124 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:34 lr: 0.000114 grad: 0.0897 (0.0964) loss: 0.8266 (0.8191) time: 0.1553 data: 0.0693 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:17 lr: 0.000114 grad: 0.0880 (0.0964) loss: 0.8183 (0.8191) time: 0.1902 data: 0.0997 max mem: 9377 +Train: [22] [5900/6250] eta: 0:01:00 lr: 0.000114 grad: 0.0956 (0.0964) loss: 0.8226 (0.8191) time: 0.1626 data: 0.0738 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:43 lr: 0.000114 grad: 0.0954 (0.0965) loss: 0.8151 (0.8191) time: 0.2044 data: 0.1210 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:25 lr: 0.000114 grad: 0.0953 (0.0965) loss: 0.8160 (0.8190) time: 0.1507 data: 0.0558 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:08 lr: 0.000114 grad: 0.1013 (0.0965) loss: 0.8071 (0.8189) time: 0.0966 data: 0.0002 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.0971 (0.0965) loss: 0.8121 (0.8189) time: 0.1107 data: 0.0109 max mem: 9377 +Train: [22] Total time: 0:17:57 (0.1725 s / it) +Averaged stats: lr: 0.000114 grad: 0.0971 (0.0965) loss: 0.8121 (0.8189) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:05:08 loss: 0.8392 (0.8392) time: 4.9772 data: 4.9462 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8389 (0.8382) time: 0.1178 data: 0.0912 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:13 (0.2243 s / it) +Averaged stats (hcp-train-subset): loss: 0.8389 (0.8382) +Eval (hcp-val): [22] [ 0/62] eta: 0:04:51 loss: 0.8461 (0.8461) time: 4.7063 data: 4.6491 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8435 (0.8464) time: 0.1171 data: 0.0919 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-val): loss: 0.8435 (0.8464) +Eval (nsd-val): [22] [ 0/62] eta: 0:04:57 loss: 0.8143 (0.8143) time: 4.8002 data: 4.7656 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8190 (0.8196) time: 0.1262 data: 0.1014 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:13 (0.2180 s / it) +Averaged stats (nsd-val): loss: 0.8190 (0.8196) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [23] [ 0/6250] eta: 11:07:10 lr: 0.000114 grad: 0.2446 (0.2446) loss: 0.8010 (0.8010) time: 6.4050 data: 6.2903 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:23:51 lr: 0.000114 grad: 0.1167 (0.1214) loss: 0.8301 (0.8359) time: 0.2104 data: 0.0930 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:19:45 lr: 0.000114 grad: 0.1023 (0.1171) loss: 0.8150 (0.8284) time: 0.1547 data: 0.0558 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:18:47 lr: 0.000114 grad: 0.1075 (0.1143) loss: 0.8091 (0.8239) time: 0.1933 data: 0.0969 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:17:32 lr: 0.000114 grad: 0.0940 (0.1114) loss: 0.8213 (0.8217) time: 0.1889 data: 0.0946 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:16:45 lr: 0.000114 grad: 0.0941 (0.1085) loss: 0.8166 (0.8208) time: 0.1839 data: 0.0892 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:16:05 lr: 0.000114 grad: 0.0922 (0.1058) loss: 0.8229 (0.8208) time: 0.1645 data: 0.0774 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:15:34 lr: 0.000114 grad: 0.0916 (0.1042) loss: 0.8246 (0.8205) time: 0.1480 data: 0.0468 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:15:21 lr: 0.000114 grad: 0.0840 (0.1027) loss: 0.8248 (0.8205) time: 0.1893 data: 0.1079 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:15:01 lr: 0.000114 grad: 0.0918 (0.1018) loss: 0.8151 (0.8208) time: 0.1655 data: 0.0797 max mem: 9377 +Train: [23] [1000/6250] eta: 0:14:42 lr: 0.000114 grad: 0.0937 (0.1014) loss: 0.8228 (0.8206) time: 0.1866 data: 0.1040 max mem: 9377 +Train: [23] [1100/6250] eta: 0:14:45 lr: 0.000114 grad: 0.0931 (0.1009) loss: 0.8174 (0.8204) time: 0.3620 data: 0.2714 max mem: 9377 +Train: [23] [1200/6250] eta: 0:14:22 lr: 0.000114 grad: 0.0901 (0.1004) loss: 0.8199 (0.8201) time: 0.0998 data: 0.0002 max mem: 9377 +Train: [23] [1300/6250] eta: 0:13:57 lr: 0.000114 grad: 0.0898 (0.1000) loss: 0.8177 (0.8201) time: 0.1175 data: 0.0280 max mem: 9377 +Train: [23] [1400/6250] eta: 0:13:36 lr: 0.000114 grad: 0.0893 (0.0997) loss: 0.8169 (0.8200) time: 0.1575 data: 0.0739 max mem: 9377 +Train: [23] [1500/6250] eta: 0:13:15 lr: 0.000114 grad: 0.1001 (0.0994) loss: 0.8205 (0.8198) time: 0.1504 data: 0.0685 max mem: 9377 +Train: [23] [1600/6250] eta: 0:12:58 lr: 0.000114 grad: 0.0898 (0.0992) loss: 0.8156 (0.8196) time: 0.1610 data: 0.0772 max mem: 9377 +Train: [23] [1700/6250] eta: 0:12:36 lr: 0.000114 grad: 0.0988 (0.0991) loss: 0.8119 (0.8193) time: 0.1513 data: 0.0595 max mem: 9377 +Train: [23] [1800/6250] eta: 0:12:23 lr: 0.000114 grad: 0.0912 (0.0988) loss: 0.8166 (0.8191) time: 0.1532 data: 0.0702 max mem: 9377 +Train: [23] [1900/6250] eta: 0:12:03 lr: 0.000114 grad: 0.0974 (0.0986) loss: 0.8203 (0.8189) time: 0.1408 data: 0.0523 max mem: 9377 +Train: [23] [2000/6250] eta: 0:11:49 lr: 0.000114 grad: 0.0957 (0.0986) loss: 0.8196 (0.8188) time: 0.2138 data: 0.1124 max mem: 9377 +Train: [23] [2100/6250] eta: 0:11:43 lr: 0.000114 grad: 0.0908 (0.0984) loss: 0.8167 (0.8186) time: 0.1682 data: 0.0548 max mem: 9377 +Train: [23] [2200/6250] eta: 0:11:25 lr: 0.000114 grad: 0.0935 (0.0983) loss: 0.8165 (0.8186) time: 0.1349 data: 0.0340 max mem: 9377 +Train: [23] [2300/6250] eta: 0:11:16 lr: 0.000114 grad: 0.0948 (0.0983) loss: 0.8072 (0.8185) time: 0.1037 data: 0.0032 max mem: 9377 +Train: [23] [2400/6250] eta: 0:10:59 lr: 0.000114 grad: 0.1022 (0.0984) loss: 0.8060 (0.8182) time: 0.2362 data: 0.1256 max mem: 9377 +Train: [23] [2500/6250] eta: 0:10:38 lr: 0.000114 grad: 0.0913 (0.0984) loss: 0.8180 (0.8181) time: 0.1321 data: 0.0343 max mem: 9377 +Train: [23] [2600/6250] eta: 0:10:23 lr: 0.000114 grad: 0.0967 (0.0984) loss: 0.8226 (0.8180) time: 0.2509 data: 0.1510 max mem: 9377 +Train: [23] [2700/6250] eta: 0:10:02 lr: 0.000114 grad: 0.0935 (0.0985) loss: 0.8141 (0.8178) time: 0.1479 data: 0.0587 max mem: 9377 +Train: [23] [2800/6250] eta: 0:09:44 lr: 0.000114 grad: 0.0993 (0.0985) loss: 0.8135 (0.8177) time: 0.1257 data: 0.0413 max mem: 9377 +Train: [23] [2900/6250] eta: 0:09:25 lr: 0.000114 grad: 0.0961 (0.0986) loss: 0.8143 (0.8177) time: 0.1322 data: 0.0399 max mem: 9377 +Train: [23] [3000/6250] eta: 0:09:07 lr: 0.000114 grad: 0.0998 (0.0987) loss: 0.8149 (0.8176) time: 0.1642 data: 0.0808 max mem: 9377 +Train: [23] [3100/6250] eta: 0:08:50 lr: 0.000114 grad: 0.0972 (0.0986) loss: 0.8249 (0.8176) time: 0.1696 data: 0.0894 max mem: 9377 +Train: [23] [3200/6250] eta: 0:08:32 lr: 0.000114 grad: 0.0956 (0.0986) loss: 0.8159 (0.8176) time: 0.2062 data: 0.1166 max mem: 9377 +Train: [23] [3300/6250] eta: 0:08:15 lr: 0.000114 grad: 0.0939 (0.0985) loss: 0.8108 (0.8176) time: 0.2087 data: 0.1354 max mem: 9377 +Train: [23] [3400/6250] eta: 0:07:57 lr: 0.000114 grad: 0.0949 (0.0985) loss: 0.8216 (0.8177) time: 0.1560 data: 0.0724 max mem: 9377 +Train: [23] [3500/6250] eta: 0:07:38 lr: 0.000114 grad: 0.0989 (0.0984) loss: 0.8123 (0.8177) time: 0.1268 data: 0.0378 max mem: 9377 +Train: [23] [3600/6250] eta: 0:07:21 lr: 0.000114 grad: 0.0978 (0.0984) loss: 0.8207 (0.8177) time: 0.1595 data: 0.0781 max mem: 9377 +Train: [23] [3700/6250] eta: 0:07:04 lr: 0.000114 grad: 0.0977 (0.0984) loss: 0.8175 (0.8176) time: 0.2098 data: 0.1315 max mem: 9377 +Train: [23] [3800/6250] eta: 0:06:47 lr: 0.000114 grad: 0.0944 (0.0983) loss: 0.8193 (0.8176) time: 0.1486 data: 0.0673 max mem: 9377 +Train: [23] [3900/6250] eta: 0:06:30 lr: 0.000114 grad: 0.0940 (0.0983) loss: 0.8169 (0.8175) time: 0.1713 data: 0.0883 max mem: 9377 +Train: [23] [4000/6250] eta: 0:06:14 lr: 0.000113 grad: 0.0988 (0.0983) loss: 0.8124 (0.8175) time: 0.2689 data: 0.1796 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:56 lr: 0.000113 grad: 0.1005 (0.0984) loss: 0.8140 (0.8175) time: 0.1438 data: 0.0499 max mem: 9377 +Train: [23] [4200/6250] eta: 0:05:39 lr: 0.000113 grad: 0.0956 (0.0984) loss: 0.8121 (0.8175) time: 0.1609 data: 0.0772 max mem: 9377 +Train: [23] [4300/6250] eta: 0:05:22 lr: 0.000113 grad: 0.1016 (0.0984) loss: 0.8088 (0.8175) time: 0.1525 data: 0.0647 max mem: 9377 +Train: [23] [4400/6250] eta: 0:05:06 lr: 0.000113 grad: 0.0962 (0.0985) loss: 0.8182 (0.8175) time: 0.2236 data: 0.1356 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:49 lr: 0.000113 grad: 0.1015 (0.0985) loss: 0.8171 (0.8174) time: 0.1415 data: 0.0445 max mem: 9377 +Train: [23] [4600/6250] eta: 0:04:33 lr: 0.000113 grad: 0.1007 (0.0986) loss: 0.8120 (0.8174) time: 0.1867 data: 0.0953 max mem: 9377 +Train: [23] [4700/6250] eta: 0:04:16 lr: 0.000113 grad: 0.1009 (0.0987) loss: 0.8170 (0.8173) time: 0.1604 data: 0.0733 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:59 lr: 0.000113 grad: 0.0975 (0.0987) loss: 0.8165 (0.8172) time: 0.1609 data: 0.0653 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:42 lr: 0.000113 grad: 0.0956 (0.0987) loss: 0.8168 (0.8172) time: 0.1110 data: 0.0150 max mem: 9377 +Train: [23] [5000/6250] eta: 0:03:25 lr: 0.000113 grad: 0.0969 (0.0989) loss: 0.8137 (0.8171) time: 0.1192 data: 0.0086 max mem: 9377 +Train: [23] [5100/6250] eta: 0:03:09 lr: 0.000113 grad: 0.1029 (0.0989) loss: 0.8113 (0.8171) time: 0.1480 data: 0.0622 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:52 lr: 0.000113 grad: 0.0963 (0.0989) loss: 0.8130 (0.8170) time: 0.1353 data: 0.0471 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:36 lr: 0.000113 grad: 0.0926 (0.0989) loss: 0.8161 (0.8170) time: 0.1598 data: 0.0694 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:19 lr: 0.000113 grad: 0.0986 (0.0990) loss: 0.8188 (0.8170) time: 0.1604 data: 0.0714 max mem: 9377 +Train: [23] [5500/6250] eta: 0:02:02 lr: 0.000113 grad: 0.0987 (0.0992) loss: 0.8060 (0.8170) time: 0.1934 data: 0.1090 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:46 lr: 0.000113 grad: 0.0912 (0.0992) loss: 0.8215 (0.8171) time: 0.1488 data: 0.0722 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:30 lr: 0.000113 grad: 0.0985 (0.0992) loss: 0.8192 (0.8171) time: 0.1587 data: 0.0836 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:13 lr: 0.000113 grad: 0.0935 (0.0993) loss: 0.8241 (0.8172) time: 0.1368 data: 0.0538 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:58 lr: 0.000113 grad: 0.0921 (0.0992) loss: 0.8251 (0.8173) time: 1.1331 data: 1.0552 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:41 lr: 0.000113 grad: 0.0911 (0.0992) loss: 0.8245 (0.8174) time: 0.1133 data: 0.0004 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:24 lr: 0.000113 grad: 0.1006 (0.0992) loss: 0.8263 (0.8175) time: 0.1443 data: 0.0542 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:08 lr: 0.000113 grad: 0.0921 (0.0991) loss: 0.8162 (0.8175) time: 0.1761 data: 0.0887 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.1018 (0.0991) loss: 0.8199 (0.8176) time: 0.1575 data: 0.0643 max mem: 9377 +Train: [23] Total time: 0:17:24 (0.1672 s / it) +Averaged stats: lr: 0.000113 grad: 0.1018 (0.0991) loss: 0.8199 (0.8176) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:03:58 loss: 0.8367 (0.8367) time: 3.8471 data: 3.7509 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8397 (0.8376) time: 0.1423 data: 0.1166 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (hcp-train-subset): loss: 0.8397 (0.8376) +Eval (hcp-val): [23] [ 0/62] eta: 0:06:05 loss: 0.8444 (0.8444) time: 5.8987 data: 5.8665 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8464 (0.8478) time: 0.1510 data: 0.1253 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:14 (0.2386 s / it) +Averaged stats (hcp-val): loss: 0.8464 (0.8478) +Eval (nsd-val): [23] [ 0/62] eta: 0:03:58 loss: 0.8043 (0.8043) time: 3.8483 data: 3.7600 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8129 (0.8150) time: 0.1339 data: 0.1067 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (nsd-val): loss: 0.8129 (0.8150) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 9:39:33 lr: 0.000113 grad: 0.2175 (0.2175) loss: 0.8284 (0.8284) time: 5.5638 data: 5.3852 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:22:38 lr: 0.000113 grad: 0.0887 (0.1106) loss: 0.8353 (0.8356) time: 0.1996 data: 0.1020 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:19:18 lr: 0.000113 grad: 0.0986 (0.1062) loss: 0.8217 (0.8304) time: 0.1773 data: 0.0790 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:18:22 lr: 0.000113 grad: 0.1023 (0.1050) loss: 0.8303 (0.8261) time: 0.1542 data: 0.0509 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:17:50 lr: 0.000113 grad: 0.0996 (0.1051) loss: 0.8102 (0.8233) time: 0.1953 data: 0.1144 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:17:04 lr: 0.000113 grad: 0.1034 (0.1041) loss: 0.8125 (0.8218) time: 0.1616 data: 0.0749 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:16:45 lr: 0.000113 grad: 0.0927 (0.1041) loss: 0.8097 (0.8199) time: 0.1222 data: 0.0286 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:17:16 lr: 0.000113 grad: 0.0929 (0.1037) loss: 0.8102 (0.8189) time: 0.2204 data: 0.1290 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:17:09 lr: 0.000113 grad: 0.0981 (0.1033) loss: 0.8070 (0.8179) time: 0.1410 data: 0.0467 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:17:14 lr: 0.000113 grad: 0.0976 (0.1035) loss: 0.8040 (0.8167) time: 0.2330 data: 0.1358 max mem: 9377 +Train: [24] [1000/6250] eta: 0:16:30 lr: 0.000113 grad: 0.1016 (0.1034) loss: 0.8109 (0.8161) time: 0.1640 data: 0.0771 max mem: 9377 +Train: [24] [1100/6250] eta: 0:15:55 lr: 0.000113 grad: 0.0893 (0.1030) loss: 0.8119 (0.8156) time: 0.1311 data: 0.0397 max mem: 9377 +Train: [24] [1200/6250] eta: 0:15:29 lr: 0.000113 grad: 0.0957 (0.1028) loss: 0.8120 (0.8151) time: 0.1362 data: 0.0432 max mem: 9377 +Train: [24] [1300/6250] eta: 0:15:05 lr: 0.000113 grad: 0.1005 (0.1025) loss: 0.8096 (0.8148) time: 0.2463 data: 0.1620 max mem: 9377 +Train: [24] [1400/6250] eta: 0:14:37 lr: 0.000113 grad: 0.0977 (0.1024) loss: 0.8075 (0.8145) time: 0.2046 data: 0.1146 max mem: 9377 +Train: [24] [1500/6250] eta: 0:14:10 lr: 0.000113 grad: 0.0965 (0.1020) loss: 0.8084 (0.8143) time: 0.1203 data: 0.0379 max mem: 9377 +Train: [24] [1600/6250] eta: 0:13:49 lr: 0.000113 grad: 0.0980 (0.1019) loss: 0.8068 (0.8141) time: 0.2028 data: 0.1230 max mem: 9377 +Train: [24] [1700/6250] eta: 0:13:23 lr: 0.000113 grad: 0.1009 (0.1019) loss: 0.8003 (0.8137) time: 0.1667 data: 0.0791 max mem: 9377 +Train: [24] [1800/6250] eta: 0:13:02 lr: 0.000113 grad: 0.1105 (0.1021) loss: 0.7992 (0.8133) time: 0.1201 data: 0.0320 max mem: 9377 +Train: [24] [1900/6250] eta: 0:12:42 lr: 0.000113 grad: 0.0996 (0.1023) loss: 0.8149 (0.8132) time: 0.1804 data: 0.1003 max mem: 9377 +Train: [24] [2000/6250] eta: 0:12:20 lr: 0.000113 grad: 0.1049 (0.1024) loss: 0.8117 (0.8131) time: 0.1541 data: 0.0608 max mem: 9377 +Train: [24] [2100/6250] eta: 0:12:07 lr: 0.000113 grad: 0.0983 (0.1025) loss: 0.8153 (0.8129) time: 0.2078 data: 0.1140 max mem: 9377 +Train: [24] [2200/6250] eta: 0:11:45 lr: 0.000113 grad: 0.1013 (0.1026) loss: 0.8124 (0.8129) time: 0.1667 data: 0.0706 max mem: 9377 +Train: [24] [2300/6250] eta: 0:11:24 lr: 0.000113 grad: 0.0929 (0.1025) loss: 0.8139 (0.8128) time: 0.1156 data: 0.0271 max mem: 9377 +Train: [24] [2400/6250] eta: 0:11:04 lr: 0.000113 grad: 0.0983 (0.1025) loss: 0.8096 (0.8128) time: 0.1307 data: 0.0308 max mem: 9377 +Train: [24] [2500/6250] eta: 0:10:44 lr: 0.000113 grad: 0.0977 (0.1024) loss: 0.8139 (0.8129) time: 0.1451 data: 0.0601 max mem: 9377 +Train: [24] [2600/6250] eta: 0:10:25 lr: 0.000113 grad: 0.1050 (0.1025) loss: 0.8075 (0.8127) time: 0.1387 data: 0.0460 max mem: 9377 +Train: [24] [2700/6250] eta: 0:10:06 lr: 0.000113 grad: 0.0992 (0.1025) loss: 0.8153 (0.8127) time: 0.1100 data: 0.0209 max mem: 9377 +Train: [24] [2800/6250] eta: 0:10:03 lr: 0.000113 grad: 0.0959 (0.1024) loss: 0.8126 (0.8127) time: 0.0861 data: 0.0002 max mem: 9377 +Train: [24] [2900/6250] eta: 0:09:42 lr: 0.000112 grad: 0.0911 (0.1022) loss: 0.8198 (0.8127) time: 0.1629 data: 0.0816 max mem: 9377 +Train: [24] [3000/6250] eta: 0:09:22 lr: 0.000112 grad: 0.0978 (0.1022) loss: 0.8075 (0.8127) time: 0.1589 data: 0.0773 max mem: 9377 +Train: [24] [3100/6250] eta: 0:09:04 lr: 0.000112 grad: 0.0932 (0.1021) loss: 0.8162 (0.8127) time: 0.1068 data: 0.0121 max mem: 9377 +Train: [24] [3200/6250] eta: 0:08:53 lr: 0.000112 grad: 0.1020 (0.1022) loss: 0.8114 (0.8127) time: 0.1905 data: 0.1037 max mem: 9377 +Train: [24] [3300/6250] eta: 0:08:33 lr: 0.000112 grad: 0.0935 (0.1021) loss: 0.8154 (0.8128) time: 0.1430 data: 0.0633 max mem: 9377 +Train: [24] [3400/6250] eta: 0:08:14 lr: 0.000112 grad: 0.0997 (0.1020) loss: 0.8124 (0.8128) time: 0.1625 data: 0.0705 max mem: 9377 +Train: [24] [3500/6250] eta: 0:07:55 lr: 0.000112 grad: 0.0967 (0.1020) loss: 0.8011 (0.8128) time: 0.1689 data: 0.0822 max mem: 9377 +Train: [24] [3600/6250] eta: 0:07:36 lr: 0.000112 grad: 0.1002 (0.1019) loss: 0.7994 (0.8126) time: 0.1396 data: 0.0476 max mem: 9377 +Train: [24] [3700/6250] eta: 0:07:18 lr: 0.000112 grad: 0.0971 (0.1021) loss: 0.8036 (0.8125) time: 0.1443 data: 0.0535 max mem: 9377 +Train: [24] [3800/6250] eta: 0:07:00 lr: 0.000112 grad: 0.0959 (0.1020) loss: 0.8145 (0.8124) time: 0.1168 data: 0.0321 max mem: 9377 +Train: [24] [3900/6250] eta: 0:06:42 lr: 0.000112 grad: 0.0999 (0.1019) loss: 0.8101 (0.8123) time: 0.1805 data: 0.0790 max mem: 9377 +Train: [24] [4000/6250] eta: 0:06:24 lr: 0.000112 grad: 0.0976 (0.1018) loss: 0.8094 (0.8123) time: 0.1564 data: 0.0677 max mem: 9377 +Train: [24] [4100/6250] eta: 0:06:06 lr: 0.000112 grad: 0.0945 (0.1017) loss: 0.8150 (0.8123) time: 0.1792 data: 0.0929 max mem: 9377 +Train: [24] [4200/6250] eta: 0:05:48 lr: 0.000112 grad: 0.0968 (0.1016) loss: 0.8147 (0.8122) time: 0.2159 data: 0.1326 max mem: 9377 +Train: [24] [4300/6250] eta: 0:05:31 lr: 0.000112 grad: 0.0933 (0.1016) loss: 0.8162 (0.8122) time: 0.1365 data: 0.0521 max mem: 9377 +Train: [24] [4400/6250] eta: 0:05:14 lr: 0.000112 grad: 0.0920 (0.1015) loss: 0.8118 (0.8122) time: 0.1431 data: 0.0583 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:56 lr: 0.000112 grad: 0.0964 (0.1014) loss: 0.8102 (0.8123) time: 0.1728 data: 0.0929 max mem: 9377 +Train: [24] [4600/6250] eta: 0:04:39 lr: 0.000112 grad: 0.0949 (0.1014) loss: 0.8140 (0.8123) time: 0.1675 data: 0.0822 max mem: 9377 +Train: [24] [4700/6250] eta: 0:04:22 lr: 0.000112 grad: 0.1027 (0.1013) loss: 0.8053 (0.8123) time: 0.1747 data: 0.0901 max mem: 9377 +Train: [24] [4800/6250] eta: 0:04:05 lr: 0.000112 grad: 0.1026 (0.1013) loss: 0.8119 (0.8123) time: 0.1574 data: 0.0745 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:48 lr: 0.000112 grad: 0.0964 (0.1012) loss: 0.8162 (0.8123) time: 0.1414 data: 0.0588 max mem: 9377 +Train: [24] [5000/6250] eta: 0:03:31 lr: 0.000112 grad: 0.0919 (0.1011) loss: 0.8164 (0.8124) time: 0.1121 data: 0.0208 max mem: 9377 +Train: [24] [5100/6250] eta: 0:03:17 lr: 0.000112 grad: 0.0963 (0.1011) loss: 0.8152 (0.8124) time: 0.6134 data: 0.5149 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:59 lr: 0.000112 grad: 0.1004 (0.1010) loss: 0.8121 (0.8125) time: 0.1269 data: 0.0352 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:42 lr: 0.000112 grad: 0.1015 (0.1010) loss: 0.8082 (0.8125) time: 0.1564 data: 0.0635 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:24 lr: 0.000112 grad: 0.1086 (0.1011) loss: 0.8006 (0.8124) time: 0.1138 data: 0.0208 max mem: 9377 +Train: [24] [5500/6250] eta: 0:02:07 lr: 0.000112 grad: 0.0982 (0.1011) loss: 0.7987 (0.8123) time: 0.1708 data: 0.0862 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:50 lr: 0.000112 grad: 0.0995 (0.1012) loss: 0.8109 (0.8123) time: 0.1606 data: 0.0706 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:33 lr: 0.000112 grad: 0.1047 (0.1012) loss: 0.8086 (0.8123) time: 0.2083 data: 0.1180 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:16 lr: 0.000112 grad: 0.1042 (0.1012) loss: 0.8128 (0.8122) time: 0.1804 data: 0.0936 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:59 lr: 0.000112 grad: 0.1000 (0.1013) loss: 0.8166 (0.8122) time: 0.1883 data: 0.1019 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:42 lr: 0.000112 grad: 0.0939 (0.1013) loss: 0.8151 (0.8122) time: 0.1658 data: 0.0770 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:25 lr: 0.000112 grad: 0.0975 (0.1013) loss: 0.8110 (0.8122) time: 0.1435 data: 0.0575 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:08 lr: 0.000112 grad: 0.1013 (0.1014) loss: 0.8076 (0.8121) time: 0.1515 data: 0.0679 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.1015 (0.1014) loss: 0.7979 (0.8121) time: 0.1461 data: 0.0667 max mem: 9377 +Train: [24] Total time: 0:17:45 (0.1704 s / it) +Averaged stats: lr: 0.000112 grad: 0.1015 (0.1014) loss: 0.7979 (0.8121) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:06:23 loss: 0.8366 (0.8366) time: 6.1811 data: 6.1468 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8371 (0.8362) time: 0.1589 data: 0.1338 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:17 (0.2760 s / it) +Averaged stats (hcp-train-subset): loss: 0.8371 (0.8362) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [24] [ 0/62] eta: 0:05:36 loss: 0.8450 (0.8450) time: 5.4244 data: 5.3915 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8463 (0.8473) time: 0.1253 data: 0.1004 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:13 (0.2191 s / it) +Averaged stats (hcp-val): loss: 0.8463 (0.8473) +Making plots (hcp-val): example=27 +Eval (nsd-val): [24] [ 0/62] eta: 0:04:26 loss: 0.8114 (0.8114) time: 4.2964 data: 4.2646 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8203 (0.8207) time: 0.1036 data: 0.0785 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (nsd-val): loss: 0.8203 (0.8207) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 1 day, 17:56:42 lr: 0.000112 grad: 0.1968 (0.1968) loss: 0.8694 (0.8694) time: 24.1604 data: 23.9760 max mem: 9377 +Train: [25] [ 100/6250] eta: 1:02:04 lr: 0.000112 grad: 0.0836 (0.1163) loss: 0.8378 (0.8388) time: 0.1695 data: 0.0638 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:48:04 lr: 0.000112 grad: 0.0879 (0.1083) loss: 0.8384 (0.8335) time: 0.1417 data: 0.0004 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:39:41 lr: 0.000112 grad: 0.0961 (0.1038) loss: 0.8242 (0.8315) time: 0.1645 data: 0.0475 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:36:52 lr: 0.000112 grad: 0.0942 (0.1028) loss: 0.8150 (0.8285) time: 0.1284 data: 0.0003 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:31:58 lr: 0.000112 grad: 0.0948 (0.1020) loss: 0.8209 (0.8273) time: 0.1420 data: 0.0482 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:32:26 lr: 0.000112 grad: 0.0997 (0.1015) loss: 0.8184 (0.8264) time: 0.2398 data: 0.1083 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:30:37 lr: 0.000112 grad: 0.1081 (0.1016) loss: 0.8165 (0.8252) time: 0.2222 data: 0.1298 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:28:56 lr: 0.000112 grad: 0.0984 (0.1017) loss: 0.8222 (0.8243) time: 0.3435 data: 0.2323 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:27:56 lr: 0.000112 grad: 0.0980 (0.1014) loss: 0.8194 (0.8238) time: 0.5330 data: 0.4518 max mem: 9377 +Train: [25] [1000/6250] eta: 0:26:01 lr: 0.000112 grad: 0.1035 (0.1016) loss: 0.8133 (0.8228) time: 0.1487 data: 0.0700 max mem: 9377 +Train: [25] [1100/6250] eta: 0:25:53 lr: 0.000112 grad: 0.1001 (0.1015) loss: 0.8119 (0.8219) time: 0.1894 data: 0.0945 max mem: 9377 +Train: [25] [1200/6250] eta: 0:24:14 lr: 0.000112 grad: 0.0977 (0.1013) loss: 0.8037 (0.8209) time: 0.1542 data: 0.0705 max mem: 9377 +Train: [25] [1300/6250] eta: 0:22:53 lr: 0.000112 grad: 0.1021 (0.1013) loss: 0.8095 (0.8199) time: 0.1402 data: 0.0444 max mem: 9377 +Train: [25] [1400/6250] eta: 0:21:43 lr: 0.000112 grad: 0.0941 (0.1010) loss: 0.8107 (0.8193) time: 0.1662 data: 0.0832 max mem: 9377 +Train: [25] [1500/6250] eta: 0:20:43 lr: 0.000112 grad: 0.0943 (0.1010) loss: 0.8065 (0.8189) time: 0.2064 data: 0.1197 max mem: 9377 +Train: [25] [1600/6250] eta: 0:19:48 lr: 0.000111 grad: 0.0918 (0.1008) loss: 0.8250 (0.8185) time: 0.1305 data: 0.0357 max mem: 9377 +Train: [25] [1700/6250] eta: 0:18:56 lr: 0.000111 grad: 0.0975 (0.1007) loss: 0.8117 (0.8182) time: 0.1207 data: 0.0323 max mem: 9377 +Train: [25] [1800/6250] eta: 0:18:12 lr: 0.000111 grad: 0.0903 (0.1006) loss: 0.8128 (0.8181) time: 0.2071 data: 0.1229 max mem: 9377 +Train: [25] [1900/6250] eta: 0:17:28 lr: 0.000111 grad: 0.0962 (0.1007) loss: 0.8166 (0.8178) time: 0.1306 data: 0.0504 max mem: 9377 +Train: [25] [2000/6250] eta: 0:16:46 lr: 0.000111 grad: 0.1047 (0.1007) loss: 0.8206 (0.8177) time: 0.1392 data: 0.0555 max mem: 9377 +Train: [25] [2100/6250] eta: 0:16:07 lr: 0.000111 grad: 0.1006 (0.1007) loss: 0.8188 (0.8177) time: 0.1096 data: 0.0209 max mem: 9377 +Train: [25] [2200/6250] eta: 0:15:30 lr: 0.000111 grad: 0.0997 (0.1006) loss: 0.8168 (0.8176) time: 0.1776 data: 0.0916 max mem: 9377 +Train: [25] [2300/6250] eta: 0:14:58 lr: 0.000111 grad: 0.0944 (0.1007) loss: 0.8231 (0.8175) time: 0.0983 data: 0.0003 max mem: 9377 +Train: [25] [2400/6250] eta: 0:14:27 lr: 0.000111 grad: 0.0979 (0.1008) loss: 0.8122 (0.8173) time: 0.1802 data: 0.0916 max mem: 9377 +Train: [25] [2500/6250] eta: 0:13:54 lr: 0.000111 grad: 0.1050 (0.1011) loss: 0.8115 (0.8169) time: 0.1896 data: 0.1049 max mem: 9377 +Train: [25] [2600/6250] eta: 0:13:20 lr: 0.000111 grad: 0.1005 (0.1011) loss: 0.8117 (0.8168) time: 0.1143 data: 0.0231 max mem: 9377 +Train: [25] [2700/6250] eta: 0:12:51 lr: 0.000111 grad: 0.0958 (0.1012) loss: 0.8123 (0.8165) time: 0.1672 data: 0.0648 max mem: 9377 +Train: [25] [2800/6250] eta: 0:12:21 lr: 0.000111 grad: 0.0978 (0.1013) loss: 0.8084 (0.8164) time: 0.1476 data: 0.0486 max mem: 9377 +Train: [25] [2900/6250] eta: 0:11:52 lr: 0.000111 grad: 0.0944 (0.1012) loss: 0.8145 (0.8162) time: 0.1518 data: 0.0659 max mem: 9377 +Train: [25] [3000/6250] eta: 0:11:24 lr: 0.000111 grad: 0.0972 (0.1011) loss: 0.8125 (0.8161) time: 0.1394 data: 0.0530 max mem: 9377 +Train: [25] [3100/6250] eta: 0:10:58 lr: 0.000111 grad: 0.0993 (0.1012) loss: 0.8069 (0.8159) time: 0.1871 data: 0.0983 max mem: 9377 +Train: [25] [3200/6250] eta: 0:10:32 lr: 0.000111 grad: 0.0984 (0.1013) loss: 0.8120 (0.8157) time: 0.1850 data: 0.1095 max mem: 9377 +Train: [25] [3300/6250] eta: 0:10:06 lr: 0.000111 grad: 0.1021 (0.1013) loss: 0.8127 (0.8155) time: 0.1225 data: 0.0410 max mem: 9377 +Train: [25] [3400/6250] eta: 0:09:40 lr: 0.000111 grad: 0.1028 (0.1014) loss: 0.8043 (0.8153) time: 0.1145 data: 0.0326 max mem: 9377 +Train: [25] [3500/6250] eta: 0:09:16 lr: 0.000111 grad: 0.0991 (0.1015) loss: 0.8100 (0.8151) time: 0.1280 data: 0.0394 max mem: 9377 +Train: [25] [3600/6250] eta: 0:08:52 lr: 0.000111 grad: 0.1039 (0.1015) loss: 0.8095 (0.8150) time: 0.1619 data: 0.0799 max mem: 9377 +Train: [25] [3700/6250] eta: 0:08:29 lr: 0.000111 grad: 0.1006 (0.1016) loss: 0.8119 (0.8149) time: 0.1633 data: 0.0698 max mem: 9377 +Train: [25] [3800/6250] eta: 0:08:07 lr: 0.000111 grad: 0.1027 (0.1016) loss: 0.8095 (0.8148) time: 0.1737 data: 0.0941 max mem: 9377 +Train: [25] [3900/6250] eta: 0:07:45 lr: 0.000111 grad: 0.0918 (0.1017) loss: 0.8166 (0.8147) time: 0.1831 data: 0.0978 max mem: 9377 +Train: [25] [4000/6250] eta: 0:07:26 lr: 0.000111 grad: 0.1009 (0.1018) loss: 0.8074 (0.8146) time: 0.4212 data: 0.3366 max mem: 9377 +Train: [25] [4100/6250] eta: 0:07:06 lr: 0.000111 grad: 0.1052 (0.1019) loss: 0.8072 (0.8145) time: 0.2353 data: 0.1422 max mem: 9377 +Train: [25] [4200/6250] eta: 0:06:43 lr: 0.000111 grad: 0.1009 (0.1019) loss: 0.8104 (0.8144) time: 0.1379 data: 0.0482 max mem: 9377 +Train: [25] [4300/6250] eta: 0:06:22 lr: 0.000111 grad: 0.1027 (0.1020) loss: 0.8129 (0.8143) time: 0.1192 data: 0.0292 max mem: 9377 +Train: [25] [4400/6250] eta: 0:06:02 lr: 0.000111 grad: 0.1122 (0.1022) loss: 0.8053 (0.8143) time: 0.2036 data: 0.1169 max mem: 9377 +Train: [25] [4500/6250] eta: 0:05:40 lr: 0.000111 grad: 0.1000 (0.1023) loss: 0.8156 (0.8141) time: 0.1328 data: 0.0494 max mem: 9377 +Train: [25] [4600/6250] eta: 0:05:19 lr: 0.000111 grad: 0.1029 (0.1024) loss: 0.8026 (0.8140) time: 0.1483 data: 0.0616 max mem: 9377 +Train: [25] [4700/6250] eta: 0:04:59 lr: 0.000111 grad: 0.1003 (0.1024) loss: 0.8028 (0.8139) time: 0.1689 data: 0.0900 max mem: 9377 +Train: [25] [4800/6250] eta: 0:04:39 lr: 0.000111 grad: 0.0965 (0.1024) loss: 0.8126 (0.8138) time: 0.1783 data: 0.0935 max mem: 9377 +Train: [25] [4900/6250] eta: 0:04:19 lr: 0.000111 grad: 0.0998 (0.1024) loss: 0.8095 (0.8138) time: 0.1577 data: 0.0695 max mem: 9377 +Train: [25] [5000/6250] eta: 0:03:59 lr: 0.000111 grad: 0.1088 (0.1024) loss: 0.8078 (0.8137) time: 0.1406 data: 0.0506 max mem: 9377 +Train: [25] [5100/6250] eta: 0:03:39 lr: 0.000111 grad: 0.1000 (0.1024) loss: 0.8144 (0.8137) time: 0.2212 data: 0.1390 max mem: 9377 +Train: [25] [5200/6250] eta: 0:03:19 lr: 0.000111 grad: 0.1040 (0.1024) loss: 0.8115 (0.8136) time: 0.1244 data: 0.0417 max mem: 9377 +Train: [25] [5300/6250] eta: 0:03:00 lr: 0.000111 grad: 0.1023 (0.1024) loss: 0.8157 (0.8136) time: 0.2297 data: 0.1239 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:41 lr: 0.000111 grad: 0.0978 (0.1024) loss: 0.8114 (0.8136) time: 0.1145 data: 0.0283 max mem: 9377 +Train: [25] [5500/6250] eta: 0:02:21 lr: 0.000111 grad: 0.1001 (0.1024) loss: 0.8125 (0.8136) time: 0.1786 data: 0.0855 max mem: 9377 +Train: [25] [5600/6250] eta: 0:02:03 lr: 0.000111 grad: 0.0920 (0.1023) loss: 0.8224 (0.8136) time: 0.1032 data: 0.0003 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:44 lr: 0.000111 grad: 0.0945 (0.1023) loss: 0.8123 (0.8136) time: 0.1706 data: 0.0807 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:25 lr: 0.000111 grad: 0.0931 (0.1023) loss: 0.8134 (0.8135) time: 0.2011 data: 0.1182 max mem: 9377 +Train: [25] [5900/6250] eta: 0:01:06 lr: 0.000111 grad: 0.0990 (0.1023) loss: 0.8077 (0.8135) time: 0.2372 data: 0.1467 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:47 lr: 0.000111 grad: 0.0993 (0.1023) loss: 0.8167 (0.8135) time: 0.2288 data: 0.1266 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:28 lr: 0.000111 grad: 0.1036 (0.1023) loss: 0.8128 (0.8135) time: 0.1546 data: 0.0203 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:09 lr: 0.000111 grad: 0.1014 (0.1022) loss: 0.8104 (0.8136) time: 0.1058 data: 0.0014 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.1023 (0.1022) loss: 0.8167 (0.8136) time: 0.1169 data: 0.0148 max mem: 9377 +Train: [25] Total time: 0:19:46 (0.1899 s / it) +Averaged stats: lr: 0.000111 grad: 0.1023 (0.1022) loss: 0.8167 (0.8136) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:03:57 loss: 0.8454 (0.8454) time: 3.8325 data: 3.7618 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8381 (0.8370) time: 0.1276 data: 0.1025 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:13 (0.2166 s / it) +Averaged stats (hcp-train-subset): loss: 0.8381 (0.8370) +Eval (hcp-val): [25] [ 0/62] eta: 0:04:12 loss: 0.8457 (0.8457) time: 4.0773 data: 3.9921 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8466 (0.8482) time: 0.1205 data: 0.0957 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (hcp-val): loss: 0.8466 (0.8482) +Eval (nsd-val): [25] [ 0/62] eta: 0:04:05 loss: 0.8102 (0.8102) time: 3.9607 data: 3.8709 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8197 (0.8211) time: 0.1158 data: 0.0908 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:13 (0.2132 s / it) +Averaged stats (nsd-val): loss: 0.8197 (0.8211) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [26] [ 0/6250] eta: 12:23:25 lr: 0.000111 grad: 0.1043 (0.1043) loss: 0.8782 (0.8782) time: 7.1369 data: 7.0172 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:23:19 lr: 0.000111 grad: 0.1246 (0.1475) loss: 0.8197 (0.8256) time: 0.1432 data: 0.0559 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:19:51 lr: 0.000110 grad: 0.1108 (0.1407) loss: 0.8241 (0.8181) time: 0.1600 data: 0.0679 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:18:38 lr: 0.000110 grad: 0.0951 (0.1288) loss: 0.8197 (0.8170) time: 0.1705 data: 0.0718 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:17:36 lr: 0.000110 grad: 0.0963 (0.1226) loss: 0.8076 (0.8167) time: 0.1635 data: 0.0643 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:16:48 lr: 0.000110 grad: 0.0980 (0.1183) loss: 0.8159 (0.8165) time: 0.1219 data: 0.0350 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:16:19 lr: 0.000110 grad: 0.1002 (0.1158) loss: 0.8187 (0.8166) time: 0.1757 data: 0.0754 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:15:57 lr: 0.000110 grad: 0.1006 (0.1135) loss: 0.8050 (0.8165) time: 0.1951 data: 0.0948 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:15:24 lr: 0.000110 grad: 0.0957 (0.1120) loss: 0.8180 (0.8164) time: 0.1398 data: 0.0388 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:15:01 lr: 0.000110 grad: 0.0963 (0.1107) loss: 0.8110 (0.8159) time: 0.1728 data: 0.0877 max mem: 9377 +Train: [26] [1000/6250] eta: 0:15:33 lr: 0.000110 grad: 0.0920 (0.1094) loss: 0.8209 (0.8158) time: 0.1779 data: 0.0871 max mem: 9377 +Train: [26] [1100/6250] eta: 0:15:03 lr: 0.000110 grad: 0.1017 (0.1088) loss: 0.8050 (0.8156) time: 0.1593 data: 0.0838 max mem: 9377 +Train: [26] [1200/6250] eta: 0:14:37 lr: 0.000110 grad: 0.0965 (0.1083) loss: 0.8126 (0.8153) time: 0.1565 data: 0.0748 max mem: 9377 +Train: [26] [1300/6250] eta: 0:14:12 lr: 0.000110 grad: 0.1028 (0.1081) loss: 0.8111 (0.8148) time: 0.1565 data: 0.0693 max mem: 9377 +Train: [26] [1400/6250] eta: 0:13:47 lr: 0.000110 grad: 0.0959 (0.1075) loss: 0.8182 (0.8148) time: 0.1132 data: 0.0207 max mem: 9377 +Train: [26] [1500/6250] eta: 0:13:25 lr: 0.000110 grad: 0.1093 (0.1072) loss: 0.8145 (0.8149) time: 0.1375 data: 0.0535 max mem: 9377 +Train: [26] [1600/6250] eta: 0:13:12 lr: 0.000110 grad: 0.0978 (0.1069) loss: 0.7990 (0.8145) time: 0.1252 data: 0.0184 max mem: 9377 +Train: [26] [1700/6250] eta: 0:12:51 lr: 0.000110 grad: 0.1007 (0.1067) loss: 0.8082 (0.8143) time: 0.1587 data: 0.0575 max mem: 9377 +Train: [26] [1800/6250] eta: 0:12:29 lr: 0.000110 grad: 0.1054 (0.1066) loss: 0.8024 (0.8140) time: 0.1364 data: 0.0477 max mem: 9377 +Train: [26] [1900/6250] eta: 0:12:09 lr: 0.000110 grad: 0.0975 (0.1064) loss: 0.8164 (0.8139) time: 0.1614 data: 0.0637 max mem: 9377 +Train: [26] [2000/6250] eta: 0:11:53 lr: 0.000110 grad: 0.1000 (0.1062) loss: 0.8068 (0.8137) time: 0.2303 data: 0.1432 max mem: 9377 +Train: [26] [2100/6250] eta: 0:11:32 lr: 0.000110 grad: 0.0971 (0.1060) loss: 0.8148 (0.8137) time: 0.2033 data: 0.1231 max mem: 9377 +Train: [26] [2200/6250] eta: 0:11:12 lr: 0.000110 grad: 0.0987 (0.1058) loss: 0.8163 (0.8137) time: 0.1405 data: 0.0516 max mem: 9377 +Train: [26] [2300/6250] eta: 0:10:56 lr: 0.000110 grad: 0.0992 (0.1057) loss: 0.8150 (0.8136) time: 0.2040 data: 0.1227 max mem: 9377 +Train: [26] [2400/6250] eta: 0:10:36 lr: 0.000110 grad: 0.0955 (0.1054) loss: 0.8162 (0.8136) time: 0.1370 data: 0.0509 max mem: 9377 +Train: [26] [2500/6250] eta: 0:10:17 lr: 0.000110 grad: 0.0945 (0.1053) loss: 0.8206 (0.8137) time: 0.1274 data: 0.0367 max mem: 9377 +Train: [26] [2600/6250] eta: 0:10:00 lr: 0.000110 grad: 0.0998 (0.1051) loss: 0.8150 (0.8137) time: 0.1215 data: 0.0364 max mem: 9377 +Train: [26] [2700/6250] eta: 0:09:42 lr: 0.000110 grad: 0.1083 (0.1052) loss: 0.7999 (0.8135) time: 0.1354 data: 0.0569 max mem: 9377 +Train: [26] [2800/6250] eta: 0:09:25 lr: 0.000110 grad: 0.0961 (0.1051) loss: 0.8088 (0.8134) time: 0.1776 data: 0.0948 max mem: 9377 +Train: [26] [2900/6250] eta: 0:09:08 lr: 0.000110 grad: 0.1050 (0.1050) loss: 0.8093 (0.8133) time: 0.1446 data: 0.0606 max mem: 9377 +Train: [26] [3000/6250] eta: 0:08:50 lr: 0.000110 grad: 0.1080 (0.1051) loss: 0.8144 (0.8132) time: 0.1739 data: 0.0868 max mem: 9377 +Train: [26] [3100/6250] eta: 0:08:34 lr: 0.000110 grad: 0.1015 (0.1050) loss: 0.8117 (0.8131) time: 0.1727 data: 0.0786 max mem: 9377 +Train: [26] [3200/6250] eta: 0:08:16 lr: 0.000110 grad: 0.1008 (0.1049) loss: 0.8103 (0.8130) time: 0.1304 data: 0.0404 max mem: 9377 +Train: [26] [3300/6250] eta: 0:08:00 lr: 0.000110 grad: 0.0975 (0.1048) loss: 0.8122 (0.8130) time: 0.1890 data: 0.0996 max mem: 9377 +Train: [26] [3400/6250] eta: 0:07:42 lr: 0.000110 grad: 0.1057 (0.1048) loss: 0.8016 (0.8129) time: 0.1580 data: 0.0640 max mem: 9377 +Train: [26] [3500/6250] eta: 0:07:25 lr: 0.000110 grad: 0.0987 (0.1048) loss: 0.8117 (0.8128) time: 0.1395 data: 0.0569 max mem: 9377 +Train: [26] [3600/6250] eta: 0:07:10 lr: 0.000110 grad: 0.1003 (0.1048) loss: 0.8038 (0.8127) time: 0.1647 data: 0.0824 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:54 lr: 0.000110 grad: 0.1042 (0.1048) loss: 0.8157 (0.8127) time: 0.1704 data: 0.0866 max mem: 9377 +Train: [26] [3800/6250] eta: 0:06:38 lr: 0.000110 grad: 0.0957 (0.1047) loss: 0.8160 (0.8127) time: 0.1738 data: 0.0871 max mem: 9377 +Train: [26] [3900/6250] eta: 0:06:21 lr: 0.000110 grad: 0.0978 (0.1047) loss: 0.8112 (0.8126) time: 0.1832 data: 0.0878 max mem: 9377 +Train: [26] [4000/6250] eta: 0:06:05 lr: 0.000110 grad: 0.0966 (0.1046) loss: 0.8142 (0.8125) time: 0.1129 data: 0.0003 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:48 lr: 0.000110 grad: 0.1023 (0.1046) loss: 0.8002 (0.8125) time: 0.1279 data: 0.0387 max mem: 9377 +Train: [26] [4200/6250] eta: 0:05:32 lr: 0.000110 grad: 0.1039 (0.1046) loss: 0.8141 (0.8124) time: 0.1889 data: 0.0996 max mem: 9377 +Train: [26] [4300/6250] eta: 0:05:15 lr: 0.000110 grad: 0.1055 (0.1047) loss: 0.8068 (0.8123) time: 0.1583 data: 0.0788 max mem: 9377 +Train: [26] [4400/6250] eta: 0:05:00 lr: 0.000110 grad: 0.1143 (0.1048) loss: 0.8021 (0.8121) time: 0.1985 data: 0.1167 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:43 lr: 0.000110 grad: 0.1098 (0.1048) loss: 0.8054 (0.8120) time: 0.1431 data: 0.0638 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:27 lr: 0.000110 grad: 0.1042 (0.1048) loss: 0.8095 (0.8119) time: 0.1801 data: 0.0948 max mem: 9377 +Train: [26] [4700/6250] eta: 0:04:11 lr: 0.000110 grad: 0.1045 (0.1049) loss: 0.8010 (0.8118) time: 0.1914 data: 0.1100 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:56 lr: 0.000109 grad: 0.1072 (0.1050) loss: 0.8011 (0.8118) time: 0.1205 data: 0.0444 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:40 lr: 0.000109 grad: 0.1028 (0.1051) loss: 0.8084 (0.8117) time: 0.1370 data: 0.0313 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:32 lr: 0.000109 grad: 0.1011 (0.1051) loss: 0.8120 (0.8117) time: 0.6638 data: 0.5112 max mem: 9377 +Train: [26] [5100/6250] eta: 0:03:18 lr: 0.000109 grad: 0.1052 (0.1052) loss: 0.8081 (0.8116) time: 0.1034 data: 0.0003 max mem: 9377 +Train: [26] [5200/6250] eta: 0:03:03 lr: 0.000109 grad: 0.1055 (0.1052) loss: 0.8174 (0.8116) time: 0.3656 data: 0.2231 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:45 lr: 0.000109 grad: 0.1081 (0.1053) loss: 0.8098 (0.8115) time: 0.1640 data: 0.0815 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:30 lr: 0.000109 grad: 0.1073 (0.1054) loss: 0.8051 (0.8115) time: 0.1366 data: 0.0235 max mem: 9377 +Train: [26] [5500/6250] eta: 0:02:12 lr: 0.000109 grad: 0.1112 (0.1054) loss: 0.8005 (0.8114) time: 0.1576 data: 0.0782 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:54 lr: 0.000109 grad: 0.1033 (0.1053) loss: 0.8095 (0.8114) time: 0.1567 data: 0.0606 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:37 lr: 0.000109 grad: 0.1003 (0.1053) loss: 0.8050 (0.8113) time: 0.1390 data: 0.0485 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:19 lr: 0.000109 grad: 0.1018 (0.1053) loss: 0.8075 (0.8113) time: 0.2640 data: 0.1866 max mem: 9377 +Train: [26] [5900/6250] eta: 0:01:02 lr: 0.000109 grad: 0.1016 (0.1052) loss: 0.8148 (0.8113) time: 0.1091 data: 0.0002 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:44 lr: 0.000109 grad: 0.1021 (0.1052) loss: 0.8045 (0.8113) time: 0.1979 data: 0.1135 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:26 lr: 0.000109 grad: 0.0975 (0.1051) loss: 0.8083 (0.8113) time: 0.1539 data: 0.0593 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:08 lr: 0.000109 grad: 0.0990 (0.1051) loss: 0.8133 (0.8112) time: 0.3342 data: 0.2485 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.1059 (0.1051) loss: 0.8092 (0.8112) time: 0.1815 data: 0.0823 max mem: 9377 +Train: [26] Total time: 0:18:36 (0.1787 s / it) +Averaged stats: lr: 0.000109 grad: 0.1059 (0.1051) loss: 0.8092 (0.8112) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:05:38 loss: 0.8400 (0.8400) time: 5.4669 data: 5.4357 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8361 (0.8355) time: 0.1199 data: 0.0949 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (hcp-train-subset): loss: 0.8361 (0.8355) +Eval (hcp-val): [26] [ 0/62] eta: 0:04:12 loss: 0.8402 (0.8402) time: 4.0762 data: 4.0058 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8450 (0.8456) time: 0.1206 data: 0.0956 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (hcp-val): loss: 0.8450 (0.8456) +Eval (nsd-val): [26] [ 0/62] eta: 0:03:25 loss: 0.8112 (0.8112) time: 3.3133 data: 3.2440 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8212 (0.8214) time: 0.1136 data: 0.0869 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (nsd-val): loss: 0.8212 (0.8214) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-best.pth +Train: [27] [ 0/6250] eta: 7:09:26 lr: 0.000109 grad: 0.0701 (0.0701) loss: 0.8350 (0.8350) time: 4.1226 data: 3.7834 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:22:32 lr: 0.000109 grad: 0.1168 (0.1543) loss: 0.8240 (0.8250) time: 0.1850 data: 0.0716 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:19:17 lr: 0.000109 grad: 0.1071 (0.1340) loss: 0.8149 (0.8222) time: 0.1759 data: 0.0828 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:18:40 lr: 0.000109 grad: 0.1121 (0.1276) loss: 0.8057 (0.8182) time: 0.1264 data: 0.0167 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:18:05 lr: 0.000109 grad: 0.1123 (0.1230) loss: 0.8077 (0.8151) time: 0.1702 data: 0.0740 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:17:37 lr: 0.000109 grad: 0.0962 (0.1190) loss: 0.8074 (0.8139) time: 0.1783 data: 0.0890 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:16:52 lr: 0.000109 grad: 0.0971 (0.1168) loss: 0.8034 (0.8123) time: 0.1316 data: 0.0190 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:16:20 lr: 0.000109 grad: 0.1015 (0.1151) loss: 0.8061 (0.8110) time: 0.1534 data: 0.0438 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:15:43 lr: 0.000109 grad: 0.0955 (0.1134) loss: 0.8119 (0.8105) time: 0.1407 data: 0.0292 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:15:24 lr: 0.000109 grad: 0.0968 (0.1126) loss: 0.8155 (0.8099) time: 0.1934 data: 0.1136 max mem: 9377 +Train: [27] [1000/6250] eta: 0:14:55 lr: 0.000109 grad: 0.1039 (0.1119) loss: 0.8063 (0.8093) time: 0.1441 data: 0.0605 max mem: 9377 +Train: [27] [1100/6250] eta: 0:14:38 lr: 0.000109 grad: 0.1089 (0.1114) loss: 0.8130 (0.8089) time: 0.2144 data: 0.1334 max mem: 9377 +Train: [27] [1200/6250] eta: 0:14:11 lr: 0.000109 grad: 0.0993 (0.1108) loss: 0.8071 (0.8085) time: 0.1315 data: 0.0524 max mem: 9377 +Train: [27] [1300/6250] eta: 0:13:54 lr: 0.000109 grad: 0.1002 (0.1100) loss: 0.8039 (0.8085) time: 0.1396 data: 0.0553 max mem: 9377 +Train: [27] [1400/6250] eta: 0:13:30 lr: 0.000109 grad: 0.1025 (0.1095) loss: 0.8114 (0.8085) time: 0.1135 data: 0.0280 max mem: 9377 +Train: [27] [1500/6250] eta: 0:13:13 lr: 0.000109 grad: 0.0980 (0.1091) loss: 0.8134 (0.8085) time: 0.1535 data: 0.0684 max mem: 9377 +Train: [27] [1600/6250] eta: 0:12:53 lr: 0.000109 grad: 0.1076 (0.1090) loss: 0.8008 (0.8081) time: 0.1856 data: 0.0994 max mem: 9377 +Train: [27] [1700/6250] eta: 0:12:35 lr: 0.000109 grad: 0.1066 (0.1089) loss: 0.8059 (0.8079) time: 0.1786 data: 0.0887 max mem: 9377 +Train: [27] [1800/6250] eta: 0:12:16 lr: 0.000109 grad: 0.0998 (0.1088) loss: 0.7999 (0.8077) time: 0.1233 data: 0.0393 max mem: 9377 +Train: [27] [1900/6250] eta: 0:11:58 lr: 0.000109 grad: 0.0982 (0.1087) loss: 0.8090 (0.8075) time: 0.2121 data: 0.1305 max mem: 9377 +Train: [27] [2000/6250] eta: 0:11:41 lr: 0.000109 grad: 0.1065 (0.1087) loss: 0.8053 (0.8073) time: 0.1730 data: 0.0900 max mem: 9377 +Train: [27] [2100/6250] eta: 0:11:20 lr: 0.000109 grad: 0.1067 (0.1087) loss: 0.8014 (0.8072) time: 0.1484 data: 0.0680 max mem: 9377 +Train: [27] [2200/6250] eta: 0:11:05 lr: 0.000109 grad: 0.1180 (0.1089) loss: 0.7981 (0.8070) time: 0.1236 data: 0.0376 max mem: 9377 +Train: [27] [2300/6250] eta: 0:10:46 lr: 0.000109 grad: 0.1060 (0.1091) loss: 0.8099 (0.8067) time: 0.1456 data: 0.0656 max mem: 9377 +Train: [27] [2400/6250] eta: 0:10:32 lr: 0.000109 grad: 0.1159 (0.1092) loss: 0.7999 (0.8067) time: 0.1894 data: 0.0977 max mem: 9377 +Train: [27] [2500/6250] eta: 0:10:13 lr: 0.000109 grad: 0.1102 (0.1093) loss: 0.8061 (0.8065) time: 0.1625 data: 0.0853 max mem: 9377 +Train: [27] [2600/6250] eta: 0:09:55 lr: 0.000109 grad: 0.1003 (0.1093) loss: 0.8116 (0.8065) time: 0.1564 data: 0.0720 max mem: 9377 +Train: [27] [2700/6250] eta: 0:09:40 lr: 0.000109 grad: 0.1109 (0.1094) loss: 0.8025 (0.8064) time: 0.1425 data: 0.0536 max mem: 9377 +Train: [27] [2800/6250] eta: 0:09:27 lr: 0.000109 grad: 0.1100 (0.1095) loss: 0.8120 (0.8063) time: 0.2930 data: 0.2043 max mem: 9377 +Train: [27] [2900/6250] eta: 0:09:10 lr: 0.000109 grad: 0.1011 (0.1095) loss: 0.7998 (0.8062) time: 0.1239 data: 0.0451 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:51 lr: 0.000109 grad: 0.1043 (0.1095) loss: 0.8058 (0.8063) time: 0.1279 data: 0.0333 max mem: 9377 +Train: [27] [3100/6250] eta: 0:08:35 lr: 0.000108 grad: 0.1150 (0.1095) loss: 0.8048 (0.8062) time: 0.1474 data: 0.0540 max mem: 9377 +Train: [27] [3200/6250] eta: 0:08:17 lr: 0.000108 grad: 0.1125 (0.1095) loss: 0.7962 (0.8061) time: 0.1501 data: 0.0602 max mem: 9377 +Train: [27] [3300/6250] eta: 0:08:00 lr: 0.000108 grad: 0.1142 (0.1096) loss: 0.7969 (0.8060) time: 0.1565 data: 0.0657 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:43 lr: 0.000108 grad: 0.1039 (0.1097) loss: 0.8087 (0.8060) time: 0.1658 data: 0.0812 max mem: 9377 +Train: [27] [3500/6250] eta: 0:07:26 lr: 0.000108 grad: 0.1049 (0.1097) loss: 0.8057 (0.8058) time: 0.1603 data: 0.0716 max mem: 9377 +Train: [27] [3600/6250] eta: 0:07:09 lr: 0.000108 grad: 0.1001 (0.1097) loss: 0.8127 (0.8057) time: 0.1341 data: 0.0502 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:53 lr: 0.000108 grad: 0.1101 (0.1096) loss: 0.7993 (0.8057) time: 0.1354 data: 0.0411 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:39 lr: 0.000108 grad: 0.1034 (0.1096) loss: 0.8119 (0.8058) time: 0.3143 data: 0.2363 max mem: 9377 +Train: [27] [3900/6250] eta: 0:06:23 lr: 0.000108 grad: 0.1001 (0.1095) loss: 0.8058 (0.8057) time: 0.2021 data: 0.1223 max mem: 9377 +Train: [27] [4000/6250] eta: 0:06:07 lr: 0.000108 grad: 0.1020 (0.1095) loss: 0.8075 (0.8057) time: 0.1143 data: 0.0004 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:51 lr: 0.000108 grad: 0.1125 (0.1096) loss: 0.8091 (0.8058) time: 0.1406 data: 0.0594 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:39 lr: 0.000108 grad: 0.1069 (0.1096) loss: 0.8071 (0.8057) time: 0.2482 data: 0.1494 max mem: 9377 +Train: [27] [4300/6250] eta: 0:05:26 lr: 0.000108 grad: 0.1036 (0.1096) loss: 0.8092 (0.8057) time: 0.2386 data: 0.1419 max mem: 9377 +Train: [27] [4400/6250] eta: 0:05:08 lr: 0.000108 grad: 0.1033 (0.1096) loss: 0.8090 (0.8057) time: 0.1269 data: 0.0422 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:51 lr: 0.000108 grad: 0.1014 (0.1095) loss: 0.8108 (0.8058) time: 0.1291 data: 0.0518 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:34 lr: 0.000108 grad: 0.1062 (0.1095) loss: 0.8078 (0.8059) time: 0.1786 data: 0.0953 max mem: 9377 +Train: [27] [4700/6250] eta: 0:04:17 lr: 0.000108 grad: 0.1028 (0.1095) loss: 0.8090 (0.8059) time: 0.1329 data: 0.0492 max mem: 9377 +Train: [27] [4800/6250] eta: 0:04:00 lr: 0.000108 grad: 0.1093 (0.1095) loss: 0.8032 (0.8058) time: 0.1209 data: 0.0383 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:43 lr: 0.000108 grad: 0.1004 (0.1094) loss: 0.8098 (0.8059) time: 0.1000 data: 0.0094 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:27 lr: 0.000108 grad: 0.0968 (0.1093) loss: 0.8147 (0.8060) time: 0.1567 data: 0.0580 max mem: 9377 +Train: [27] [5100/6250] eta: 0:03:10 lr: 0.000108 grad: 0.1027 (0.1092) loss: 0.8176 (0.8060) time: 0.1733 data: 0.0928 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:53 lr: 0.000108 grad: 0.0946 (0.1091) loss: 0.8173 (0.8061) time: 0.1445 data: 0.0717 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:36 lr: 0.000108 grad: 0.1038 (0.1091) loss: 0.8068 (0.8061) time: 0.1740 data: 0.0868 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:20 lr: 0.000108 grad: 0.1014 (0.1090) loss: 0.8067 (0.8062) time: 0.1591 data: 0.0597 max mem: 9377 +Train: [27] [5500/6250] eta: 0:02:04 lr: 0.000108 grad: 0.0979 (0.1089) loss: 0.8144 (0.8062) time: 0.3496 data: 0.2346 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:48 lr: 0.000108 grad: 0.0958 (0.1087) loss: 0.8131 (0.8063) time: 0.1513 data: 0.0584 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:31 lr: 0.000108 grad: 0.1008 (0.1087) loss: 0.8055 (0.8064) time: 0.1314 data: 0.0003 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:15 lr: 0.000108 grad: 0.1002 (0.1086) loss: 0.8142 (0.8064) time: 0.2855 data: 0.1858 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:59 lr: 0.000108 grad: 0.1036 (0.1085) loss: 0.8028 (0.8064) time: 0.1534 data: 0.0557 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:42 lr: 0.000108 grad: 0.1054 (0.1085) loss: 0.8034 (0.8064) time: 0.1429 data: 0.0483 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:25 lr: 0.000108 grad: 0.1066 (0.1084) loss: 0.7939 (0.8063) time: 0.2535 data: 0.1642 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:08 lr: 0.000108 grad: 0.0981 (0.1083) loss: 0.8073 (0.8063) time: 0.1457 data: 0.0590 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.1053 (0.1083) loss: 0.8018 (0.8063) time: 0.1564 data: 0.0715 max mem: 9377 +Train: [27] Total time: 0:17:41 (0.1698 s / it) +Averaged stats: lr: 0.000108 grad: 0.1053 (0.1083) loss: 0.8018 (0.8063) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:05:14 loss: 0.8389 (0.8389) time: 5.0677 data: 5.0361 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8375 (0.8360) time: 0.1361 data: 0.1113 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-train-subset): loss: 0.8375 (0.8360) +Eval (hcp-val): [27] [ 0/62] eta: 0:04:23 loss: 0.8461 (0.8461) time: 4.2453 data: 4.1718 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8477 (0.8490) time: 0.1264 data: 0.1013 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:13 (0.2193 s / it) +Averaged stats (hcp-val): loss: 0.8477 (0.8490) +Eval (nsd-val): [27] [ 0/62] eta: 0:03:15 loss: 0.8182 (0.8182) time: 3.1559 data: 3.0674 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8237 (0.8252) time: 0.1297 data: 0.1043 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (nsd-val): loss: 0.8237 (0.8252) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 13:01:29 lr: 0.000108 grad: 0.1442 (0.1442) loss: 0.8200 (0.8200) time: 7.5023 data: 7.3925 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:23:38 lr: 0.000108 grad: 0.1021 (0.1242) loss: 0.8219 (0.8287) time: 0.1698 data: 0.0652 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:20:42 lr: 0.000108 grad: 0.1079 (0.1171) loss: 0.8170 (0.8244) time: 0.2080 data: 0.1095 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:19:14 lr: 0.000108 grad: 0.0938 (0.1135) loss: 0.8218 (0.8230) time: 0.1504 data: 0.0430 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:18:11 lr: 0.000108 grad: 0.0943 (0.1106) loss: 0.8195 (0.8219) time: 0.1818 data: 0.0920 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:17:28 lr: 0.000108 grad: 0.1015 (0.1103) loss: 0.8175 (0.8206) time: 0.1820 data: 0.1006 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:17:04 lr: 0.000108 grad: 0.1028 (0.1096) loss: 0.8140 (0.8194) time: 0.1377 data: 0.0406 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:16:52 lr: 0.000108 grad: 0.1008 (0.1094) loss: 0.8122 (0.8183) time: 0.2365 data: 0.1397 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:16:13 lr: 0.000108 grad: 0.0979 (0.1086) loss: 0.8124 (0.8173) time: 0.1453 data: 0.0538 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:15:41 lr: 0.000108 grad: 0.1028 (0.1086) loss: 0.8148 (0.8166) time: 0.1184 data: 0.0139 max mem: 9377 +Train: [28] [1000/6250] eta: 0:15:13 lr: 0.000108 grad: 0.1036 (0.1083) loss: 0.8101 (0.8157) time: 0.1777 data: 0.0827 max mem: 9377 +Train: [28] [1100/6250] eta: 0:14:42 lr: 0.000108 grad: 0.1127 (0.1082) loss: 0.7999 (0.8148) time: 0.1434 data: 0.0516 max mem: 9377 +Train: [28] [1200/6250] eta: 0:14:12 lr: 0.000108 grad: 0.0992 (0.1081) loss: 0.8077 (0.8139) time: 0.1449 data: 0.0553 max mem: 9377 +Train: [28] [1300/6250] eta: 0:13:46 lr: 0.000107 grad: 0.1078 (0.1079) loss: 0.8042 (0.8132) time: 0.1463 data: 0.0532 max mem: 9377 +Train: [28] [1400/6250] eta: 0:13:25 lr: 0.000107 grad: 0.1045 (0.1078) loss: 0.8059 (0.8126) time: 0.1634 data: 0.0774 max mem: 9377 +Train: [28] [1500/6250] eta: 0:13:02 lr: 0.000107 grad: 0.1039 (0.1076) loss: 0.8060 (0.8121) time: 0.1682 data: 0.0809 max mem: 9377 +Train: [28] [1600/6250] eta: 0:12:42 lr: 0.000107 grad: 0.0984 (0.1074) loss: 0.8122 (0.8116) time: 0.1644 data: 0.0801 max mem: 9377 +Train: [28] [1700/6250] eta: 0:12:21 lr: 0.000107 grad: 0.1110 (0.1073) loss: 0.7943 (0.8111) time: 0.1511 data: 0.0685 max mem: 9377 +Train: [28] [1800/6250] eta: 0:12:00 lr: 0.000107 grad: 0.1102 (0.1073) loss: 0.7990 (0.8105) time: 0.1037 data: 0.0194 max mem: 9377 +Train: [28] [1900/6250] eta: 0:11:41 lr: 0.000107 grad: 0.1074 (0.1075) loss: 0.8034 (0.8099) time: 0.1167 data: 0.0305 max mem: 9377 +Train: [28] [2000/6250] eta: 0:11:28 lr: 0.000107 grad: 0.1001 (0.1075) loss: 0.8033 (0.8095) time: 0.1720 data: 0.0800 max mem: 9377 +Train: [28] [2100/6250] eta: 0:11:16 lr: 0.000107 grad: 0.1008 (0.1076) loss: 0.7982 (0.8089) time: 0.2591 data: 0.1711 max mem: 9377 +Train: [28] [2200/6250] eta: 0:11:00 lr: 0.000107 grad: 0.1159 (0.1078) loss: 0.7973 (0.8084) time: 0.1240 data: 0.0270 max mem: 9377 +Train: [28] [2300/6250] eta: 0:10:45 lr: 0.000107 grad: 0.1131 (0.1079) loss: 0.8063 (0.8080) time: 0.0932 data: 0.0002 max mem: 9377 +Train: [28] [2400/6250] eta: 0:10:25 lr: 0.000107 grad: 0.0958 (0.1078) loss: 0.8059 (0.8077) time: 0.1265 data: 0.0491 max mem: 9377 +Train: [28] [2500/6250] eta: 0:10:08 lr: 0.000107 grad: 0.1068 (0.1078) loss: 0.7963 (0.8073) time: 0.1177 data: 0.0301 max mem: 9377 +Train: [28] [2600/6250] eta: 0:09:50 lr: 0.000107 grad: 0.1120 (0.1077) loss: 0.7978 (0.8072) time: 0.1337 data: 0.0465 max mem: 9377 +Train: [28] [2700/6250] eta: 0:09:38 lr: 0.000107 grad: 0.1078 (0.1077) loss: 0.8007 (0.8070) time: 0.2441 data: 0.1658 max mem: 9377 +Train: [28] [2800/6250] eta: 0:09:22 lr: 0.000107 grad: 0.1006 (0.1076) loss: 0.8084 (0.8070) time: 0.1463 data: 0.0414 max mem: 9377 +Train: [28] [2900/6250] eta: 0:09:08 lr: 0.000107 grad: 0.1049 (0.1076) loss: 0.8076 (0.8068) time: 0.1640 data: 0.0657 max mem: 9377 +Train: [28] [3000/6250] eta: 0:08:50 lr: 0.000107 grad: 0.1066 (0.1075) loss: 0.8009 (0.8066) time: 0.1664 data: 0.0792 max mem: 9377 +Train: [28] [3100/6250] eta: 0:08:32 lr: 0.000107 grad: 0.1034 (0.1074) loss: 0.7990 (0.8065) time: 0.1374 data: 0.0501 max mem: 9377 +Train: [28] [3200/6250] eta: 0:08:15 lr: 0.000107 grad: 0.0965 (0.1073) loss: 0.8121 (0.8065) time: 0.1409 data: 0.0552 max mem: 9377 +Train: [28] [3300/6250] eta: 0:08:00 lr: 0.000107 grad: 0.1025 (0.1072) loss: 0.8121 (0.8065) time: 0.1628 data: 0.0767 max mem: 9377 +Train: [28] [3400/6250] eta: 0:07:44 lr: 0.000107 grad: 0.0965 (0.1071) loss: 0.8096 (0.8066) time: 0.1084 data: 0.0102 max mem: 9377 +Train: [28] [3500/6250] eta: 0:07:29 lr: 0.000107 grad: 0.1014 (0.1070) loss: 0.8088 (0.8066) time: 0.0975 data: 0.0002 max mem: 9377 +Train: [28] [3600/6250] eta: 0:07:26 lr: 0.000107 grad: 0.0985 (0.1069) loss: 0.8058 (0.8067) time: 0.8027 data: 0.7065 max mem: 9377 +Train: [28] [3700/6250] eta: 0:07:08 lr: 0.000107 grad: 0.1037 (0.1069) loss: 0.8054 (0.8067) time: 0.0951 data: 0.0002 max mem: 9377 +Train: [28] [3800/6250] eta: 0:06:52 lr: 0.000107 grad: 0.1023 (0.1069) loss: 0.7998 (0.8066) time: 0.1939 data: 0.1026 max mem: 9377 +Train: [28] [3900/6250] eta: 0:06:33 lr: 0.000107 grad: 0.1061 (0.1070) loss: 0.8069 (0.8066) time: 0.1386 data: 0.0489 max mem: 9377 +Train: [28] [4000/6250] eta: 0:06:20 lr: 0.000107 grad: 0.0989 (0.1070) loss: 0.8106 (0.8066) time: 0.4610 data: 0.3636 max mem: 9377 +Train: [28] [4100/6250] eta: 0:06:03 lr: 0.000107 grad: 0.1088 (0.1070) loss: 0.8018 (0.8065) time: 0.1234 data: 0.0003 max mem: 9377 +Train: [28] [4200/6250] eta: 0:05:45 lr: 0.000107 grad: 0.1088 (0.1070) loss: 0.7950 (0.8064) time: 0.1393 data: 0.0448 max mem: 9377 +Train: [28] [4300/6250] eta: 0:05:28 lr: 0.000107 grad: 0.1080 (0.1070) loss: 0.8066 (0.8064) time: 0.1849 data: 0.1021 max mem: 9377 +Train: [28] [4400/6250] eta: 0:05:10 lr: 0.000107 grad: 0.1114 (0.1071) loss: 0.7995 (0.8063) time: 0.1735 data: 0.0827 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:53 lr: 0.000107 grad: 0.1017 (0.1072) loss: 0.7988 (0.8063) time: 0.1112 data: 0.0016 max mem: 9377 +Train: [28] [4600/6250] eta: 0:04:39 lr: 0.000107 grad: 0.1126 (0.1073) loss: 0.7928 (0.8061) time: 0.0870 data: 0.0002 max mem: 9377 +Train: [28] [4700/6250] eta: 0:04:21 lr: 0.000107 grad: 0.1067 (0.1073) loss: 0.7993 (0.8061) time: 0.1473 data: 0.0628 max mem: 9377 +Train: [28] [4800/6250] eta: 0:04:04 lr: 0.000107 grad: 0.1081 (0.1073) loss: 0.8080 (0.8060) time: 0.1282 data: 0.0304 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:47 lr: 0.000107 grad: 0.1105 (0.1074) loss: 0.7929 (0.8059) time: 0.1465 data: 0.0497 max mem: 9377 +Train: [28] [5000/6250] eta: 0:03:30 lr: 0.000107 grad: 0.1055 (0.1074) loss: 0.8091 (0.8058) time: 0.1299 data: 0.0488 max mem: 9377 +Train: [28] [5100/6250] eta: 0:03:13 lr: 0.000107 grad: 0.1066 (0.1074) loss: 0.8080 (0.8057) time: 0.1278 data: 0.0449 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:56 lr: 0.000107 grad: 0.1170 (0.1075) loss: 0.7963 (0.8057) time: 0.1397 data: 0.0529 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:39 lr: 0.000107 grad: 0.0988 (0.1075) loss: 0.8077 (0.8057) time: 0.2020 data: 0.1042 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:22 lr: 0.000107 grad: 0.0997 (0.1075) loss: 0.8068 (0.8057) time: 0.1080 data: 0.0056 max mem: 9377 +Train: [28] [5500/6250] eta: 0:02:05 lr: 0.000107 grad: 0.1062 (0.1074) loss: 0.8073 (0.8058) time: 0.1370 data: 0.0520 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:48 lr: 0.000106 grad: 0.1007 (0.1074) loss: 0.8049 (0.8058) time: 0.1306 data: 0.0468 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:32 lr: 0.000106 grad: 0.1112 (0.1074) loss: 0.8044 (0.8058) time: 0.1542 data: 0.0602 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:15 lr: 0.000106 grad: 0.1067 (0.1074) loss: 0.8008 (0.8058) time: 0.1041 data: 0.0091 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:58 lr: 0.000106 grad: 0.1029 (0.1074) loss: 0.8124 (0.8059) time: 0.1738 data: 0.0885 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:41 lr: 0.000106 grad: 0.1021 (0.1075) loss: 0.8026 (0.8059) time: 0.1696 data: 0.0689 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:25 lr: 0.000106 grad: 0.0979 (0.1074) loss: 0.8156 (0.8060) time: 0.1571 data: 0.0647 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:08 lr: 0.000106 grad: 0.1015 (0.1074) loss: 0.8067 (0.8061) time: 0.1637 data: 0.0785 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.1035 (0.1074) loss: 0.8090 (0.8061) time: 0.1866 data: 0.0848 max mem: 9377 +Train: [28] Total time: 0:17:31 (0.1683 s / it) +Averaged stats: lr: 0.000106 grad: 0.1035 (0.1074) loss: 0.8090 (0.8061) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:06:18 loss: 0.8396 (0.8396) time: 6.1106 data: 6.0770 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8312 (0.8337) time: 0.1162 data: 0.0913 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:14 (0.2357 s / it) +Averaged stats (hcp-train-subset): loss: 0.8312 (0.8337) +Eval (hcp-val): [28] [ 0/62] eta: 0:05:22 loss: 0.8464 (0.8464) time: 5.1985 data: 5.1660 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8451 (0.8473) time: 0.0517 data: 0.0252 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (hcp-val): loss: 0.8451 (0.8473) +Eval (nsd-val): [28] [ 0/62] eta: 0:04:32 loss: 0.8088 (0.8088) time: 4.3993 data: 4.3617 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8217 (0.8225) time: 0.1433 data: 0.1182 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:14 (0.2328 s / it) +Averaged stats (nsd-val): loss: 0.8217 (0.8225) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 10:36:36 lr: 0.000106 grad: 0.1402 (0.1402) loss: 0.8146 (0.8146) time: 6.1114 data: 5.9274 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:23:03 lr: 0.000106 grad: 0.1023 (0.1272) loss: 0.8242 (0.8302) time: 0.1664 data: 0.0587 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:19:46 lr: 0.000106 grad: 0.1039 (0.1162) loss: 0.8167 (0.8273) time: 0.1490 data: 0.0638 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:18:15 lr: 0.000106 grad: 0.0943 (0.1099) loss: 0.8342 (0.8261) time: 0.1655 data: 0.0759 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:17:36 lr: 0.000106 grad: 0.1032 (0.1077) loss: 0.8070 (0.8242) time: 0.1998 data: 0.1133 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:16:49 lr: 0.000106 grad: 0.1074 (0.1070) loss: 0.8042 (0.8216) time: 0.1280 data: 0.0268 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:16:22 lr: 0.000106 grad: 0.1062 (0.1067) loss: 0.8114 (0.8197) time: 0.1997 data: 0.1022 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:16:13 lr: 0.000106 grad: 0.1057 (0.1064) loss: 0.8054 (0.8177) time: 0.2635 data: 0.1813 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:15:35 lr: 0.000106 grad: 0.0960 (0.1062) loss: 0.8036 (0.8162) time: 0.1638 data: 0.0806 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:15:12 lr: 0.000106 grad: 0.1022 (0.1058) loss: 0.8055 (0.8153) time: 0.1881 data: 0.1063 max mem: 9377 +Train: [29] [1000/6250] eta: 0:14:43 lr: 0.000106 grad: 0.0959 (0.1054) loss: 0.8141 (0.8146) time: 0.1524 data: 0.0689 max mem: 9377 +Train: [29] [1100/6250] eta: 0:14:24 lr: 0.000106 grad: 0.1089 (0.1053) loss: 0.8013 (0.8138) time: 0.1654 data: 0.0762 max mem: 9377 +Train: [29] [1200/6250] eta: 0:14:01 lr: 0.000106 grad: 0.1089 (0.1052) loss: 0.8059 (0.8132) time: 0.1716 data: 0.0811 max mem: 9377 +Train: [29] [1300/6250] eta: 0:13:40 lr: 0.000106 grad: 0.1067 (0.1050) loss: 0.7984 (0.8126) time: 0.1633 data: 0.0777 max mem: 9377 +Train: [29] [1400/6250] eta: 0:13:17 lr: 0.000106 grad: 0.1069 (0.1052) loss: 0.8075 (0.8119) time: 0.1469 data: 0.0632 max mem: 9377 +Train: [29] [1500/6250] eta: 0:12:59 lr: 0.000106 grad: 0.1000 (0.1053) loss: 0.8128 (0.8114) time: 0.1124 data: 0.0002 max mem: 9377 +Train: [29] [1600/6250] eta: 0:12:43 lr: 0.000106 grad: 0.1074 (0.1054) loss: 0.8041 (0.8109) time: 0.1629 data: 0.0613 max mem: 9377 +Train: [29] [1700/6250] eta: 0:12:29 lr: 0.000106 grad: 0.1028 (0.1053) loss: 0.8112 (0.8107) time: 0.1137 data: 0.0019 max mem: 9377 +Train: [29] [1800/6250] eta: 0:12:14 lr: 0.000106 grad: 0.1024 (0.1053) loss: 0.7965 (0.8103) time: 0.1307 data: 0.0437 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:55 lr: 0.000106 grad: 0.1067 (0.1056) loss: 0.8037 (0.8099) time: 0.1610 data: 0.0741 max mem: 9377 +Train: [29] [2000/6250] eta: 0:11:36 lr: 0.000106 grad: 0.1015 (0.1056) loss: 0.7950 (0.8094) time: 0.1293 data: 0.0457 max mem: 9377 +Train: [29] [2100/6250] eta: 0:11:22 lr: 0.000106 grad: 0.1044 (0.1057) loss: 0.8076 (0.8090) time: 0.1036 data: 0.0002 max mem: 9377 +Train: [29] [2200/6250] eta: 0:11:02 lr: 0.000106 grad: 0.0998 (0.1057) loss: 0.8013 (0.8087) time: 0.1233 data: 0.0355 max mem: 9377 +Train: [29] [2300/6250] eta: 0:10:46 lr: 0.000106 grad: 0.1039 (0.1057) loss: 0.8014 (0.8086) time: 0.2027 data: 0.1197 max mem: 9377 +Train: [29] [2400/6250] eta: 0:10:28 lr: 0.000106 grad: 0.1029 (0.1058) loss: 0.8014 (0.8083) time: 0.1130 data: 0.0196 max mem: 9377 +Train: [29] [2500/6250] eta: 0:10:11 lr: 0.000106 grad: 0.1061 (0.1058) loss: 0.8047 (0.8082) time: 0.1584 data: 0.0757 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:55 lr: 0.000106 grad: 0.1073 (0.1059) loss: 0.8003 (0.8080) time: 0.1525 data: 0.0656 max mem: 9377 +Train: [29] [2700/6250] eta: 0:09:40 lr: 0.000106 grad: 0.1133 (0.1062) loss: 0.7891 (0.8077) time: 0.2308 data: 0.1413 max mem: 9377 +Train: [29] [2800/6250] eta: 0:09:26 lr: 0.000106 grad: 0.1168 (0.1064) loss: 0.7954 (0.8073) time: 0.2120 data: 0.1316 max mem: 9377 +Train: [29] [2900/6250] eta: 0:09:12 lr: 0.000106 grad: 0.1024 (0.1066) loss: 0.7995 (0.8070) time: 0.1267 data: 0.0212 max mem: 9377 +Train: [29] [3000/6250] eta: 0:09:12 lr: 0.000106 grad: 0.1089 (0.1067) loss: 0.7999 (0.8067) time: 0.1965 data: 0.0955 max mem: 9377 +Train: [29] [3100/6250] eta: 0:08:55 lr: 0.000106 grad: 0.1141 (0.1070) loss: 0.7979 (0.8065) time: 0.1896 data: 0.0981 max mem: 9377 +Train: [29] [3200/6250] eta: 0:08:38 lr: 0.000106 grad: 0.1089 (0.1073) loss: 0.7993 (0.8062) time: 0.1514 data: 0.0679 max mem: 9377 +Train: [29] [3300/6250] eta: 0:08:36 lr: 0.000106 grad: 0.1162 (0.1076) loss: 0.8013 (0.8059) time: 0.1602 data: 0.0003 max mem: 9377 +Train: [29] [3400/6250] eta: 0:08:34 lr: 0.000106 grad: 0.1121 (0.1079) loss: 0.7944 (0.8057) time: 0.1150 data: 0.0002 max mem: 9377 +Train: [29] [3500/6250] eta: 0:08:23 lr: 0.000105 grad: 0.1153 (0.1080) loss: 0.7971 (0.8055) time: 0.1210 data: 0.0003 max mem: 9377 +Train: [29] [3600/6250] eta: 0:08:06 lr: 0.000105 grad: 0.1146 (0.1081) loss: 0.7977 (0.8053) time: 0.2729 data: 0.1075 max mem: 9377 +Train: [29] [3700/6250] eta: 0:07:46 lr: 0.000105 grad: 0.1113 (0.1082) loss: 0.7961 (0.8052) time: 0.1618 data: 0.0431 max mem: 9377 +Train: [29] [3800/6250] eta: 0:07:34 lr: 0.000105 grad: 0.1094 (0.1083) loss: 0.8039 (0.8051) time: 0.1243 data: 0.0003 max mem: 9377 +Train: [29] [3900/6250] eta: 0:07:16 lr: 0.000105 grad: 0.1074 (0.1083) loss: 0.8039 (0.8050) time: 0.2380 data: 0.1325 max mem: 9377 +Train: [29] [4000/6250] eta: 0:06:56 lr: 0.000105 grad: 0.1041 (0.1083) loss: 0.8021 (0.8049) time: 0.1365 data: 0.0396 max mem: 9377 +Train: [29] [4100/6250] eta: 0:06:43 lr: 0.000105 grad: 0.1050 (0.1084) loss: 0.7972 (0.8049) time: 0.2180 data: 0.1281 max mem: 9377 +Train: [29] [4200/6250] eta: 0:06:25 lr: 0.000105 grad: 0.1116 (0.1086) loss: 0.7994 (0.8048) time: 0.1605 data: 0.0651 max mem: 9377 +Train: [29] [4300/6250] eta: 0:06:06 lr: 0.000105 grad: 0.1096 (0.1087) loss: 0.8017 (0.8047) time: 0.1924 data: 0.1058 max mem: 9377 +Train: [29] [4400/6250] eta: 0:05:46 lr: 0.000105 grad: 0.1126 (0.1087) loss: 0.7983 (0.8046) time: 0.1503 data: 0.0618 max mem: 9377 +Train: [29] [4500/6250] eta: 0:05:26 lr: 0.000105 grad: 0.1042 (0.1087) loss: 0.8008 (0.8046) time: 0.1442 data: 0.0489 max mem: 9377 +Train: [29] [4600/6250] eta: 0:05:06 lr: 0.000105 grad: 0.1112 (0.1088) loss: 0.8057 (0.8045) time: 0.1866 data: 0.1018 max mem: 9377 +Train: [29] [4700/6250] eta: 0:04:47 lr: 0.000105 grad: 0.1044 (0.1089) loss: 0.8063 (0.8044) time: 0.1880 data: 0.1007 max mem: 9377 +Train: [29] [4800/6250] eta: 0:04:27 lr: 0.000105 grad: 0.1097 (0.1090) loss: 0.8091 (0.8043) time: 0.1793 data: 0.0929 max mem: 9377 +Train: [29] [4900/6250] eta: 0:04:09 lr: 0.000105 grad: 0.1093 (0.1091) loss: 0.8010 (0.8042) time: 0.2644 data: 0.1682 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:50 lr: 0.000105 grad: 0.1033 (0.1091) loss: 0.8053 (0.8042) time: 0.1433 data: 0.0451 max mem: 9377 +Train: [29] [5100/6250] eta: 0:03:31 lr: 0.000105 grad: 0.1092 (0.1092) loss: 0.8025 (0.8041) time: 0.1673 data: 0.0822 max mem: 9377 +Train: [29] [5200/6250] eta: 0:03:12 lr: 0.000105 grad: 0.1012 (0.1092) loss: 0.8056 (0.8041) time: 0.1486 data: 0.0622 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:53 lr: 0.000105 grad: 0.1087 (0.1092) loss: 0.8063 (0.8041) time: 0.1831 data: 0.0961 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:34 lr: 0.000105 grad: 0.1039 (0.1092) loss: 0.8122 (0.8041) time: 0.2286 data: 0.1477 max mem: 9377 +Train: [29] [5500/6250] eta: 0:02:16 lr: 0.000105 grad: 0.1077 (0.1092) loss: 0.8046 (0.8041) time: 0.2483 data: 0.1635 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:58 lr: 0.000105 grad: 0.1056 (0.1093) loss: 0.8015 (0.8042) time: 0.1394 data: 0.0229 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:39 lr: 0.000105 grad: 0.1057 (0.1093) loss: 0.8095 (0.8041) time: 0.1662 data: 0.0802 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:21 lr: 0.000105 grad: 0.1150 (0.1094) loss: 0.7980 (0.8041) time: 0.1009 data: 0.0081 max mem: 9377 +Train: [29] [5900/6250] eta: 0:01:03 lr: 0.000105 grad: 0.1103 (0.1095) loss: 0.7944 (0.8040) time: 0.1324 data: 0.0442 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:45 lr: 0.000105 grad: 0.1034 (0.1095) loss: 0.8070 (0.8040) time: 0.1642 data: 0.0652 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:27 lr: 0.000105 grad: 0.1058 (0.1095) loss: 0.8019 (0.8040) time: 0.1722 data: 0.0782 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:09 lr: 0.000105 grad: 0.1063 (0.1095) loss: 0.8156 (0.8041) time: 0.1269 data: 0.0365 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.1035 (0.1095) loss: 0.8133 (0.8041) time: 0.1512 data: 0.0531 max mem: 9377 +Train: [29] Total time: 0:18:50 (0.1809 s / it) +Averaged stats: lr: 0.000105 grad: 0.1035 (0.1095) loss: 0.8133 (0.8041) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:03:29 loss: 0.8356 (0.8356) time: 3.3862 data: 3.3129 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8334 (0.8334) time: 0.1391 data: 0.1140 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:14 (0.2377 s / it) +Averaged stats (hcp-train-subset): loss: 0.8334 (0.8334) +Making plots (hcp-train-subset): example=8 +Eval (hcp-val): [29] [ 0/62] eta: 0:05:02 loss: 0.8487 (0.8487) time: 4.8746 data: 4.8436 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8470 (0.8472) time: 0.1224 data: 0.0976 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (hcp-val): loss: 0.8470 (0.8472) +Making plots (hcp-val): example=37 +Eval (nsd-val): [29] [ 0/62] eta: 0:05:14 loss: 0.8125 (0.8125) time: 5.0695 data: 5.0382 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8198 (0.8202) time: 0.1253 data: 0.0983 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8202) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 9:06:05 lr: 0.000105 grad: 0.0734 (0.0734) loss: 0.8447 (0.8447) time: 5.2425 data: 4.9593 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:23:53 lr: 0.000105 grad: 0.1019 (0.1437) loss: 0.8214 (0.8232) time: 0.1526 data: 0.0450 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:20:10 lr: 0.000105 grad: 0.1035 (0.1315) loss: 0.8227 (0.8200) time: 0.1219 data: 0.0259 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:18:49 lr: 0.000105 grad: 0.1092 (0.1243) loss: 0.8101 (0.8180) time: 0.1555 data: 0.0637 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:17:46 lr: 0.000105 grad: 0.1039 (0.1208) loss: 0.8182 (0.8175) time: 0.1525 data: 0.0582 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:16:51 lr: 0.000105 grad: 0.1026 (0.1181) loss: 0.8205 (0.8167) time: 0.1421 data: 0.0523 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:16:09 lr: 0.000105 grad: 0.0963 (0.1159) loss: 0.8162 (0.8158) time: 0.1397 data: 0.0577 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:15:43 lr: 0.000105 grad: 0.1001 (0.1144) loss: 0.8144 (0.8149) time: 0.1403 data: 0.0420 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:15:39 lr: 0.000105 grad: 0.1031 (0.1133) loss: 0.8089 (0.8140) time: 0.1095 data: 0.0002 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:15:21 lr: 0.000105 grad: 0.1064 (0.1126) loss: 0.8101 (0.8133) time: 0.2031 data: 0.1123 max mem: 9377 +Train: [30] [1000/6250] eta: 0:15:01 lr: 0.000105 grad: 0.1042 (0.1123) loss: 0.8042 (0.8125) time: 0.1508 data: 0.0694 max mem: 9377 +Train: [30] [1100/6250] eta: 0:14:36 lr: 0.000105 grad: 0.0995 (0.1121) loss: 0.7954 (0.8116) time: 0.1497 data: 0.0596 max mem: 9377 +Train: [30] [1200/6250] eta: 0:14:12 lr: 0.000105 grad: 0.1075 (0.1118) loss: 0.8034 (0.8110) time: 0.1601 data: 0.0625 max mem: 9377 +Train: [30] [1300/6250] eta: 0:13:53 lr: 0.000105 grad: 0.1111 (0.1116) loss: 0.8003 (0.8103) time: 0.1565 data: 0.0661 max mem: 9377 +Train: [30] [1400/6250] eta: 0:13:32 lr: 0.000104 grad: 0.1105 (0.1117) loss: 0.8020 (0.8097) time: 0.1537 data: 0.0669 max mem: 9377 +Train: [30] [1500/6250] eta: 0:13:12 lr: 0.000104 grad: 0.1052 (0.1113) loss: 0.8001 (0.8095) time: 0.1354 data: 0.0438 max mem: 9377 +Train: [30] [1600/6250] eta: 0:12:52 lr: 0.000104 grad: 0.1006 (0.1112) loss: 0.8027 (0.8089) time: 0.1507 data: 0.0644 max mem: 9377 +Train: [30] [1700/6250] eta: 0:12:36 lr: 0.000104 grad: 0.1139 (0.1113) loss: 0.7987 (0.8083) time: 0.1752 data: 0.0983 max mem: 9377 +Train: [30] [1800/6250] eta: 0:12:19 lr: 0.000104 grad: 0.1130 (0.1113) loss: 0.8004 (0.8077) time: 0.1884 data: 0.0994 max mem: 9377 +Train: [30] [1900/6250] eta: 0:12:01 lr: 0.000104 grad: 0.1075 (0.1113) loss: 0.8061 (0.8072) time: 0.1899 data: 0.1032 max mem: 9377 +Train: [30] [2000/6250] eta: 0:11:42 lr: 0.000104 grad: 0.1035 (0.1114) loss: 0.8062 (0.8067) time: 0.1402 data: 0.0507 max mem: 9377 +Train: [30] [2100/6250] eta: 0:11:24 lr: 0.000104 grad: 0.1091 (0.1115) loss: 0.8024 (0.8061) time: 0.1358 data: 0.0474 max mem: 9377 +Train: [30] [2200/6250] eta: 0:11:08 lr: 0.000104 grad: 0.1032 (0.1115) loss: 0.8034 (0.8059) time: 0.1666 data: 0.0797 max mem: 9377 +Train: [30] [2300/6250] eta: 0:10:53 lr: 0.000104 grad: 0.1095 (0.1115) loss: 0.8053 (0.8058) time: 0.1782 data: 0.0964 max mem: 9377 +Train: [30] [2400/6250] eta: 0:10:34 lr: 0.000104 grad: 0.1068 (0.1116) loss: 0.8112 (0.8056) time: 0.1236 data: 0.0267 max mem: 9377 +Train: [30] [2500/6250] eta: 0:10:19 lr: 0.000104 grad: 0.1112 (0.1117) loss: 0.7990 (0.8053) time: 0.1566 data: 0.0553 max mem: 9377 +Train: [30] [2600/6250] eta: 0:10:02 lr: 0.000104 grad: 0.1149 (0.1117) loss: 0.8005 (0.8053) time: 0.1997 data: 0.1207 max mem: 9377 +Train: [30] [2700/6250] eta: 0:09:44 lr: 0.000104 grad: 0.1078 (0.1116) loss: 0.8068 (0.8054) time: 0.1955 data: 0.1185 max mem: 9377 +Train: [30] [2800/6250] eta: 0:09:27 lr: 0.000104 grad: 0.1036 (0.1116) loss: 0.8140 (0.8054) time: 0.1569 data: 0.0725 max mem: 9377 +Train: [30] [2900/6250] eta: 0:09:10 lr: 0.000104 grad: 0.1073 (0.1114) loss: 0.8072 (0.8055) time: 0.1306 data: 0.0289 max mem: 9377 +Train: [30] [3000/6250] eta: 0:08:53 lr: 0.000104 grad: 0.1060 (0.1112) loss: 0.8079 (0.8056) time: 0.1571 data: 0.0706 max mem: 9377 +Train: [30] [3100/6250] eta: 0:08:36 lr: 0.000104 grad: 0.1073 (0.1111) loss: 0.8116 (0.8057) time: 0.1598 data: 0.0765 max mem: 9377 +Train: [30] [3200/6250] eta: 0:08:19 lr: 0.000104 grad: 0.1098 (0.1111) loss: 0.8046 (0.8057) time: 0.1285 data: 0.0423 max mem: 9377 +Train: [30] [3300/6250] eta: 0:08:02 lr: 0.000104 grad: 0.1134 (0.1111) loss: 0.7993 (0.8057) time: 0.1787 data: 0.1031 max mem: 9377 +Train: [30] [3400/6250] eta: 0:07:46 lr: 0.000104 grad: 0.1134 (0.1112) loss: 0.8050 (0.8056) time: 0.1750 data: 0.0858 max mem: 9377 +Train: [30] [3500/6250] eta: 0:07:36 lr: 0.000104 grad: 0.1093 (0.1114) loss: 0.8031 (0.8056) time: 0.1091 data: 0.0004 max mem: 9377 +Train: [30] [3600/6250] eta: 0:07:18 lr: 0.000104 grad: 0.1084 (0.1115) loss: 0.8032 (0.8053) time: 0.1365 data: 0.0383 max mem: 9377 +Train: [30] [3700/6250] eta: 0:07:05 lr: 0.000104 grad: 0.1095 (0.1115) loss: 0.8023 (0.8051) time: 0.2518 data: 0.1533 max mem: 9377 +Train: [30] [3800/6250] eta: 0:06:47 lr: 0.000104 grad: 0.1160 (0.1116) loss: 0.7970 (0.8051) time: 0.1406 data: 0.0529 max mem: 9377 +Train: [30] [3900/6250] eta: 0:06:35 lr: 0.000104 grad: 0.1100 (0.1116) loss: 0.7962 (0.8049) time: 0.1052 data: 0.0003 max mem: 9377 +Train: [30] [4000/6250] eta: 0:06:18 lr: 0.000104 grad: 0.1005 (0.1117) loss: 0.8134 (0.8049) time: 0.1517 data: 0.0738 max mem: 9377 +Train: [30] [4100/6250] eta: 0:06:00 lr: 0.000104 grad: 0.1093 (0.1117) loss: 0.8021 (0.8049) time: 0.1447 data: 0.0574 max mem: 9377 +Train: [30] [4200/6250] eta: 0:05:43 lr: 0.000104 grad: 0.1099 (0.1116) loss: 0.8023 (0.8048) time: 0.1717 data: 0.0855 max mem: 9377 +Train: [30] [4300/6250] eta: 0:05:25 lr: 0.000104 grad: 0.1150 (0.1116) loss: 0.7939 (0.8046) time: 0.1540 data: 0.0663 max mem: 9377 +Train: [30] [4400/6250] eta: 0:05:08 lr: 0.000104 grad: 0.1069 (0.1116) loss: 0.8060 (0.8045) time: 0.1198 data: 0.0291 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:53 lr: 0.000104 grad: 0.1015 (0.1115) loss: 0.8043 (0.8045) time: 0.2220 data: 0.1374 max mem: 9377 +Train: [30] [4600/6250] eta: 0:04:35 lr: 0.000104 grad: 0.1077 (0.1115) loss: 0.8021 (0.8044) time: 0.2432 data: 0.1530 max mem: 9377 +Train: [30] [4700/6250] eta: 0:04:19 lr: 0.000104 grad: 0.1037 (0.1115) loss: 0.8002 (0.8044) time: 0.1266 data: 0.0316 max mem: 9377 +Train: [30] [4800/6250] eta: 0:04:02 lr: 0.000104 grad: 0.1079 (0.1114) loss: 0.8071 (0.8044) time: 0.1600 data: 0.0756 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:45 lr: 0.000104 grad: 0.1082 (0.1115) loss: 0.7977 (0.8042) time: 0.1635 data: 0.0748 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:28 lr: 0.000104 grad: 0.1118 (0.1116) loss: 0.8028 (0.8040) time: 0.1954 data: 0.1116 max mem: 9377 +Train: [30] [5100/6250] eta: 0:03:11 lr: 0.000104 grad: 0.1152 (0.1116) loss: 0.8060 (0.8039) time: 0.1701 data: 0.0861 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:54 lr: 0.000104 grad: 0.1137 (0.1117) loss: 0.7928 (0.8038) time: 0.2302 data: 0.1454 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:37 lr: 0.000104 grad: 0.1132 (0.1117) loss: 0.7900 (0.8035) time: 0.1196 data: 0.0272 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:21 lr: 0.000103 grad: 0.1142 (0.1117) loss: 0.7986 (0.8035) time: 0.0935 data: 0.0002 max mem: 9377 +Train: [30] [5500/6250] eta: 0:02:04 lr: 0.000103 grad: 0.1181 (0.1117) loss: 0.7882 (0.8033) time: 0.1699 data: 0.0880 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:47 lr: 0.000103 grad: 0.1064 (0.1117) loss: 0.7934 (0.8032) time: 0.1743 data: 0.0810 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:30 lr: 0.000103 grad: 0.1109 (0.1117) loss: 0.7923 (0.8030) time: 0.1389 data: 0.0562 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:14 lr: 0.000103 grad: 0.1082 (0.1117) loss: 0.8005 (0.8029) time: 0.2273 data: 0.1499 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:57 lr: 0.000103 grad: 0.1113 (0.1117) loss: 0.7950 (0.8028) time: 0.1191 data: 0.0394 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:41 lr: 0.000103 grad: 0.1122 (0.1117) loss: 0.7977 (0.8027) time: 0.1735 data: 0.0905 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:24 lr: 0.000103 grad: 0.1083 (0.1117) loss: 0.8017 (0.8026) time: 0.1663 data: 0.0667 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:08 lr: 0.000103 grad: 0.1135 (0.1118) loss: 0.7903 (0.8025) time: 0.1657 data: 0.0719 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.1054 (0.1118) loss: 0.7958 (0.8024) time: 0.1786 data: 0.0941 max mem: 9377 +Train: [30] Total time: 0:17:18 (0.1661 s / it) +Averaged stats: lr: 0.000103 grad: 0.1054 (0.1118) loss: 0.7958 (0.8024) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:05:29 loss: 0.8389 (0.8389) time: 5.3118 data: 5.2780 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8321 (0.8320) time: 0.1360 data: 0.1093 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:14 (0.2373 s / it) +Averaged stats (hcp-train-subset): loss: 0.8321 (0.8320) +Eval (hcp-val): [30] [ 0/62] eta: 0:05:20 loss: 0.8433 (0.8433) time: 5.1705 data: 5.1389 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8453 (0.8458) time: 0.1308 data: 0.1054 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:14 (0.2419 s / it) +Averaged stats (hcp-val): loss: 0.8453 (0.8458) +Eval (nsd-val): [30] [ 0/62] eta: 0:05:51 loss: 0.8095 (0.8095) time: 5.6660 data: 5.6317 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8204 (0.8206) time: 0.1463 data: 0.1191 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:14 (0.2399 s / it) +Averaged stats (nsd-val): loss: 0.8204 (0.8206) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [31] [ 0/6250] eta: 7:21:21 lr: 0.000103 grad: 0.0822 (0.0822) loss: 0.8575 (0.8575) time: 4.2371 data: 3.9798 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:45:29 lr: 0.000103 grad: 0.1380 (0.1537) loss: 0.8066 (0.8185) time: 0.2240 data: 0.1268 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:29:54 lr: 0.000103 grad: 0.1005 (0.1321) loss: 0.8202 (0.8179) time: 0.1595 data: 0.0598 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:24:52 lr: 0.000103 grad: 0.1065 (0.1278) loss: 0.8149 (0.8149) time: 0.1085 data: 0.0003 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:22:03 lr: 0.000103 grad: 0.1054 (0.1253) loss: 0.8135 (0.8126) time: 0.1352 data: 0.0429 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:20:42 lr: 0.000103 grad: 0.1175 (0.1228) loss: 0.7987 (0.8108) time: 0.2360 data: 0.1396 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:19:18 lr: 0.000103 grad: 0.1115 (0.1217) loss: 0.8080 (0.8090) time: 0.1775 data: 0.0821 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:18:25 lr: 0.000103 grad: 0.1162 (0.1203) loss: 0.8065 (0.8076) time: 0.1409 data: 0.0004 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:18:03 lr: 0.000103 grad: 0.1122 (0.1195) loss: 0.8024 (0.8066) time: 0.2270 data: 0.1443 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:18:28 lr: 0.000103 grad: 0.1090 (0.1188) loss: 0.8047 (0.8058) time: 0.1724 data: 0.0696 max mem: 9377 +Train: [31] [1000/6250] eta: 0:18:10 lr: 0.000103 grad: 0.1008 (0.1179) loss: 0.8001 (0.8055) time: 0.4600 data: 0.3801 max mem: 9377 +Train: [31] [1100/6250] eta: 0:17:37 lr: 0.000103 grad: 0.0997 (0.1170) loss: 0.8089 (0.8053) time: 0.3350 data: 0.2451 max mem: 9377 +Train: [31] [1200/6250] eta: 0:16:58 lr: 0.000103 grad: 0.1128 (0.1166) loss: 0.7943 (0.8050) time: 0.1844 data: 0.0869 max mem: 9377 +Train: [31] [1300/6250] eta: 0:16:25 lr: 0.000103 grad: 0.1035 (0.1161) loss: 0.8023 (0.8049) time: 0.1317 data: 0.0505 max mem: 9377 +Train: [31] [1400/6250] eta: 0:15:53 lr: 0.000103 grad: 0.1102 (0.1156) loss: 0.7872 (0.8047) time: 0.1513 data: 0.0597 max mem: 9377 +Train: [31] [1500/6250] eta: 0:15:21 lr: 0.000103 grad: 0.1087 (0.1151) loss: 0.8053 (0.8046) time: 0.1639 data: 0.0836 max mem: 9377 +Train: [31] [1600/6250] eta: 0:16:01 lr: 0.000103 grad: 0.1038 (0.1146) loss: 0.8022 (0.8044) time: 0.3584 data: 0.2417 max mem: 9377 +Train: [31] [1700/6250] eta: 0:15:23 lr: 0.000103 grad: 0.1134 (0.1143) loss: 0.8104 (0.8043) time: 0.1453 data: 0.0474 max mem: 9377 +Train: [31] [1800/6250] eta: 0:14:50 lr: 0.000103 grad: 0.1109 (0.1141) loss: 0.8053 (0.8042) time: 0.1451 data: 0.0498 max mem: 9377 +Train: [31] [1900/6250] eta: 0:14:41 lr: 0.000103 grad: 0.1123 (0.1139) loss: 0.8017 (0.8040) time: 0.5049 data: 0.3394 max mem: 9377 +Train: [31] [2000/6250] eta: 0:14:12 lr: 0.000103 grad: 0.1086 (0.1137) loss: 0.8074 (0.8039) time: 0.1067 data: 0.0002 max mem: 9377 +Train: [31] [2100/6250] eta: 0:13:49 lr: 0.000103 grad: 0.1108 (0.1137) loss: 0.8019 (0.8037) time: 0.0995 data: 0.0002 max mem: 9377 +Train: [31] [2200/6250] eta: 0:13:21 lr: 0.000103 grad: 0.1036 (0.1135) loss: 0.7997 (0.8036) time: 0.1207 data: 0.0305 max mem: 9377 +Train: [31] [2300/6250] eta: 0:12:56 lr: 0.000103 grad: 0.1036 (0.1134) loss: 0.8031 (0.8034) time: 0.1407 data: 0.0456 max mem: 9377 +Train: [31] [2400/6250] eta: 0:12:32 lr: 0.000103 grad: 0.1113 (0.1133) loss: 0.7961 (0.8033) time: 0.2454 data: 0.1633 max mem: 9377 +Train: [31] [2500/6250] eta: 0:12:05 lr: 0.000103 grad: 0.1029 (0.1131) loss: 0.8091 (0.8032) time: 0.1470 data: 0.0409 max mem: 9377 +Train: [31] [2600/6250] eta: 0:11:44 lr: 0.000103 grad: 0.1077 (0.1129) loss: 0.8032 (0.8032) time: 0.1488 data: 0.0474 max mem: 9377 +Train: [31] [2700/6250] eta: 0:11:20 lr: 0.000103 grad: 0.1053 (0.1129) loss: 0.8028 (0.8032) time: 0.0966 data: 0.0002 max mem: 9377 +Train: [31] [2800/6250] eta: 0:11:04 lr: 0.000103 grad: 0.1050 (0.1128) loss: 0.7999 (0.8031) time: 0.1601 data: 0.0653 max mem: 9377 +Train: [31] [2900/6250] eta: 0:10:42 lr: 0.000103 grad: 0.1053 (0.1127) loss: 0.8113 (0.8031) time: 0.0860 data: 0.0002 max mem: 9377 +Train: [31] [3000/6250] eta: 0:10:19 lr: 0.000103 grad: 0.1032 (0.1126) loss: 0.8064 (0.8031) time: 0.1420 data: 0.0548 max mem: 9377 +Train: [31] [3100/6250] eta: 0:09:59 lr: 0.000103 grad: 0.1098 (0.1125) loss: 0.7998 (0.8031) time: 0.2397 data: 0.1609 max mem: 9377 +Train: [31] [3200/6250] eta: 0:09:45 lr: 0.000102 grad: 0.1100 (0.1124) loss: 0.8010 (0.8031) time: 0.1433 data: 0.0466 max mem: 9377 +Train: [31] [3300/6250] eta: 0:09:23 lr: 0.000102 grad: 0.1114 (0.1124) loss: 0.8052 (0.8030) time: 0.1721 data: 0.0916 max mem: 9377 +Train: [31] [3400/6250] eta: 0:09:00 lr: 0.000102 grad: 0.1118 (0.1124) loss: 0.8131 (0.8031) time: 0.1248 data: 0.0465 max mem: 9377 +Train: [31] [3500/6250] eta: 0:08:39 lr: 0.000102 grad: 0.1110 (0.1123) loss: 0.8111 (0.8031) time: 0.1405 data: 0.0474 max mem: 9377 +Train: [31] [3600/6250] eta: 0:08:18 lr: 0.000102 grad: 0.1078 (0.1122) loss: 0.8056 (0.8031) time: 0.0967 data: 0.0020 max mem: 9377 +Train: [31] [3700/6250] eta: 0:07:57 lr: 0.000102 grad: 0.1080 (0.1122) loss: 0.8032 (0.8031) time: 0.1226 data: 0.0334 max mem: 9377 +Train: [31] [3800/6250] eta: 0:07:39 lr: 0.000102 grad: 0.1062 (0.1122) loss: 0.8043 (0.8031) time: 0.3508 data: 0.2589 max mem: 9377 +Train: [31] [3900/6250] eta: 0:07:18 lr: 0.000102 grad: 0.1120 (0.1121) loss: 0.7962 (0.8032) time: 0.1378 data: 0.0538 max mem: 9377 +Train: [31] [4000/6250] eta: 0:06:59 lr: 0.000102 grad: 0.1168 (0.1122) loss: 0.7940 (0.8031) time: 0.1401 data: 0.0475 max mem: 9377 +Train: [31] [4100/6250] eta: 0:06:38 lr: 0.000102 grad: 0.1134 (0.1122) loss: 0.8057 (0.8031) time: 0.1532 data: 0.0584 max mem: 9377 +Train: [31] [4200/6250] eta: 0:06:18 lr: 0.000102 grad: 0.1121 (0.1122) loss: 0.7997 (0.8030) time: 0.1510 data: 0.0673 max mem: 9377 +Train: [31] [4300/6250] eta: 0:05:58 lr: 0.000102 grad: 0.1126 (0.1123) loss: 0.7974 (0.8029) time: 0.1770 data: 0.0922 max mem: 9377 +Train: [31] [4400/6250] eta: 0:05:38 lr: 0.000102 grad: 0.1108 (0.1124) loss: 0.7899 (0.8027) time: 0.1609 data: 0.0661 max mem: 9377 +Train: [31] [4500/6250] eta: 0:05:19 lr: 0.000102 grad: 0.1077 (0.1123) loss: 0.7967 (0.8026) time: 0.1224 data: 0.0167 max mem: 9377 +Train: [31] [4600/6250] eta: 0:04:59 lr: 0.000102 grad: 0.1113 (0.1124) loss: 0.7969 (0.8025) time: 0.1090 data: 0.0202 max mem: 9377 +Train: [31] [4700/6250] eta: 0:04:40 lr: 0.000102 grad: 0.1138 (0.1124) loss: 0.8048 (0.8024) time: 0.1857 data: 0.1017 max mem: 9377 +Train: [31] [4800/6250] eta: 0:04:21 lr: 0.000102 grad: 0.1090 (0.1124) loss: 0.7897 (0.8022) time: 0.1531 data: 0.0679 max mem: 9377 +Train: [31] [4900/6250] eta: 0:04:03 lr: 0.000102 grad: 0.1130 (0.1125) loss: 0.7963 (0.8022) time: 0.1709 data: 0.0836 max mem: 9377 +Train: [31] [5000/6250] eta: 0:03:44 lr: 0.000102 grad: 0.1078 (0.1125) loss: 0.8044 (0.8021) time: 0.1749 data: 0.0842 max mem: 9377 +Train: [31] [5100/6250] eta: 0:03:25 lr: 0.000102 grad: 0.1113 (0.1126) loss: 0.7965 (0.8019) time: 0.1349 data: 0.0383 max mem: 9377 +Train: [31] [5200/6250] eta: 0:03:07 lr: 0.000102 grad: 0.1160 (0.1126) loss: 0.7934 (0.8018) time: 0.1406 data: 0.0467 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:49 lr: 0.000102 grad: 0.1056 (0.1126) loss: 0.8100 (0.8018) time: 0.1468 data: 0.0625 max mem: 9377 +Train: [31] [5400/6250] eta: 0:02:31 lr: 0.000102 grad: 0.1119 (0.1127) loss: 0.7940 (0.8017) time: 0.1724 data: 0.0839 max mem: 9377 +Train: [31] [5500/6250] eta: 0:02:12 lr: 0.000102 grad: 0.1100 (0.1127) loss: 0.8032 (0.8017) time: 0.1469 data: 0.0622 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:54 lr: 0.000102 grad: 0.1098 (0.1128) loss: 0.7975 (0.8017) time: 0.1201 data: 0.0360 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:37 lr: 0.000102 grad: 0.1146 (0.1128) loss: 0.8060 (0.8017) time: 0.1270 data: 0.0329 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:19 lr: 0.000102 grad: 0.1075 (0.1128) loss: 0.7972 (0.8016) time: 0.1373 data: 0.0496 max mem: 9377 +Train: [31] [5900/6250] eta: 0:01:01 lr: 0.000102 grad: 0.1082 (0.1128) loss: 0.7961 (0.8016) time: 0.1877 data: 0.1026 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:43 lr: 0.000102 grad: 0.1073 (0.1128) loss: 0.8051 (0.8016) time: 0.1930 data: 0.1156 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:26 lr: 0.000102 grad: 0.1075 (0.1128) loss: 0.7977 (0.8015) time: 0.0947 data: 0.0002 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:08 lr: 0.000102 grad: 0.1190 (0.1128) loss: 0.8013 (0.8015) time: 0.1483 data: 0.0674 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.1096 (0.1129) loss: 0.7945 (0.8015) time: 0.1615 data: 0.0859 max mem: 9377 +Train: [31] Total time: 0:18:19 (0.1759 s / it) +Averaged stats: lr: 0.000102 grad: 0.1096 (0.1129) loss: 0.7945 (0.8015) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:05:30 loss: 0.8350 (0.8350) time: 5.3322 data: 5.3002 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8373 (0.8309) time: 0.1535 data: 0.1267 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:15 (0.2435 s / it) +Averaged stats (hcp-train-subset): loss: 0.8373 (0.8309) +Eval (hcp-val): [31] [ 0/62] eta: 0:04:14 loss: 0.8434 (0.8434) time: 4.1103 data: 4.0107 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8438 (0.8464) time: 0.1267 data: 0.1012 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:14 (0.2393 s / it) +Averaged stats (hcp-val): loss: 0.8438 (0.8464) +Eval (nsd-val): [31] [ 0/62] eta: 0:06:41 loss: 0.8086 (0.8086) time: 6.4704 data: 6.4372 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8159 (0.8174) time: 0.1272 data: 0.1019 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:14 (0.2402 s / it) +Averaged stats (nsd-val): loss: 0.8159 (0.8174) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 10:40:16 lr: 0.000102 grad: 0.1448 (0.1448) loss: 0.8842 (0.8842) time: 6.1466 data: 5.7774 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:24:30 lr: 0.000102 grad: 0.0876 (0.1351) loss: 0.8398 (0.8266) time: 0.1980 data: 0.1025 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:20:42 lr: 0.000102 grad: 0.1171 (0.1358) loss: 0.8116 (0.8195) time: 0.1330 data: 0.0326 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:19:17 lr: 0.000102 grad: 0.1195 (0.1335) loss: 0.8037 (0.8125) time: 0.1645 data: 0.0724 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:18:36 lr: 0.000102 grad: 0.1145 (0.1300) loss: 0.7930 (0.8090) time: 0.1902 data: 0.0941 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:19:30 lr: 0.000102 grad: 0.1124 (0.1267) loss: 0.8071 (0.8087) time: 0.4987 data: 0.3748 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:20:03 lr: 0.000102 grad: 0.1071 (0.1250) loss: 0.8078 (0.8077) time: 0.2448 data: 0.1117 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:19:48 lr: 0.000102 grad: 0.1155 (0.1243) loss: 0.7997 (0.8066) time: 0.1712 data: 0.0690 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:19:12 lr: 0.000101 grad: 0.1162 (0.1236) loss: 0.7965 (0.8058) time: 0.1049 data: 0.0003 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:19:35 lr: 0.000101 grad: 0.1155 (0.1227) loss: 0.7964 (0.8056) time: 0.7414 data: 0.5975 max mem: 9377 +Train: [32] [1000/6250] eta: 0:18:37 lr: 0.000101 grad: 0.1087 (0.1220) loss: 0.8086 (0.8053) time: 0.0981 data: 0.0046 max mem: 9377 +Train: [32] [1100/6250] eta: 0:18:24 lr: 0.000101 grad: 0.1095 (0.1214) loss: 0.7973 (0.8047) time: 0.1482 data: 0.0532 max mem: 9377 +Train: [32] [1200/6250] eta: 0:17:38 lr: 0.000101 grad: 0.1071 (0.1206) loss: 0.7955 (0.8045) time: 0.1634 data: 0.0763 max mem: 9377 +Train: [32] [1300/6250] eta: 0:16:55 lr: 0.000101 grad: 0.1111 (0.1202) loss: 0.8000 (0.8043) time: 0.1268 data: 0.0453 max mem: 9377 +Train: [32] [1400/6250] eta: 0:16:20 lr: 0.000101 grad: 0.1116 (0.1195) loss: 0.8032 (0.8042) time: 0.1184 data: 0.0301 max mem: 9377 +Train: [32] [1500/6250] eta: 0:15:46 lr: 0.000101 grad: 0.1089 (0.1191) loss: 0.7970 (0.8040) time: 0.1626 data: 0.0848 max mem: 9377 +Train: [32] [1600/6250] eta: 0:15:16 lr: 0.000101 grad: 0.1088 (0.1188) loss: 0.7972 (0.8038) time: 0.0968 data: 0.0002 max mem: 9377 +Train: [32] [1700/6250] eta: 0:14:47 lr: 0.000101 grad: 0.1105 (0.1184) loss: 0.7952 (0.8035) time: 0.1676 data: 0.0778 max mem: 9377 +Train: [32] [1800/6250] eta: 0:14:17 lr: 0.000101 grad: 0.1037 (0.1181) loss: 0.8026 (0.8034) time: 0.1304 data: 0.0400 max mem: 9377 +Train: [32] [1900/6250] eta: 0:13:53 lr: 0.000101 grad: 0.1061 (0.1179) loss: 0.8053 (0.8032) time: 0.1908 data: 0.1001 max mem: 9377 +Train: [32] [2000/6250] eta: 0:13:28 lr: 0.000101 grad: 0.1045 (0.1174) loss: 0.7995 (0.8031) time: 0.1913 data: 0.1030 max mem: 9377 +Train: [32] [2100/6250] eta: 0:13:27 lr: 0.000101 grad: 0.1201 (0.1174) loss: 0.8008 (0.8029) time: 0.2024 data: 0.1096 max mem: 9377 +Train: [32] [2200/6250] eta: 0:13:11 lr: 0.000101 grad: 0.1166 (0.1173) loss: 0.7915 (0.8028) time: 0.4809 data: 0.3093 max mem: 9377 +Train: [32] [2300/6250] eta: 0:12:51 lr: 0.000101 grad: 0.1086 (0.1171) loss: 0.7937 (0.8027) time: 0.1621 data: 0.0524 max mem: 9377 +Train: [32] [2400/6250] eta: 0:12:29 lr: 0.000101 grad: 0.1039 (0.1169) loss: 0.7992 (0.8026) time: 0.2040 data: 0.0952 max mem: 9377 +Train: [32] [2500/6250] eta: 0:12:06 lr: 0.000101 grad: 0.1039 (0.1167) loss: 0.8052 (0.8025) time: 0.2918 data: 0.2018 max mem: 9377 +Train: [32] [2600/6250] eta: 0:11:44 lr: 0.000101 grad: 0.1101 (0.1166) loss: 0.7964 (0.8023) time: 0.1506 data: 0.0510 max mem: 9377 +Train: [32] [2700/6250] eta: 0:11:26 lr: 0.000101 grad: 0.1171 (0.1166) loss: 0.7898 (0.8021) time: 0.1234 data: 0.0319 max mem: 9377 +Train: [32] [2800/6250] eta: 0:11:02 lr: 0.000101 grad: 0.1048 (0.1163) loss: 0.7964 (0.8020) time: 0.1640 data: 0.0745 max mem: 9377 +Train: [32] [2900/6250] eta: 0:10:40 lr: 0.000101 grad: 0.1066 (0.1162) loss: 0.7980 (0.8019) time: 0.1089 data: 0.0029 max mem: 9377 +Train: [32] [3000/6250] eta: 0:10:17 lr: 0.000101 grad: 0.1051 (0.1161) loss: 0.8019 (0.8018) time: 0.1580 data: 0.0735 max mem: 9377 +Train: [32] [3100/6250] eta: 0:09:56 lr: 0.000101 grad: 0.1140 (0.1161) loss: 0.7893 (0.8017) time: 0.1426 data: 0.0540 max mem: 9377 +Train: [32] [3200/6250] eta: 0:09:34 lr: 0.000101 grad: 0.1139 (0.1161) loss: 0.7911 (0.8015) time: 0.1550 data: 0.0675 max mem: 9377 +Train: [32] [3300/6250] eta: 0:09:12 lr: 0.000101 grad: 0.1039 (0.1160) loss: 0.8005 (0.8014) time: 0.1530 data: 0.0709 max mem: 9377 +Train: [32] [3400/6250] eta: 0:08:51 lr: 0.000101 grad: 0.1136 (0.1159) loss: 0.8003 (0.8013) time: 0.1695 data: 0.0807 max mem: 9377 +Train: [32] [3500/6250] eta: 0:08:31 lr: 0.000101 grad: 0.1135 (0.1158) loss: 0.8018 (0.8012) time: 0.1680 data: 0.0768 max mem: 9377 +Train: [32] [3600/6250] eta: 0:08:10 lr: 0.000101 grad: 0.1096 (0.1158) loss: 0.7945 (0.8011) time: 0.1565 data: 0.0651 max mem: 9377 +Train: [32] [3700/6250] eta: 0:07:50 lr: 0.000101 grad: 0.1148 (0.1158) loss: 0.8008 (0.8009) time: 0.1779 data: 0.0990 max mem: 9377 +Train: [32] [3800/6250] eta: 0:07:29 lr: 0.000101 grad: 0.1087 (0.1158) loss: 0.7998 (0.8008) time: 0.1449 data: 0.0574 max mem: 9377 +Train: [32] [3900/6250] eta: 0:07:10 lr: 0.000101 grad: 0.1024 (0.1157) loss: 0.7989 (0.8007) time: 0.1410 data: 0.0504 max mem: 9377 +Train: [32] [4000/6250] eta: 0:06:50 lr: 0.000101 grad: 0.1076 (0.1157) loss: 0.7912 (0.8005) time: 0.1347 data: 0.0494 max mem: 9377 +Train: [32] [4100/6250] eta: 0:06:31 lr: 0.000101 grad: 0.1135 (0.1156) loss: 0.7881 (0.8004) time: 0.2043 data: 0.1230 max mem: 9377 +Train: [32] [4200/6250] eta: 0:06:11 lr: 0.000101 grad: 0.1076 (0.1155) loss: 0.7987 (0.8003) time: 0.1795 data: 0.0885 max mem: 9377 +Train: [32] [4300/6250] eta: 0:05:52 lr: 0.000101 grad: 0.1097 (0.1155) loss: 0.7918 (0.8002) time: 0.1097 data: 0.0153 max mem: 9377 +Train: [32] [4400/6250] eta: 0:05:33 lr: 0.000101 grad: 0.1063 (0.1154) loss: 0.7989 (0.8001) time: 0.1408 data: 0.0465 max mem: 9377 +Train: [32] [4500/6250] eta: 0:05:14 lr: 0.000101 grad: 0.1201 (0.1153) loss: 0.7947 (0.8001) time: 0.1006 data: 0.0129 max mem: 9377 +Train: [32] [4600/6250] eta: 0:04:56 lr: 0.000101 grad: 0.1056 (0.1153) loss: 0.8104 (0.8001) time: 0.1691 data: 0.0784 max mem: 9377 +Train: [32] [4700/6250] eta: 0:04:37 lr: 0.000100 grad: 0.1061 (0.1152) loss: 0.7973 (0.8001) time: 0.1816 data: 0.0889 max mem: 9377 +Train: [32] [4800/6250] eta: 0:04:18 lr: 0.000100 grad: 0.1196 (0.1152) loss: 0.7815 (0.8001) time: 0.1579 data: 0.0747 max mem: 9377 +Train: [32] [4900/6250] eta: 0:04:00 lr: 0.000100 grad: 0.1061 (0.1151) loss: 0.7968 (0.8002) time: 0.1807 data: 0.1035 max mem: 9377 +Train: [32] [5000/6250] eta: 0:03:42 lr: 0.000100 grad: 0.1116 (0.1151) loss: 0.8060 (0.8002) time: 0.2072 data: 0.1182 max mem: 9377 +Train: [32] [5100/6250] eta: 0:03:24 lr: 0.000100 grad: 0.1136 (0.1151) loss: 0.7857 (0.8000) time: 0.1743 data: 0.0901 max mem: 9377 +Train: [32] [5200/6250] eta: 0:03:05 lr: 0.000100 grad: 0.1129 (0.1151) loss: 0.7992 (0.7999) time: 0.1729 data: 0.0863 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:47 lr: 0.000100 grad: 0.1043 (0.1150) loss: 0.8143 (0.7999) time: 0.1774 data: 0.0900 max mem: 9377 +Train: [32] [5400/6250] eta: 0:02:30 lr: 0.000100 grad: 0.1107 (0.1151) loss: 0.7963 (0.7999) time: 0.1505 data: 0.0664 max mem: 9377 +Train: [32] [5500/6250] eta: 0:02:12 lr: 0.000100 grad: 0.1163 (0.1150) loss: 0.7982 (0.7998) time: 0.1674 data: 0.0868 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:54 lr: 0.000100 grad: 0.1107 (0.1151) loss: 0.7970 (0.7998) time: 0.1669 data: 0.0846 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:36 lr: 0.000100 grad: 0.1130 (0.1150) loss: 0.7986 (0.7997) time: 0.1099 data: 0.0256 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:18 lr: 0.000100 grad: 0.1053 (0.1150) loss: 0.7991 (0.7998) time: 0.1592 data: 0.0695 max mem: 9377 +Train: [32] [5900/6250] eta: 0:01:01 lr: 0.000100 grad: 0.1090 (0.1149) loss: 0.8085 (0.7998) time: 0.2057 data: 0.1171 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:43 lr: 0.000100 grad: 0.1125 (0.1149) loss: 0.7988 (0.7999) time: 0.2311 data: 0.1492 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:26 lr: 0.000100 grad: 0.1167 (0.1149) loss: 0.7917 (0.7999) time: 0.2503 data: 0.1621 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.1101 (0.1149) loss: 0.7964 (0.8000) time: 0.2317 data: 0.1509 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1148 (0.1148) loss: 0.7994 (0.8000) time: 0.1472 data: 0.0644 max mem: 9377 +Train: [32] Total time: 0:18:23 (0.1766 s / it) +Averaged stats: lr: 0.000100 grad: 0.1148 (0.1148) loss: 0.7994 (0.8000) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:06:07 loss: 0.8368 (0.8368) time: 5.9208 data: 5.8860 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8351 (0.8326) time: 0.0899 data: 0.0652 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (hcp-train-subset): loss: 0.8351 (0.8326) +Eval (hcp-val): [32] [ 0/62] eta: 0:03:38 loss: 0.8451 (0.8451) time: 3.5162 data: 3.4293 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8461 (0.8473) time: 0.2159 data: 0.1846 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:17 (0.2752 s / it) +Averaged stats (hcp-val): loss: 0.8461 (0.8473) +Eval (nsd-val): [32] [ 0/62] eta: 0:06:37 loss: 0.8115 (0.8115) time: 6.4083 data: 6.3778 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8231 (0.8233) time: 0.1457 data: 0.1186 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:14 (0.2338 s / it) +Averaged stats (nsd-val): loss: 0.8231 (0.8233) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 1 day, 15:01:23 lr: 0.000100 grad: 0.1490 (0.1490) loss: 0.8508 (0.8508) time: 22.4773 data: 22.3533 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:43:18 lr: 0.000100 grad: 0.1286 (0.1721) loss: 0.8051 (0.8180) time: 0.1151 data: 0.0002 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:36:33 lr: 0.000100 grad: 0.1078 (0.1477) loss: 0.8126 (0.8118) time: 0.1591 data: 0.0296 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:29:19 lr: 0.000100 grad: 0.1199 (0.1378) loss: 0.8043 (0.8108) time: 0.1483 data: 0.0473 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:28:43 lr: 0.000100 grad: 0.1038 (0.1315) loss: 0.8182 (0.8114) time: 0.2026 data: 0.1075 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:28:22 lr: 0.000100 grad: 0.1094 (0.1283) loss: 0.8093 (0.8116) time: 0.1620 data: 0.0439 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:25:50 lr: 0.000100 grad: 0.1008 (0.1255) loss: 0.8094 (0.8111) time: 0.1481 data: 0.0406 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:25:08 lr: 0.000100 grad: 0.1123 (0.1237) loss: 0.8015 (0.8101) time: 0.2947 data: 0.2003 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:23:26 lr: 0.000100 grad: 0.1050 (0.1216) loss: 0.8075 (0.8096) time: 0.2145 data: 0.1018 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:22:24 lr: 0.000100 grad: 0.1074 (0.1201) loss: 0.7979 (0.8092) time: 0.2161 data: 0.0856 max mem: 9377 +Train: [33] [1000/6250] eta: 0:21:18 lr: 0.000100 grad: 0.1059 (0.1188) loss: 0.8120 (0.8087) time: 0.1226 data: 0.0175 max mem: 9377 +Train: [33] [1100/6250] eta: 0:20:32 lr: 0.000100 grad: 0.1143 (0.1180) loss: 0.7958 (0.8082) time: 0.2137 data: 0.1409 max mem: 9377 +Train: [33] [1200/6250] eta: 0:20:13 lr: 0.000100 grad: 0.1044 (0.1171) loss: 0.8014 (0.8080) time: 0.1798 data: 0.0929 max mem: 9377 +Train: [33] [1300/6250] eta: 0:19:14 lr: 0.000100 grad: 0.1049 (0.1164) loss: 0.8004 (0.8077) time: 0.1603 data: 0.0742 max mem: 9377 +Train: [33] [1400/6250] eta: 0:18:24 lr: 0.000100 grad: 0.1130 (0.1157) loss: 0.7944 (0.8075) time: 0.1503 data: 0.0551 max mem: 9377 +Train: [33] [1500/6250] eta: 0:17:40 lr: 0.000100 grad: 0.1077 (0.1154) loss: 0.7954 (0.8071) time: 0.1453 data: 0.0508 max mem: 9377 +Train: [33] [1600/6250] eta: 0:17:08 lr: 0.000100 grad: 0.1123 (0.1153) loss: 0.7932 (0.8064) time: 0.1728 data: 0.0831 max mem: 9377 +Train: [33] [1700/6250] eta: 0:16:28 lr: 0.000100 grad: 0.1120 (0.1152) loss: 0.7950 (0.8058) time: 0.0937 data: 0.0004 max mem: 9377 +Train: [33] [1800/6250] eta: 0:15:51 lr: 0.000100 grad: 0.1109 (0.1150) loss: 0.8021 (0.8055) time: 0.1182 data: 0.0402 max mem: 9377 +Train: [33] [1900/6250] eta: 0:15:22 lr: 0.000100 grad: 0.1092 (0.1149) loss: 0.7983 (0.8051) time: 0.2646 data: 0.1842 max mem: 9377 +Train: [33] [2000/6250] eta: 0:14:47 lr: 0.000100 grad: 0.1118 (0.1148) loss: 0.7960 (0.8048) time: 0.1417 data: 0.0631 max mem: 9377 +Train: [33] [2100/6250] eta: 0:14:15 lr: 0.000100 grad: 0.1108 (0.1147) loss: 0.8007 (0.8045) time: 0.1142 data: 0.0244 max mem: 9377 +Train: [33] [2200/6250] eta: 0:13:46 lr: 0.000099 grad: 0.1087 (0.1144) loss: 0.8010 (0.8043) time: 0.2005 data: 0.1162 max mem: 9377 +Train: [33] [2300/6250] eta: 0:13:19 lr: 0.000099 grad: 0.1107 (0.1143) loss: 0.7862 (0.8041) time: 0.1806 data: 0.1001 max mem: 9377 +Train: [33] [2400/6250] eta: 0:12:52 lr: 0.000099 grad: 0.1107 (0.1143) loss: 0.7936 (0.8039) time: 0.1940 data: 0.1166 max mem: 9377 +Train: [33] [2500/6250] eta: 0:12:24 lr: 0.000099 grad: 0.1147 (0.1142) loss: 0.8003 (0.8037) time: 0.1491 data: 0.0614 max mem: 9377 +Train: [33] [2600/6250] eta: 0:12:00 lr: 0.000099 grad: 0.1132 (0.1143) loss: 0.7986 (0.8035) time: 0.1306 data: 0.0408 max mem: 9377 +Train: [33] [2700/6250] eta: 0:11:35 lr: 0.000099 grad: 0.1128 (0.1144) loss: 0.8098 (0.8034) time: 0.1584 data: 0.0652 max mem: 9377 +Train: [33] [2800/6250] eta: 0:11:10 lr: 0.000099 grad: 0.1136 (0.1144) loss: 0.7996 (0.8032) time: 0.1386 data: 0.0360 max mem: 9377 +Train: [33] [2900/6250] eta: 0:10:46 lr: 0.000099 grad: 0.1121 (0.1144) loss: 0.8015 (0.8030) time: 0.1965 data: 0.1107 max mem: 9377 +Train: [33] [3000/6250] eta: 0:10:22 lr: 0.000099 grad: 0.1092 (0.1144) loss: 0.8097 (0.8029) time: 0.1253 data: 0.0349 max mem: 9377 +Train: [33] [3100/6250] eta: 0:09:59 lr: 0.000099 grad: 0.1169 (0.1146) loss: 0.7890 (0.8026) time: 0.1667 data: 0.0856 max mem: 9377 +Train: [33] [3200/6250] eta: 0:09:36 lr: 0.000099 grad: 0.1199 (0.1148) loss: 0.7813 (0.8023) time: 0.1236 data: 0.0352 max mem: 9377 +Train: [33] [3300/6250] eta: 0:09:16 lr: 0.000099 grad: 0.1163 (0.1150) loss: 0.7964 (0.8021) time: 0.0995 data: 0.0004 max mem: 9377 +Train: [33] [3400/6250] eta: 0:08:54 lr: 0.000099 grad: 0.1186 (0.1151) loss: 0.7811 (0.8018) time: 0.1612 data: 0.0734 max mem: 9377 +Train: [33] [3500/6250] eta: 0:08:33 lr: 0.000099 grad: 0.1191 (0.1152) loss: 0.7921 (0.8015) time: 0.1887 data: 0.1079 max mem: 9377 +Train: [33] [3600/6250] eta: 0:08:11 lr: 0.000099 grad: 0.1243 (0.1153) loss: 0.7837 (0.8012) time: 0.1734 data: 0.0875 max mem: 9377 +Train: [33] [3700/6250] eta: 0:07:52 lr: 0.000099 grad: 0.1295 (0.1155) loss: 0.7864 (0.8010) time: 0.1743 data: 0.0913 max mem: 9377 +Train: [33] [3800/6250] eta: 0:07:30 lr: 0.000099 grad: 0.1167 (0.1155) loss: 0.7970 (0.8009) time: 0.1445 data: 0.0584 max mem: 9377 +Train: [33] [3900/6250] eta: 0:07:11 lr: 0.000099 grad: 0.1172 (0.1156) loss: 0.7909 (0.8007) time: 0.1825 data: 0.0991 max mem: 9377 +Train: [33] [4000/6250] eta: 0:06:50 lr: 0.000099 grad: 0.1127 (0.1156) loss: 0.7995 (0.8005) time: 0.1348 data: 0.0475 max mem: 9377 +Train: [33] [4100/6250] eta: 0:06:31 lr: 0.000099 grad: 0.1157 (0.1157) loss: 0.7995 (0.8004) time: 0.1423 data: 0.0499 max mem: 9377 +Train: [33] [4200/6250] eta: 0:06:11 lr: 0.000099 grad: 0.1246 (0.1157) loss: 0.7943 (0.8003) time: 0.1605 data: 0.0745 max mem: 9377 +Train: [33] [4300/6250] eta: 0:05:53 lr: 0.000099 grad: 0.1130 (0.1158) loss: 0.7958 (0.8002) time: 0.1843 data: 0.1000 max mem: 9377 +Train: [33] [4400/6250] eta: 0:05:34 lr: 0.000099 grad: 0.1101 (0.1157) loss: 0.7956 (0.8001) time: 0.1928 data: 0.1123 max mem: 9377 +Train: [33] [4500/6250] eta: 0:05:14 lr: 0.000099 grad: 0.1096 (0.1157) loss: 0.8090 (0.8001) time: 0.1392 data: 0.0493 max mem: 9377 +Train: [33] [4600/6250] eta: 0:04:56 lr: 0.000099 grad: 0.1110 (0.1156) loss: 0.8061 (0.8002) time: 0.1759 data: 0.0823 max mem: 9377 +Train: [33] [4700/6250] eta: 0:04:37 lr: 0.000099 grad: 0.1093 (0.1156) loss: 0.8010 (0.8003) time: 0.1985 data: 0.1165 max mem: 9377 +Train: [33] [4800/6250] eta: 0:04:18 lr: 0.000099 grad: 0.1123 (0.1155) loss: 0.8088 (0.8004) time: 0.1207 data: 0.0341 max mem: 9377 +Train: [33] [4900/6250] eta: 0:04:00 lr: 0.000099 grad: 0.1129 (0.1154) loss: 0.8021 (0.8005) time: 0.1389 data: 0.0581 max mem: 9377 +Train: [33] [5000/6250] eta: 0:03:42 lr: 0.000099 grad: 0.1098 (0.1154) loss: 0.8171 (0.8005) time: 0.1731 data: 0.0845 max mem: 9377 +Train: [33] [5100/6250] eta: 0:03:23 lr: 0.000099 grad: 0.1112 (0.1153) loss: 0.8084 (0.8006) time: 0.1458 data: 0.0499 max mem: 9377 +Train: [33] [5200/6250] eta: 0:03:06 lr: 0.000099 grad: 0.1086 (0.1153) loss: 0.7985 (0.8007) time: 0.3674 data: 0.2643 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:48 lr: 0.000099 grad: 0.1045 (0.1153) loss: 0.8106 (0.8008) time: 0.2041 data: 0.1186 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:31 lr: 0.000099 grad: 0.1038 (0.1152) loss: 0.8146 (0.8009) time: 0.1573 data: 0.0677 max mem: 9377 +Train: [33] [5500/6250] eta: 0:02:13 lr: 0.000099 grad: 0.1175 (0.1152) loss: 0.7974 (0.8010) time: 0.0907 data: 0.0002 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:56 lr: 0.000099 grad: 0.1015 (0.1151) loss: 0.8000 (0.8012) time: 0.1601 data: 0.0633 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:38 lr: 0.000099 grad: 0.1078 (0.1151) loss: 0.8003 (0.8012) time: 0.2200 data: 0.1319 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:20 lr: 0.000099 grad: 0.1075 (0.1151) loss: 0.8075 (0.8013) time: 0.1951 data: 0.1073 max mem: 9377 +Train: [33] [5900/6250] eta: 0:01:02 lr: 0.000098 grad: 0.1087 (0.1151) loss: 0.8100 (0.8014) time: 0.1757 data: 0.0836 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:44 lr: 0.000098 grad: 0.1141 (0.1151) loss: 0.8073 (0.8014) time: 0.1151 data: 0.0255 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:26 lr: 0.000098 grad: 0.1144 (0.1151) loss: 0.8001 (0.8015) time: 0.1363 data: 0.0426 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:09 lr: 0.000098 grad: 0.1094 (0.1151) loss: 0.8096 (0.8015) time: 0.4105 data: 0.3036 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.1105 (0.1151) loss: 0.8057 (0.8015) time: 0.1522 data: 0.0597 max mem: 9377 +Train: [33] Total time: 0:18:50 (0.1808 s / it) +Averaged stats: lr: 0.000098 grad: 0.1105 (0.1151) loss: 0.8057 (0.8015) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:03:54 loss: 0.8328 (0.8328) time: 3.7827 data: 3.6944 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8341 (0.8319) time: 0.1230 data: 0.0966 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (hcp-train-subset): loss: 0.8341 (0.8319) +Eval (hcp-val): [33] [ 0/62] eta: 0:03:59 loss: 0.8509 (0.8509) time: 3.8604 data: 3.8059 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8472 (0.8483) time: 0.1135 data: 0.0887 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:13 (0.2221 s / it) +Averaged stats (hcp-val): loss: 0.8472 (0.8483) +Eval (nsd-val): [33] [ 0/62] eta: 0:05:39 loss: 0.8131 (0.8131) time: 5.4706 data: 5.4381 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8192 (0.8193) time: 0.1984 data: 0.1734 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:15 (0.2483 s / it) +Averaged stats (nsd-val): loss: 0.8192 (0.8193) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 7:44:25 lr: 0.000098 grad: 0.3354 (0.3354) loss: 0.7841 (0.7841) time: 4.4585 data: 4.0352 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:23:56 lr: 0.000098 grad: 0.1280 (0.1578) loss: 0.8237 (0.8230) time: 0.2160 data: 0.1149 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:19:48 lr: 0.000098 grad: 0.1203 (0.1465) loss: 0.8074 (0.8133) time: 0.1879 data: 0.0851 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:20:56 lr: 0.000098 grad: 0.1230 (0.1401) loss: 0.8094 (0.8104) time: 0.2225 data: 0.1043 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:22:55 lr: 0.000098 grad: 0.1165 (0.1347) loss: 0.7987 (0.8093) time: 0.1231 data: 0.0003 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:21:07 lr: 0.000098 grad: 0.1106 (0.1309) loss: 0.8010 (0.8078) time: 0.1635 data: 0.0540 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:21:19 lr: 0.000098 grad: 0.1192 (0.1293) loss: 0.7865 (0.8062) time: 0.3509 data: 0.2281 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:20:43 lr: 0.000098 grad: 0.1269 (0.1283) loss: 0.7980 (0.8043) time: 0.2722 data: 0.1457 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:20:22 lr: 0.000098 grad: 0.1121 (0.1278) loss: 0.7966 (0.8027) time: 0.2936 data: 0.2031 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:20:25 lr: 0.000098 grad: 0.1169 (0.1268) loss: 0.7955 (0.8021) time: 0.6851 data: 0.6022 max mem: 9377 +Train: [34] [1000/6250] eta: 0:20:57 lr: 0.000098 grad: 0.1066 (0.1254) loss: 0.8051 (0.8023) time: 1.0290 data: 0.9398 max mem: 9377 +Train: [34] [1100/6250] eta: 0:19:56 lr: 0.000098 grad: 0.1150 (0.1246) loss: 0.7941 (0.8023) time: 0.1991 data: 0.1056 max mem: 9377 +Train: [34] [1200/6250] eta: 0:18:56 lr: 0.000098 grad: 0.1097 (0.1237) loss: 0.8035 (0.8023) time: 0.1698 data: 0.0836 max mem: 9377 +Train: [34] [1300/6250] eta: 0:18:09 lr: 0.000098 grad: 0.1131 (0.1231) loss: 0.8066 (0.8019) time: 0.1420 data: 0.0674 max mem: 9377 +Train: [34] [1400/6250] eta: 0:17:23 lr: 0.000098 grad: 0.1108 (0.1223) loss: 0.7973 (0.8019) time: 0.1614 data: 0.0715 max mem: 9377 +Train: [34] [1500/6250] eta: 0:16:44 lr: 0.000098 grad: 0.1142 (0.1219) loss: 0.8027 (0.8019) time: 0.1761 data: 0.0933 max mem: 9377 +Train: [34] [1600/6250] eta: 0:16:07 lr: 0.000098 grad: 0.1176 (0.1215) loss: 0.7920 (0.8017) time: 0.1484 data: 0.0577 max mem: 9377 +Train: [34] [1700/6250] eta: 0:15:32 lr: 0.000098 grad: 0.1144 (0.1212) loss: 0.8001 (0.8016) time: 0.1259 data: 0.0397 max mem: 9377 +Train: [34] [1800/6250] eta: 0:14:58 lr: 0.000098 grad: 0.1072 (0.1208) loss: 0.8053 (0.8015) time: 0.1570 data: 0.0784 max mem: 9377 +Train: [34] [1900/6250] eta: 0:14:31 lr: 0.000098 grad: 0.1137 (0.1206) loss: 0.8020 (0.8014) time: 0.1241 data: 0.0411 max mem: 9377 +Train: [34] [2000/6250] eta: 0:14:01 lr: 0.000098 grad: 0.1151 (0.1203) loss: 0.7895 (0.8012) time: 0.1787 data: 0.0959 max mem: 9377 +Train: [34] [2100/6250] eta: 0:13:34 lr: 0.000098 grad: 0.1107 (0.1200) loss: 0.8040 (0.8011) time: 0.2224 data: 0.1359 max mem: 9377 +Train: [34] [2200/6250] eta: 0:13:06 lr: 0.000098 grad: 0.1126 (0.1198) loss: 0.7976 (0.8009) time: 0.1251 data: 0.0335 max mem: 9377 +Train: [34] [2300/6250] eta: 0:12:43 lr: 0.000098 grad: 0.1136 (0.1195) loss: 0.7901 (0.8008) time: 0.1300 data: 0.0485 max mem: 9377 +Train: [34] [2400/6250] eta: 0:12:17 lr: 0.000098 grad: 0.1135 (0.1193) loss: 0.7912 (0.8007) time: 0.1298 data: 0.0417 max mem: 9377 +Train: [34] [2500/6250] eta: 0:11:53 lr: 0.000098 grad: 0.1169 (0.1192) loss: 0.7965 (0.8005) time: 0.1642 data: 0.0878 max mem: 9377 +Train: [34] [2600/6250] eta: 0:11:30 lr: 0.000098 grad: 0.1161 (0.1190) loss: 0.8024 (0.8006) time: 0.1697 data: 0.0851 max mem: 9377 +Train: [34] [2700/6250] eta: 0:11:09 lr: 0.000098 grad: 0.1172 (0.1190) loss: 0.7873 (0.8004) time: 0.1260 data: 0.0344 max mem: 9377 +Train: [34] [2800/6250] eta: 0:10:47 lr: 0.000098 grad: 0.1116 (0.1189) loss: 0.8032 (0.8002) time: 0.1891 data: 0.1060 max mem: 9377 +Train: [34] [2900/6250] eta: 0:10:26 lr: 0.000098 grad: 0.1164 (0.1189) loss: 0.7899 (0.8000) time: 0.1637 data: 0.0780 max mem: 9377 +Train: [34] [3000/6250] eta: 0:10:05 lr: 0.000098 grad: 0.1161 (0.1189) loss: 0.7921 (0.7997) time: 0.1929 data: 0.1069 max mem: 9377 +Train: [34] [3100/6250] eta: 0:09:42 lr: 0.000098 grad: 0.1095 (0.1188) loss: 0.7943 (0.7996) time: 0.1370 data: 0.0521 max mem: 9377 +Train: [34] [3200/6250] eta: 0:09:20 lr: 0.000098 grad: 0.1141 (0.1188) loss: 0.8012 (0.7996) time: 0.1144 data: 0.0023 max mem: 9377 +Train: [34] [3300/6250] eta: 0:08:59 lr: 0.000097 grad: 0.1177 (0.1187) loss: 0.7905 (0.7994) time: 0.1542 data: 0.0515 max mem: 9377 +Train: [34] [3400/6250] eta: 0:08:38 lr: 0.000097 grad: 0.1179 (0.1187) loss: 0.7923 (0.7993) time: 0.1442 data: 0.0557 max mem: 9377 +Train: [34] [3500/6250] eta: 0:08:18 lr: 0.000097 grad: 0.1182 (0.1188) loss: 0.7975 (0.7991) time: 0.1975 data: 0.1192 max mem: 9377 +Train: [34] [3600/6250] eta: 0:07:58 lr: 0.000097 grad: 0.1240 (0.1189) loss: 0.7893 (0.7989) time: 0.1570 data: 0.0719 max mem: 9377 +Train: [34] [3700/6250] eta: 0:07:38 lr: 0.000097 grad: 0.1170 (0.1190) loss: 0.7965 (0.7986) time: 0.1549 data: 0.0670 max mem: 9377 +Train: [34] [3800/6250] eta: 0:07:19 lr: 0.000097 grad: 0.1107 (0.1190) loss: 0.7964 (0.7985) time: 0.1986 data: 0.1033 max mem: 9377 +Train: [34] [3900/6250] eta: 0:06:59 lr: 0.000097 grad: 0.1156 (0.1190) loss: 0.7913 (0.7983) time: 0.1232 data: 0.0379 max mem: 9377 +Train: [34] [4000/6250] eta: 0:06:41 lr: 0.000097 grad: 0.1206 (0.1190) loss: 0.7823 (0.7981) time: 0.1897 data: 0.1025 max mem: 9377 +Train: [34] [4100/6250] eta: 0:06:21 lr: 0.000097 grad: 0.1286 (0.1192) loss: 0.7929 (0.7979) time: 0.1481 data: 0.0595 max mem: 9377 +Train: [34] [4200/6250] eta: 0:06:02 lr: 0.000097 grad: 0.1137 (0.1193) loss: 0.7964 (0.7976) time: 0.1304 data: 0.0385 max mem: 9377 +Train: [34] [4300/6250] eta: 0:05:44 lr: 0.000097 grad: 0.1188 (0.1194) loss: 0.7810 (0.7974) time: 0.1321 data: 0.0419 max mem: 9377 +Train: [34] [4400/6250] eta: 0:05:26 lr: 0.000097 grad: 0.1216 (0.1195) loss: 0.7831 (0.7972) time: 0.1618 data: 0.0822 max mem: 9377 +Train: [34] [4500/6250] eta: 0:05:08 lr: 0.000097 grad: 0.1242 (0.1196) loss: 0.7920 (0.7971) time: 0.1593 data: 0.0761 max mem: 9377 +Train: [34] [4600/6250] eta: 0:04:52 lr: 0.000097 grad: 0.1137 (0.1197) loss: 0.7850 (0.7969) time: 0.3383 data: 0.2339 max mem: 9377 +Train: [34] [4700/6250] eta: 0:04:34 lr: 0.000097 grad: 0.1173 (0.1196) loss: 0.7984 (0.7968) time: 0.1668 data: 0.0824 max mem: 9377 +Train: [34] [4800/6250] eta: 0:04:16 lr: 0.000097 grad: 0.1150 (0.1197) loss: 0.7808 (0.7966) time: 0.2788 data: 0.1830 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:58 lr: 0.000097 grad: 0.1187 (0.1197) loss: 0.7887 (0.7964) time: 0.1718 data: 0.0731 max mem: 9377 +Train: [34] [5000/6250] eta: 0:03:40 lr: 0.000097 grad: 0.1210 (0.1198) loss: 0.7840 (0.7963) time: 0.1505 data: 0.0674 max mem: 9377 +Train: [34] [5100/6250] eta: 0:03:22 lr: 0.000097 grad: 0.1127 (0.1198) loss: 0.7911 (0.7961) time: 0.1500 data: 0.0576 max mem: 9377 +Train: [34] [5200/6250] eta: 0:03:04 lr: 0.000097 grad: 0.1220 (0.1198) loss: 0.7869 (0.7960) time: 0.1274 data: 0.0427 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:46 lr: 0.000097 grad: 0.1161 (0.1199) loss: 0.7937 (0.7959) time: 0.1009 data: 0.0122 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:28 lr: 0.000097 grad: 0.1176 (0.1199) loss: 0.8006 (0.7958) time: 0.1324 data: 0.0248 max mem: 9377 +Train: [34] [5500/6250] eta: 0:02:11 lr: 0.000097 grad: 0.1200 (0.1200) loss: 0.7932 (0.7957) time: 0.1198 data: 0.0379 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:53 lr: 0.000097 grad: 0.1257 (0.1202) loss: 0.7796 (0.7955) time: 0.2091 data: 0.1217 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:35 lr: 0.000097 grad: 0.1146 (0.1202) loss: 0.7930 (0.7954) time: 0.0966 data: 0.0002 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:18 lr: 0.000097 grad: 0.1154 (0.1203) loss: 0.7900 (0.7953) time: 0.1641 data: 0.0763 max mem: 9377 +Train: [34] [5900/6250] eta: 0:01:00 lr: 0.000097 grad: 0.1224 (0.1203) loss: 0.7857 (0.7952) time: 0.1188 data: 0.0217 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:43 lr: 0.000097 grad: 0.1134 (0.1203) loss: 0.7853 (0.7951) time: 0.1588 data: 0.0713 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:26 lr: 0.000097 grad: 0.1229 (0.1204) loss: 0.7876 (0.7949) time: 0.1982 data: 0.1121 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:08 lr: 0.000097 grad: 0.1262 (0.1204) loss: 0.7917 (0.7948) time: 0.1848 data: 0.0969 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.1236 (0.1204) loss: 0.7835 (0.7947) time: 0.1578 data: 0.0689 max mem: 9377 +Train: [34] Total time: 0:18:13 (0.1749 s / it) +Averaged stats: lr: 0.000097 grad: 0.1236 (0.1204) loss: 0.7835 (0.7947) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:05:12 loss: 0.8305 (0.8305) time: 5.0384 data: 5.0081 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8294 (0.8302) time: 0.1425 data: 0.1173 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (hcp-train-subset): loss: 0.8294 (0.8302) +Making plots (hcp-train-subset): example=22 +Eval (hcp-val): [34] [ 0/62] eta: 0:03:30 loss: 0.8447 (0.8447) time: 3.3931 data: 3.3169 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8456 (0.8472) time: 0.1185 data: 0.0936 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (hcp-val): loss: 0.8456 (0.8472) +Making plots (hcp-val): example=22 +Eval (nsd-val): [34] [ 0/62] eta: 0:06:04 loss: 0.8106 (0.8106) time: 5.8871 data: 5.8523 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8200 (0.8202) time: 0.1213 data: 0.0956 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (nsd-val): loss: 0.8200 (0.8202) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 12:48:26 lr: 0.000097 grad: 0.3276 (0.3276) loss: 0.8289 (0.8289) time: 7.3771 data: 7.2603 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:25:46 lr: 0.000097 grad: 0.1202 (0.1719) loss: 0.8095 (0.8180) time: 0.1884 data: 0.0575 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:21:38 lr: 0.000097 grad: 0.1379 (0.1564) loss: 0.7946 (0.8139) time: 0.1854 data: 0.0912 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:19:41 lr: 0.000097 grad: 0.1214 (0.1506) loss: 0.7878 (0.8096) time: 0.1633 data: 0.0680 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:21:34 lr: 0.000097 grad: 0.1443 (0.1456) loss: 0.7941 (0.8069) time: 0.3129 data: 0.1459 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:21:17 lr: 0.000097 grad: 0.1170 (0.1417) loss: 0.7953 (0.8048) time: 0.4829 data: 0.2923 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:20:59 lr: 0.000097 grad: 0.1195 (0.1380) loss: 0.7889 (0.8034) time: 0.2434 data: 0.1220 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:21:35 lr: 0.000096 grad: 0.1141 (0.1351) loss: 0.7997 (0.8023) time: 0.3564 data: 0.1865 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:20:18 lr: 0.000096 grad: 0.1153 (0.1338) loss: 0.7886 (0.8011) time: 0.1484 data: 0.0563 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:20:19 lr: 0.000096 grad: 0.1164 (0.1320) loss: 0.7982 (0.8008) time: 0.1149 data: 0.0003 max mem: 9377 +Train: [35] [1000/6250] eta: 0:19:11 lr: 0.000096 grad: 0.1144 (0.1302) loss: 0.7944 (0.8008) time: 0.1556 data: 0.0709 max mem: 9377 +Train: [35] [1100/6250] eta: 0:18:17 lr: 0.000096 grad: 0.1062 (0.1285) loss: 0.8011 (0.8007) time: 0.1369 data: 0.0450 max mem: 9377 +Train: [35] [1200/6250] eta: 0:17:32 lr: 0.000096 grad: 0.1291 (0.1277) loss: 0.7889 (0.7999) time: 0.1956 data: 0.1015 max mem: 9377 +Train: [35] [1300/6250] eta: 0:16:51 lr: 0.000096 grad: 0.1151 (0.1269) loss: 0.7954 (0.7995) time: 0.2055 data: 0.1184 max mem: 9377 +Train: [35] [1400/6250] eta: 0:16:15 lr: 0.000096 grad: 0.1170 (0.1262) loss: 0.7975 (0.7992) time: 0.1119 data: 0.0027 max mem: 9377 +Train: [35] [1500/6250] eta: 0:15:38 lr: 0.000096 grad: 0.1081 (0.1258) loss: 0.7948 (0.7984) time: 0.1793 data: 0.0979 max mem: 9377 +Train: [35] [1600/6250] eta: 0:15:08 lr: 0.000096 grad: 0.1180 (0.1254) loss: 0.7796 (0.7976) time: 0.1879 data: 0.1054 max mem: 9377 +Train: [35] [1700/6250] eta: 0:14:36 lr: 0.000096 grad: 0.1179 (0.1251) loss: 0.7865 (0.7970) time: 0.1566 data: 0.0773 max mem: 9377 +Train: [35] [1800/6250] eta: 0:14:09 lr: 0.000096 grad: 0.1173 (0.1249) loss: 0.7857 (0.7965) time: 0.1758 data: 0.0814 max mem: 9377 +Train: [35] [1900/6250] eta: 0:13:41 lr: 0.000096 grad: 0.1182 (0.1248) loss: 0.7854 (0.7958) time: 0.1695 data: 0.0788 max mem: 9377 +Train: [35] [2000/6250] eta: 0:13:14 lr: 0.000096 grad: 0.1166 (0.1246) loss: 0.7913 (0.7954) time: 0.1384 data: 0.0504 max mem: 9377 +Train: [35] [2100/6250] eta: 0:12:50 lr: 0.000096 grad: 0.1156 (0.1243) loss: 0.7919 (0.7952) time: 0.1502 data: 0.0684 max mem: 9377 +Train: [35] [2200/6250] eta: 0:12:26 lr: 0.000096 grad: 0.1228 (0.1244) loss: 0.7908 (0.7950) time: 0.1813 data: 0.1019 max mem: 9377 +Train: [35] [2300/6250] eta: 0:12:01 lr: 0.000096 grad: 0.1121 (0.1243) loss: 0.8003 (0.7949) time: 0.1609 data: 0.0764 max mem: 9377 +Train: [35] [2400/6250] eta: 0:11:38 lr: 0.000096 grad: 0.1169 (0.1242) loss: 0.7901 (0.7946) time: 0.1222 data: 0.0420 max mem: 9377 +Train: [35] [2500/6250] eta: 0:11:18 lr: 0.000096 grad: 0.1147 (0.1240) loss: 0.7895 (0.7945) time: 0.1565 data: 0.0750 max mem: 9377 +Train: [35] [2600/6250] eta: 0:10:57 lr: 0.000096 grad: 0.1189 (0.1238) loss: 0.7871 (0.7943) time: 0.1314 data: 0.0527 max mem: 9377 +Train: [35] [2700/6250] eta: 0:10:36 lr: 0.000096 grad: 0.1173 (0.1236) loss: 0.7895 (0.7942) time: 0.1840 data: 0.0959 max mem: 9377 +Train: [35] [2800/6250] eta: 0:10:15 lr: 0.000096 grad: 0.1168 (0.1234) loss: 0.7870 (0.7941) time: 0.1515 data: 0.0597 max mem: 9377 +Train: [35] [2900/6250] eta: 0:09:56 lr: 0.000096 grad: 0.1187 (0.1234) loss: 0.7920 (0.7940) time: 0.1892 data: 0.1048 max mem: 9377 +Train: [35] [3000/6250] eta: 0:09:37 lr: 0.000096 grad: 0.1264 (0.1233) loss: 0.7894 (0.7938) time: 0.2121 data: 0.1252 max mem: 9377 +Train: [35] [3100/6250] eta: 0:09:17 lr: 0.000096 grad: 0.1290 (0.1232) loss: 0.7890 (0.7937) time: 0.1547 data: 0.0687 max mem: 9377 +Train: [35] [3200/6250] eta: 0:08:57 lr: 0.000096 grad: 0.1234 (0.1232) loss: 0.7968 (0.7935) time: 0.1452 data: 0.0536 max mem: 9377 +Train: [35] [3300/6250] eta: 0:08:39 lr: 0.000096 grad: 0.1197 (0.1231) loss: 0.7835 (0.7934) time: 0.1920 data: 0.1067 max mem: 9377 +Train: [35] [3400/6250] eta: 0:08:20 lr: 0.000096 grad: 0.1233 (0.1231) loss: 0.7807 (0.7934) time: 0.1109 data: 0.0053 max mem: 9377 +Train: [35] [3500/6250] eta: 0:08:02 lr: 0.000096 grad: 0.1184 (0.1232) loss: 0.7922 (0.7933) time: 0.1579 data: 0.0697 max mem: 9377 +Train: [35] [3600/6250] eta: 0:07:43 lr: 0.000096 grad: 0.1167 (0.1233) loss: 0.7924 (0.7932) time: 0.1333 data: 0.0324 max mem: 9377 +Train: [35] [3700/6250] eta: 0:07:36 lr: 0.000096 grad: 0.1221 (0.1234) loss: 0.7886 (0.7930) time: 0.7122 data: 0.5822 max mem: 9377 +Train: [35] [3800/6250] eta: 0:07:19 lr: 0.000096 grad: 0.1228 (0.1234) loss: 0.7845 (0.7930) time: 0.3114 data: 0.2074 max mem: 9377 +Train: [35] [3900/6250] eta: 0:07:01 lr: 0.000096 grad: 0.1168 (0.1234) loss: 0.8033 (0.7931) time: 0.1182 data: 0.0296 max mem: 9377 +Train: [35] [4000/6250] eta: 0:06:43 lr: 0.000096 grad: 0.1165 (0.1233) loss: 0.8003 (0.7931) time: 0.2021 data: 0.1147 max mem: 9377 +Train: [35] [4100/6250] eta: 0:06:24 lr: 0.000096 grad: 0.1218 (0.1233) loss: 0.7921 (0.7932) time: 0.2202 data: 0.1341 max mem: 9377 +Train: [35] [4200/6250] eta: 0:06:05 lr: 0.000096 grad: 0.1217 (0.1232) loss: 0.8015 (0.7933) time: 0.1681 data: 0.0874 max mem: 9377 +Train: [35] [4300/6250] eta: 0:05:46 lr: 0.000095 grad: 0.1122 (0.1231) loss: 0.7954 (0.7934) time: 0.1598 data: 0.0796 max mem: 9377 +Train: [35] [4400/6250] eta: 0:05:28 lr: 0.000095 grad: 0.1193 (0.1230) loss: 0.8048 (0.7935) time: 0.1598 data: 0.0374 max mem: 9377 +Train: [35] [4500/6250] eta: 0:05:17 lr: 0.000095 grad: 0.1197 (0.1230) loss: 0.7935 (0.7935) time: 0.3844 data: 0.3042 max mem: 9377 +Train: [35] [4600/6250] eta: 0:04:57 lr: 0.000095 grad: 0.1164 (0.1230) loss: 0.7946 (0.7936) time: 0.1756 data: 0.0769 max mem: 9377 +Train: [35] [4700/6250] eta: 0:04:38 lr: 0.000095 grad: 0.1188 (0.1229) loss: 0.8009 (0.7936) time: 0.1186 data: 0.0143 max mem: 9377 +Train: [35] [4800/6250] eta: 0:04:21 lr: 0.000095 grad: 0.1158 (0.1230) loss: 0.7986 (0.7937) time: 0.1773 data: 0.0681 max mem: 9377 +Train: [35] [4900/6250] eta: 0:04:03 lr: 0.000095 grad: 0.1212 (0.1229) loss: 0.7913 (0.7937) time: 0.1602 data: 0.0593 max mem: 9377 +Train: [35] [5000/6250] eta: 0:03:44 lr: 0.000095 grad: 0.1226 (0.1228) loss: 0.7965 (0.7937) time: 0.1607 data: 0.0738 max mem: 9377 +Train: [35] [5100/6250] eta: 0:03:26 lr: 0.000095 grad: 0.1251 (0.1228) loss: 0.7941 (0.7938) time: 0.2744 data: 0.1542 max mem: 9377 +Train: [35] [5200/6250] eta: 0:03:07 lr: 0.000095 grad: 0.1193 (0.1227) loss: 0.7976 (0.7939) time: 0.1514 data: 0.0564 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:49 lr: 0.000095 grad: 0.1210 (0.1227) loss: 0.7940 (0.7939) time: 0.1456 data: 0.0607 max mem: 9377 +Train: [35] [5400/6250] eta: 0:02:32 lr: 0.000095 grad: 0.1323 (0.1227) loss: 0.7851 (0.7939) time: 0.1461 data: 0.0456 max mem: 9377 +Train: [35] [5500/6250] eta: 0:02:14 lr: 0.000095 grad: 0.1222 (0.1227) loss: 0.7882 (0.7938) time: 0.1347 data: 0.0423 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:57 lr: 0.000095 grad: 0.1272 (0.1227) loss: 0.7981 (0.7939) time: 0.1658 data: 0.0774 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:38 lr: 0.000095 grad: 0.1220 (0.1227) loss: 0.7919 (0.7939) time: 0.1333 data: 0.0482 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:20 lr: 0.000095 grad: 0.1200 (0.1227) loss: 0.7964 (0.7939) time: 0.1502 data: 0.0591 max mem: 9377 +Train: [35] [5900/6250] eta: 0:01:02 lr: 0.000095 grad: 0.1191 (0.1227) loss: 0.7973 (0.7939) time: 0.1962 data: 0.0985 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:44 lr: 0.000095 grad: 0.1222 (0.1227) loss: 0.7912 (0.7940) time: 0.3995 data: 0.2957 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:27 lr: 0.000095 grad: 0.1155 (0.1226) loss: 0.7897 (0.7940) time: 0.1574 data: 0.0500 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:08 lr: 0.000095 grad: 0.1117 (0.1226) loss: 0.7980 (0.7941) time: 0.1443 data: 0.0459 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.1192 (0.1226) loss: 0.7998 (0.7941) time: 0.1222 data: 0.0422 max mem: 9377 +Train: [35] Total time: 0:18:46 (0.1803 s / it) +Averaged stats: lr: 0.000095 grad: 0.1192 (0.1226) loss: 0.7998 (0.7941) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:04:15 loss: 0.8301 (0.8301) time: 4.1250 data: 4.0385 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8324 (0.8296) time: 0.1227 data: 0.0975 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:13 (0.2236 s / it) +Averaged stats (hcp-train-subset): loss: 0.8324 (0.8296) +Eval (hcp-val): [35] [ 0/62] eta: 0:05:09 loss: 0.8463 (0.8463) time: 4.9963 data: 4.9659 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8455 (0.8468) time: 0.1180 data: 0.0930 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:13 (0.2186 s / it) +Averaged stats (hcp-val): loss: 0.8455 (0.8468) +Eval (nsd-val): [35] [ 0/62] eta: 0:04:57 loss: 0.8135 (0.8135) time: 4.7942 data: 4.7621 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8217 (0.8218) time: 0.1435 data: 0.1160 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.8217 (0.8218) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [36] [ 0/6250] eta: 9:39:26 lr: 0.000095 grad: 0.0778 (0.0778) loss: 0.8492 (0.8492) time: 5.5626 data: 5.3597 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:23:58 lr: 0.000095 grad: 0.1149 (0.1408) loss: 0.7986 (0.8240) time: 0.1625 data: 0.0507 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:21:04 lr: 0.000095 grad: 0.1028 (0.1374) loss: 0.8250 (0.8160) time: 0.2091 data: 0.1141 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:19:58 lr: 0.000095 grad: 0.1207 (0.1313) loss: 0.8173 (0.8141) time: 0.2519 data: 0.1502 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:18:51 lr: 0.000095 grad: 0.1137 (0.1279) loss: 0.7995 (0.8121) time: 0.1821 data: 0.0916 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:17:55 lr: 0.000095 grad: 0.1129 (0.1271) loss: 0.7931 (0.8091) time: 0.1484 data: 0.0599 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:17:10 lr: 0.000095 grad: 0.1152 (0.1256) loss: 0.7911 (0.8068) time: 0.2070 data: 0.1149 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:16:38 lr: 0.000095 grad: 0.1162 (0.1250) loss: 0.8006 (0.8055) time: 0.1901 data: 0.0979 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:16:02 lr: 0.000095 grad: 0.1158 (0.1247) loss: 0.8053 (0.8046) time: 0.1786 data: 0.0869 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:15:33 lr: 0.000095 grad: 0.1230 (0.1241) loss: 0.7960 (0.8038) time: 0.1706 data: 0.0716 max mem: 9377 +Train: [36] [1000/6250] eta: 0:15:14 lr: 0.000095 grad: 0.1208 (0.1236) loss: 0.7882 (0.8029) time: 0.1856 data: 0.0824 max mem: 9377 +Train: [36] [1100/6250] eta: 0:14:41 lr: 0.000095 grad: 0.1138 (0.1233) loss: 0.8010 (0.8021) time: 0.1104 data: 0.0220 max mem: 9377 +Train: [36] [1200/6250] eta: 0:14:15 lr: 0.000095 grad: 0.1149 (0.1229) loss: 0.7806 (0.8014) time: 0.1443 data: 0.0538 max mem: 9377 +Train: [36] [1300/6250] eta: 0:13:52 lr: 0.000095 grad: 0.1233 (0.1225) loss: 0.7804 (0.8005) time: 0.1775 data: 0.0965 max mem: 9377 +Train: [36] [1400/6250] eta: 0:13:31 lr: 0.000095 grad: 0.1143 (0.1224) loss: 0.7928 (0.7999) time: 0.1518 data: 0.0664 max mem: 9377 +Train: [36] [1500/6250] eta: 0:13:08 lr: 0.000095 grad: 0.1122 (0.1221) loss: 0.7953 (0.7993) time: 0.1347 data: 0.0419 max mem: 9377 +Train: [36] [1600/6250] eta: 0:12:50 lr: 0.000094 grad: 0.1138 (0.1220) loss: 0.7823 (0.7985) time: 0.2002 data: 0.1137 max mem: 9377 +Train: [36] [1700/6250] eta: 0:12:26 lr: 0.000094 grad: 0.1259 (0.1219) loss: 0.7855 (0.7980) time: 0.1346 data: 0.0461 max mem: 9377 +Train: [36] [1800/6250] eta: 0:12:06 lr: 0.000094 grad: 0.1163 (0.1218) loss: 0.7983 (0.7978) time: 0.1519 data: 0.0642 max mem: 9377 +Train: [36] [1900/6250] eta: 0:11:46 lr: 0.000094 grad: 0.1146 (0.1217) loss: 0.7936 (0.7975) time: 0.1529 data: 0.0635 max mem: 9377 +Train: [36] [2000/6250] eta: 0:11:29 lr: 0.000094 grad: 0.1154 (0.1214) loss: 0.8012 (0.7973) time: 0.1773 data: 0.0838 max mem: 9377 +Train: [36] [2100/6250] eta: 0:11:11 lr: 0.000094 grad: 0.1153 (0.1213) loss: 0.7940 (0.7970) time: 0.1687 data: 0.0803 max mem: 9377 +Train: [36] [2200/6250] eta: 0:10:54 lr: 0.000094 grad: 0.1114 (0.1212) loss: 0.7924 (0.7969) time: 0.1913 data: 0.1115 max mem: 9377 +Train: [36] [2300/6250] eta: 0:10:36 lr: 0.000094 grad: 0.1116 (0.1212) loss: 0.8013 (0.7969) time: 0.1662 data: 0.0888 max mem: 9377 +Train: [36] [2400/6250] eta: 0:10:18 lr: 0.000094 grad: 0.1172 (0.1210) loss: 0.7946 (0.7968) time: 0.1249 data: 0.0332 max mem: 9377 +Train: [36] [2500/6250] eta: 0:10:02 lr: 0.000094 grad: 0.1224 (0.1210) loss: 0.7883 (0.7966) time: 0.1605 data: 0.0804 max mem: 9377 +Train: [36] [2600/6250] eta: 0:09:45 lr: 0.000094 grad: 0.1138 (0.1210) loss: 0.8064 (0.7964) time: 0.1471 data: 0.0660 max mem: 9377 +Train: [36] [2700/6250] eta: 0:09:28 lr: 0.000094 grad: 0.1145 (0.1210) loss: 0.7998 (0.7963) time: 0.1132 data: 0.0320 max mem: 9377 +Train: [36] [2800/6250] eta: 0:09:11 lr: 0.000094 grad: 0.1178 (0.1210) loss: 0.7929 (0.7961) time: 0.1720 data: 0.0859 max mem: 9377 +Train: [36] [2900/6250] eta: 0:08:57 lr: 0.000094 grad: 0.1195 (0.1210) loss: 0.7871 (0.7959) time: 0.1324 data: 0.0308 max mem: 9377 +Train: [36] [3000/6250] eta: 0:08:40 lr: 0.000094 grad: 0.1189 (0.1212) loss: 0.7919 (0.7958) time: 0.1547 data: 0.0716 max mem: 9377 +Train: [36] [3100/6250] eta: 0:08:24 lr: 0.000094 grad: 0.1178 (0.1212) loss: 0.7885 (0.7956) time: 0.1077 data: 0.0324 max mem: 9377 +Train: [36] [3200/6250] eta: 0:08:08 lr: 0.000094 grad: 0.1221 (0.1213) loss: 0.7938 (0.7954) time: 0.1823 data: 0.0917 max mem: 9377 +Train: [36] [3300/6250] eta: 0:07:53 lr: 0.000094 grad: 0.1111 (0.1212) loss: 0.7897 (0.7954) time: 0.1703 data: 0.0923 max mem: 9377 +Train: [36] [3400/6250] eta: 0:07:37 lr: 0.000094 grad: 0.1190 (0.1212) loss: 0.7966 (0.7953) time: 0.1528 data: 0.0651 max mem: 9377 +Train: [36] [3500/6250] eta: 0:07:21 lr: 0.000094 grad: 0.1300 (0.1214) loss: 0.7878 (0.7952) time: 0.1837 data: 0.0933 max mem: 9377 +Train: [36] [3600/6250] eta: 0:07:06 lr: 0.000094 grad: 0.1270 (0.1216) loss: 0.7915 (0.7950) time: 0.1517 data: 0.0576 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:49 lr: 0.000094 grad: 0.1195 (0.1217) loss: 0.7992 (0.7949) time: 0.1137 data: 0.0231 max mem: 9377 +Train: [36] [3800/6250] eta: 0:06:33 lr: 0.000094 grad: 0.1202 (0.1218) loss: 0.7964 (0.7947) time: 0.1517 data: 0.0620 max mem: 9377 +Train: [36] [3900/6250] eta: 0:06:16 lr: 0.000094 grad: 0.1302 (0.1219) loss: 0.7955 (0.7945) time: 0.1209 data: 0.0204 max mem: 9377 +Train: [36] [4000/6250] eta: 0:05:59 lr: 0.000094 grad: 0.1229 (0.1219) loss: 0.7906 (0.7944) time: 0.1441 data: 0.0489 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:43 lr: 0.000094 grad: 0.1221 (0.1219) loss: 0.7765 (0.7942) time: 0.1542 data: 0.0530 max mem: 9377 +Train: [36] [4200/6250] eta: 0:05:27 lr: 0.000094 grad: 0.1253 (0.1220) loss: 0.7901 (0.7939) time: 0.1210 data: 0.0237 max mem: 9377 +Train: [36] [4300/6250] eta: 0:05:11 lr: 0.000094 grad: 0.1271 (0.1221) loss: 0.7846 (0.7937) time: 0.1185 data: 0.0341 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:54 lr: 0.000094 grad: 0.1210 (0.1221) loss: 0.7876 (0.7936) time: 0.1278 data: 0.0469 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:38 lr: 0.000094 grad: 0.1213 (0.1221) loss: 0.7964 (0.7934) time: 0.1441 data: 0.0604 max mem: 9377 +Train: [36] [4600/6250] eta: 0:04:22 lr: 0.000094 grad: 0.1190 (0.1221) loss: 0.7881 (0.7933) time: 0.1752 data: 0.0881 max mem: 9377 +Train: [36] [4700/6250] eta: 0:04:06 lr: 0.000094 grad: 0.1177 (0.1221) loss: 0.7919 (0.7933) time: 0.1745 data: 0.0936 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:50 lr: 0.000094 grad: 0.1179 (0.1221) loss: 0.7943 (0.7932) time: 0.1218 data: 0.0258 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:34 lr: 0.000094 grad: 0.1194 (0.1222) loss: 0.7831 (0.7931) time: 0.1457 data: 0.0631 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:18 lr: 0.000094 grad: 0.1206 (0.1222) loss: 0.7815 (0.7929) time: 0.1276 data: 0.0376 max mem: 9377 +Train: [36] [5100/6250] eta: 0:03:02 lr: 0.000093 grad: 0.1251 (0.1223) loss: 0.7855 (0.7927) time: 0.1505 data: 0.0609 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:46 lr: 0.000093 grad: 0.1167 (0.1224) loss: 0.7964 (0.7927) time: 0.1429 data: 0.0602 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:31 lr: 0.000093 grad: 0.1228 (0.1224) loss: 0.7779 (0.7926) time: 0.1225 data: 0.0084 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:16 lr: 0.000093 grad: 0.1226 (0.1224) loss: 0.7814 (0.7924) time: 0.1285 data: 0.0204 max mem: 9377 +Train: [36] [5500/6250] eta: 0:02:00 lr: 0.000093 grad: 0.1176 (0.1224) loss: 0.7828 (0.7923) time: 0.0991 data: 0.0003 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:44 lr: 0.000093 grad: 0.1182 (0.1225) loss: 0.7890 (0.7923) time: 0.2548 data: 0.1699 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:27 lr: 0.000093 grad: 0.1283 (0.1224) loss: 0.7866 (0.7923) time: 0.1896 data: 0.0996 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:12 lr: 0.000093 grad: 0.1169 (0.1224) loss: 0.7960 (0.7922) time: 0.2207 data: 0.1423 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:55 lr: 0.000093 grad: 0.1204 (0.1224) loss: 0.7929 (0.7922) time: 0.1246 data: 0.0351 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:39 lr: 0.000093 grad: 0.1202 (0.1224) loss: 0.7976 (0.7922) time: 0.1398 data: 0.0526 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:24 lr: 0.000093 grad: 0.1259 (0.1223) loss: 0.7908 (0.7922) time: 0.2244 data: 0.1366 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:08 lr: 0.000093 grad: 0.1195 (0.1223) loss: 0.8042 (0.7922) time: 0.1323 data: 0.0265 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.1135 (0.1222) loss: 0.7938 (0.7923) time: 0.1371 data: 0.0582 max mem: 9377 +Train: [36] Total time: 0:16:50 (0.1617 s / it) +Averaged stats: lr: 0.000093 grad: 0.1135 (0.1222) loss: 0.7938 (0.7923) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:03:32 loss: 0.8351 (0.8351) time: 3.4202 data: 3.3400 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8290 (0.8291) time: 0.1454 data: 0.1187 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-train-subset): loss: 0.8290 (0.8291) +Eval (hcp-val): [36] [ 0/62] eta: 0:05:49 loss: 0.8416 (0.8416) time: 5.6316 data: 5.6020 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8446 (0.8466) time: 0.1050 data: 0.0802 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:14 (0.2293 s / it) +Averaged stats (hcp-val): loss: 0.8446 (0.8466) +Eval (nsd-val): [36] [ 0/62] eta: 0:04:41 loss: 0.8143 (0.8143) time: 4.5456 data: 4.4821 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8234 (0.8242) time: 0.1219 data: 0.0962 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:14 (0.2338 s / it) +Averaged stats (nsd-val): loss: 0.8234 (0.8242) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 8:52:46 lr: 0.000093 grad: 0.0766 (0.0766) loss: 0.8649 (0.8649) time: 5.1147 data: 4.7189 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:24:59 lr: 0.000093 grad: 0.1460 (0.1746) loss: 0.8006 (0.8133) time: 0.2131 data: 0.1137 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:20:41 lr: 0.000093 grad: 0.1151 (0.1625) loss: 0.8145 (0.8070) time: 0.1953 data: 0.1013 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:18:52 lr: 0.000093 grad: 0.1175 (0.1522) loss: 0.8024 (0.8054) time: 0.1735 data: 0.0752 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:17:53 lr: 0.000093 grad: 0.1274 (0.1455) loss: 0.7913 (0.8035) time: 0.1441 data: 0.0553 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:17:35 lr: 0.000093 grad: 0.1253 (0.1416) loss: 0.8012 (0.8023) time: 0.1865 data: 0.0813 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:16:43 lr: 0.000093 grad: 0.1168 (0.1381) loss: 0.8002 (0.8016) time: 0.1317 data: 0.0463 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:16:18 lr: 0.000093 grad: 0.1067 (0.1350) loss: 0.8043 (0.8018) time: 0.1723 data: 0.0920 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:15:52 lr: 0.000093 grad: 0.1142 (0.1330) loss: 0.7892 (0.8012) time: 0.1730 data: 0.0763 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:15:23 lr: 0.000093 grad: 0.1131 (0.1311) loss: 0.7853 (0.8005) time: 0.1681 data: 0.0700 max mem: 9377 +Train: [37] [1000/6250] eta: 0:15:00 lr: 0.000093 grad: 0.1151 (0.1296) loss: 0.8044 (0.8002) time: 0.1597 data: 0.0782 max mem: 9377 +Train: [37] [1100/6250] eta: 0:14:34 lr: 0.000093 grad: 0.1114 (0.1282) loss: 0.7989 (0.8001) time: 0.1319 data: 0.0468 max mem: 9377 +Train: [37] [1200/6250] eta: 0:14:08 lr: 0.000093 grad: 0.1111 (0.1272) loss: 0.7923 (0.7998) time: 0.1407 data: 0.0474 max mem: 9377 +Train: [37] [1300/6250] eta: 0:13:46 lr: 0.000093 grad: 0.1135 (0.1263) loss: 0.7927 (0.7994) time: 0.1596 data: 0.0666 max mem: 9377 +Train: [37] [1400/6250] eta: 0:13:19 lr: 0.000093 grad: 0.1089 (0.1254) loss: 0.8035 (0.7992) time: 0.1849 data: 0.0761 max mem: 9377 +Train: [37] [1500/6250] eta: 0:12:56 lr: 0.000093 grad: 0.1233 (0.1248) loss: 0.7956 (0.7992) time: 0.1608 data: 0.0641 max mem: 9377 +Train: [37] [1600/6250] eta: 0:12:36 lr: 0.000093 grad: 0.1089 (0.1244) loss: 0.8010 (0.7990) time: 0.1062 data: 0.0093 max mem: 9377 +Train: [37] [1700/6250] eta: 0:12:17 lr: 0.000093 grad: 0.1152 (0.1238) loss: 0.7841 (0.7989) time: 0.1691 data: 0.0809 max mem: 9377 +Train: [37] [1800/6250] eta: 0:11:58 lr: 0.000093 grad: 0.1114 (0.1233) loss: 0.7990 (0.7988) time: 0.1422 data: 0.0535 max mem: 9377 +Train: [37] [1900/6250] eta: 0:11:40 lr: 0.000093 grad: 0.1201 (0.1233) loss: 0.7871 (0.7985) time: 0.1555 data: 0.0739 max mem: 9377 +Train: [37] [2000/6250] eta: 0:11:23 lr: 0.000093 grad: 0.1172 (0.1229) loss: 0.7982 (0.7983) time: 0.1839 data: 0.0994 max mem: 9377 +Train: [37] [2100/6250] eta: 0:11:06 lr: 0.000093 grad: 0.1186 (0.1227) loss: 0.7804 (0.7980) time: 0.1702 data: 0.0883 max mem: 9377 +Train: [37] [2200/6250] eta: 0:10:50 lr: 0.000093 grad: 0.1122 (0.1223) loss: 0.7949 (0.7978) time: 0.0954 data: 0.0002 max mem: 9377 +Train: [37] [2300/6250] eta: 0:10:34 lr: 0.000092 grad: 0.1253 (0.1223) loss: 0.7916 (0.7975) time: 0.1828 data: 0.0948 max mem: 9377 +Train: [37] [2400/6250] eta: 0:10:27 lr: 0.000092 grad: 0.1227 (0.1222) loss: 0.7877 (0.7973) time: 0.1787 data: 0.0855 max mem: 9377 +Train: [37] [2500/6250] eta: 0:10:12 lr: 0.000092 grad: 0.1188 (0.1221) loss: 0.7825 (0.7970) time: 0.1577 data: 0.0809 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:56 lr: 0.000092 grad: 0.1124 (0.1221) loss: 0.7969 (0.7969) time: 0.1865 data: 0.0958 max mem: 9377 +Train: [37] [2700/6250] eta: 0:09:42 lr: 0.000092 grad: 0.1197 (0.1221) loss: 0.7932 (0.7966) time: 0.1966 data: 0.1015 max mem: 9377 +Train: [37] [2800/6250] eta: 0:09:25 lr: 0.000092 grad: 0.1162 (0.1219) loss: 0.8009 (0.7965) time: 0.2786 data: 0.1887 max mem: 9377 +Train: [37] [2900/6250] eta: 0:09:07 lr: 0.000092 grad: 0.1188 (0.1219) loss: 0.7918 (0.7964) time: 0.1551 data: 0.0767 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:52 lr: 0.000092 grad: 0.1147 (0.1218) loss: 0.7969 (0.7964) time: 0.1133 data: 0.0217 max mem: 9377 +Train: [37] [3100/6250] eta: 0:08:36 lr: 0.000092 grad: 0.1137 (0.1216) loss: 0.7898 (0.7965) time: 0.1742 data: 0.0908 max mem: 9377 +Train: [37] [3200/6250] eta: 0:08:19 lr: 0.000092 grad: 0.1118 (0.1216) loss: 0.7977 (0.7964) time: 0.1543 data: 0.0593 max mem: 9377 +Train: [37] [3300/6250] eta: 0:08:03 lr: 0.000092 grad: 0.1276 (0.1217) loss: 0.7937 (0.7964) time: 0.1664 data: 0.0794 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:46 lr: 0.000092 grad: 0.1078 (0.1217) loss: 0.7986 (0.7963) time: 0.1203 data: 0.0378 max mem: 9377 +Train: [37] [3500/6250] eta: 0:07:28 lr: 0.000092 grad: 0.1200 (0.1217) loss: 0.7934 (0.7962) time: 0.1391 data: 0.0619 max mem: 9377 +Train: [37] [3600/6250] eta: 0:07:12 lr: 0.000092 grad: 0.1059 (0.1217) loss: 0.8055 (0.7963) time: 0.1789 data: 0.0875 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:56 lr: 0.000092 grad: 0.1272 (0.1215) loss: 0.8076 (0.7964) time: 0.1756 data: 0.0958 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:51 lr: 0.000092 grad: 0.1152 (0.1214) loss: 0.7946 (0.7964) time: 0.1200 data: 0.0003 max mem: 9377 +Train: [37] [3900/6250] eta: 0:06:34 lr: 0.000092 grad: 0.1156 (0.1213) loss: 0.7947 (0.7965) time: 0.1459 data: 0.0443 max mem: 9377 +Train: [37] [4000/6250] eta: 0:06:16 lr: 0.000092 grad: 0.1093 (0.1211) loss: 0.8023 (0.7966) time: 0.1686 data: 0.0871 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:58 lr: 0.000092 grad: 0.1178 (0.1210) loss: 0.7959 (0.7967) time: 0.1650 data: 0.0711 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:42 lr: 0.000092 grad: 0.1142 (0.1209) loss: 0.8044 (0.7967) time: 0.2572 data: 0.1572 max mem: 9377 +Train: [37] [4300/6250] eta: 0:05:24 lr: 0.000092 grad: 0.1108 (0.1208) loss: 0.7997 (0.7967) time: 0.1571 data: 0.0645 max mem: 9377 +Train: [37] [4400/6250] eta: 0:05:08 lr: 0.000092 grad: 0.1146 (0.1208) loss: 0.8026 (0.7967) time: 0.0959 data: 0.0002 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:51 lr: 0.000092 grad: 0.1221 (0.1208) loss: 0.7961 (0.7966) time: 0.1531 data: 0.0593 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:35 lr: 0.000092 grad: 0.1165 (0.1208) loss: 0.7979 (0.7966) time: 0.1672 data: 0.0567 max mem: 9377 +Train: [37] [4700/6250] eta: 0:04:18 lr: 0.000092 grad: 0.1237 (0.1208) loss: 0.7936 (0.7966) time: 0.1688 data: 0.0907 max mem: 9377 +Train: [37] [4800/6250] eta: 0:04:01 lr: 0.000092 grad: 0.1195 (0.1207) loss: 0.7908 (0.7966) time: 0.1734 data: 0.0910 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:44 lr: 0.000092 grad: 0.1127 (0.1207) loss: 0.7938 (0.7966) time: 0.1805 data: 0.0982 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:27 lr: 0.000092 grad: 0.1142 (0.1207) loss: 0.7859 (0.7965) time: 0.1009 data: 0.0067 max mem: 9377 +Train: [37] [5100/6250] eta: 0:03:10 lr: 0.000092 grad: 0.1197 (0.1208) loss: 0.7856 (0.7964) time: 0.1667 data: 0.0769 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:55 lr: 0.000092 grad: 0.1251 (0.1208) loss: 0.7881 (0.7963) time: 0.4203 data: 0.3220 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:38 lr: 0.000092 grad: 0.1165 (0.1208) loss: 0.7840 (0.7961) time: 0.1533 data: 0.0178 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:22 lr: 0.000092 grad: 0.1174 (0.1208) loss: 0.7966 (0.7961) time: 0.1834 data: 0.0968 max mem: 9377 +Train: [37] [5500/6250] eta: 0:02:05 lr: 0.000092 grad: 0.1162 (0.1207) loss: 0.7917 (0.7960) time: 0.1432 data: 0.0232 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:49 lr: 0.000092 grad: 0.1089 (0.1207) loss: 0.7928 (0.7960) time: 0.1433 data: 0.0494 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:32 lr: 0.000091 grad: 0.1110 (0.1207) loss: 0.7941 (0.7960) time: 0.1289 data: 0.0419 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:15 lr: 0.000091 grad: 0.1168 (0.1207) loss: 0.7998 (0.7960) time: 0.1699 data: 0.0741 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:58 lr: 0.000091 grad: 0.1184 (0.1207) loss: 0.7986 (0.7959) time: 0.1599 data: 0.0832 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:41 lr: 0.000091 grad: 0.1199 (0.1207) loss: 0.7957 (0.7959) time: 0.2425 data: 0.1585 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:25 lr: 0.000091 grad: 0.1150 (0.1207) loss: 0.7906 (0.7958) time: 0.2287 data: 0.1385 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:08 lr: 0.000091 grad: 0.1108 (0.1207) loss: 0.7914 (0.7958) time: 0.1687 data: 0.0839 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.1182 (0.1207) loss: 0.7871 (0.7958) time: 0.1667 data: 0.0820 max mem: 9377 +Train: [37] Total time: 0:17:31 (0.1682 s / it) +Averaged stats: lr: 0.000091 grad: 0.1182 (0.1207) loss: 0.7871 (0.7958) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:05:32 loss: 0.8292 (0.8292) time: 5.3680 data: 5.3376 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8295 (0.8275) time: 0.1382 data: 0.1130 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (hcp-train-subset): loss: 0.8295 (0.8275) +Eval (hcp-val): [37] [ 0/62] eta: 0:03:54 loss: 0.8441 (0.8441) time: 3.7835 data: 3.7195 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8468 (0.8483) time: 0.1085 data: 0.0821 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:13 (0.2148 s / it) +Averaged stats (hcp-val): loss: 0.8468 (0.8483) +Eval (nsd-val): [37] [ 0/62] eta: 0:04:24 loss: 0.8112 (0.8112) time: 4.2678 data: 4.1945 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8194 (0.8213) time: 0.1409 data: 0.1142 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:13 (0.2201 s / it) +Averaged stats (nsd-val): loss: 0.8194 (0.8213) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 9:48:02 lr: 0.000091 grad: 0.2950 (0.2950) loss: 0.7934 (0.7934) time: 5.6452 data: 5.4513 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:24:00 lr: 0.000091 grad: 0.1323 (0.1828) loss: 0.8026 (0.8128) time: 0.1514 data: 0.0326 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:20:35 lr: 0.000091 grad: 0.1447 (0.1632) loss: 0.7974 (0.8059) time: 0.1665 data: 0.0779 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:19:03 lr: 0.000091 grad: 0.1265 (0.1567) loss: 0.7935 (0.8011) time: 0.1687 data: 0.0761 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:18:16 lr: 0.000091 grad: 0.1144 (0.1475) loss: 0.8017 (0.8017) time: 0.1878 data: 0.0659 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:17:19 lr: 0.000091 grad: 0.1184 (0.1434) loss: 0.7828 (0.8005) time: 0.1539 data: 0.0657 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:16:43 lr: 0.000091 grad: 0.1212 (0.1402) loss: 0.7977 (0.7994) time: 0.1289 data: 0.0257 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:16:07 lr: 0.000091 grad: 0.1177 (0.1375) loss: 0.7893 (0.7985) time: 0.1334 data: 0.0368 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:15:37 lr: 0.000091 grad: 0.1177 (0.1359) loss: 0.7935 (0.7977) time: 0.1390 data: 0.0332 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:15:21 lr: 0.000091 grad: 0.1252 (0.1346) loss: 0.7854 (0.7972) time: 0.1600 data: 0.0722 max mem: 9377 +Train: [38] [1000/6250] eta: 0:14:57 lr: 0.000091 grad: 0.1122 (0.1334) loss: 0.7859 (0.7966) time: 0.1891 data: 0.1058 max mem: 9377 +Train: [38] [1100/6250] eta: 0:14:29 lr: 0.000091 grad: 0.1210 (0.1326) loss: 0.7959 (0.7962) time: 0.1466 data: 0.0699 max mem: 9377 +Train: [38] [1200/6250] eta: 0:14:15 lr: 0.000091 grad: 0.1151 (0.1315) loss: 0.8001 (0.7961) time: 0.1768 data: 0.0920 max mem: 9377 +Train: [38] [1300/6250] eta: 0:13:57 lr: 0.000091 grad: 0.1131 (0.1304) loss: 0.7910 (0.7959) time: 0.1982 data: 0.1156 max mem: 9377 +Train: [38] [1400/6250] eta: 0:14:00 lr: 0.000091 grad: 0.1138 (0.1297) loss: 0.7879 (0.7956) time: 0.4226 data: 0.3164 max mem: 9377 +Train: [38] [1500/6250] eta: 0:13:41 lr: 0.000091 grad: 0.1244 (0.1293) loss: 0.7936 (0.7949) time: 0.1221 data: 0.0003 max mem: 9377 +Train: [38] [1600/6250] eta: 0:13:51 lr: 0.000091 grad: 0.1288 (0.1290) loss: 0.7891 (0.7945) time: 0.3291 data: 0.1795 max mem: 9377 +Train: [38] [1700/6250] eta: 0:13:54 lr: 0.000091 grad: 0.1098 (0.1285) loss: 0.7961 (0.7942) time: 0.5301 data: 0.4172 max mem: 9377 +Train: [38] [1800/6250] eta: 0:13:30 lr: 0.000091 grad: 0.1124 (0.1280) loss: 0.7958 (0.7941) time: 0.1791 data: 0.0833 max mem: 9377 +Train: [38] [1900/6250] eta: 0:13:13 lr: 0.000091 grad: 0.1245 (0.1277) loss: 0.7840 (0.7940) time: 0.2561 data: 0.1759 max mem: 9377 +Train: [38] [2000/6250] eta: 0:13:03 lr: 0.000091 grad: 0.1175 (0.1273) loss: 0.7842 (0.7940) time: 0.1374 data: 0.0449 max mem: 9377 +Train: [38] [2100/6250] eta: 0:12:45 lr: 0.000091 grad: 0.1153 (0.1270) loss: 0.7912 (0.7939) time: 0.3223 data: 0.2022 max mem: 9377 +Train: [38] [2200/6250] eta: 0:12:19 lr: 0.000091 grad: 0.1085 (0.1266) loss: 0.7852 (0.7938) time: 0.1483 data: 0.0553 max mem: 9377 +Train: [38] [2300/6250] eta: 0:11:57 lr: 0.000091 grad: 0.1153 (0.1264) loss: 0.8004 (0.7939) time: 0.1854 data: 0.1002 max mem: 9377 +Train: [38] [2400/6250] eta: 0:11:33 lr: 0.000091 grad: 0.1118 (0.1260) loss: 0.7986 (0.7941) time: 0.1185 data: 0.0321 max mem: 9377 +Train: [38] [2500/6250] eta: 0:11:14 lr: 0.000091 grad: 0.1135 (0.1256) loss: 0.8020 (0.7943) time: 0.1685 data: 0.0757 max mem: 9377 +Train: [38] [2600/6250] eta: 0:10:51 lr: 0.000091 grad: 0.1129 (0.1252) loss: 0.7897 (0.7944) time: 0.1420 data: 0.0611 max mem: 9377 +Train: [38] [2700/6250] eta: 0:10:30 lr: 0.000091 grad: 0.1267 (0.1250) loss: 0.7919 (0.7944) time: 0.1296 data: 0.0314 max mem: 9377 +Train: [38] [2800/6250] eta: 0:10:14 lr: 0.000091 grad: 0.1192 (0.1249) loss: 0.7923 (0.7944) time: 0.0995 data: 0.0004 max mem: 9377 +Train: [38] [2900/6250] eta: 0:09:55 lr: 0.000090 grad: 0.1202 (0.1248) loss: 0.7920 (0.7944) time: 0.1405 data: 0.0629 max mem: 9377 +Train: [38] [3000/6250] eta: 0:09:34 lr: 0.000090 grad: 0.1200 (0.1248) loss: 0.7900 (0.7944) time: 0.1482 data: 0.0635 max mem: 9377 +Train: [38] [3100/6250] eta: 0:09:15 lr: 0.000090 grad: 0.1161 (0.1246) loss: 0.7955 (0.7944) time: 0.1676 data: 0.0828 max mem: 9377 +Train: [38] [3200/6250] eta: 0:08:55 lr: 0.000090 grad: 0.1169 (0.1245) loss: 0.7929 (0.7943) time: 0.0996 data: 0.0002 max mem: 9377 +Train: [38] [3300/6250] eta: 0:08:36 lr: 0.000090 grad: 0.1206 (0.1244) loss: 0.7888 (0.7942) time: 0.1166 data: 0.0290 max mem: 9377 +Train: [38] [3400/6250] eta: 0:08:18 lr: 0.000090 grad: 0.1219 (0.1243) loss: 0.7878 (0.7942) time: 0.1436 data: 0.0555 max mem: 9377 +Train: [38] [3500/6250] eta: 0:07:59 lr: 0.000090 grad: 0.1269 (0.1244) loss: 0.7904 (0.7941) time: 0.1011 data: 0.0132 max mem: 9377 +Train: [38] [3600/6250] eta: 0:07:41 lr: 0.000090 grad: 0.1174 (0.1243) loss: 0.7946 (0.7941) time: 0.2293 data: 0.1499 max mem: 9377 +Train: [38] [3700/6250] eta: 0:07:23 lr: 0.000090 grad: 0.1265 (0.1243) loss: 0.7874 (0.7940) time: 0.1394 data: 0.0511 max mem: 9377 +Train: [38] [3800/6250] eta: 0:07:05 lr: 0.000090 grad: 0.1244 (0.1243) loss: 0.7823 (0.7939) time: 0.1146 data: 0.0291 max mem: 9377 +Train: [38] [3900/6250] eta: 0:06:47 lr: 0.000090 grad: 0.1175 (0.1243) loss: 0.7908 (0.7939) time: 0.1566 data: 0.0635 max mem: 9377 +Train: [38] [4000/6250] eta: 0:06:30 lr: 0.000090 grad: 0.1168 (0.1242) loss: 0.7947 (0.7939) time: 0.2105 data: 0.0977 max mem: 9377 +Train: [38] [4100/6250] eta: 0:06:21 lr: 0.000090 grad: 0.1196 (0.1241) loss: 0.7951 (0.7939) time: 0.1746 data: 0.0699 max mem: 9377 +Train: [38] [4200/6250] eta: 0:06:03 lr: 0.000090 grad: 0.1202 (0.1241) loss: 0.7935 (0.7938) time: 0.1775 data: 0.0903 max mem: 9377 +Train: [38] [4300/6250] eta: 0:05:46 lr: 0.000090 grad: 0.1215 (0.1241) loss: 0.7980 (0.7939) time: 0.1540 data: 0.0002 max mem: 9377 +Train: [38] [4400/6250] eta: 0:05:30 lr: 0.000090 grad: 0.1265 (0.1241) loss: 0.7870 (0.7938) time: 0.2157 data: 0.1125 max mem: 9377 +Train: [38] [4500/6250] eta: 0:05:14 lr: 0.000090 grad: 0.1262 (0.1241) loss: 0.7864 (0.7938) time: 0.1367 data: 0.0372 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:55 lr: 0.000090 grad: 0.1223 (0.1241) loss: 0.7889 (0.7936) time: 0.2114 data: 0.1007 max mem: 9377 +Train: [38] [4700/6250] eta: 0:04:39 lr: 0.000090 grad: 0.1236 (0.1242) loss: 0.7782 (0.7935) time: 0.1391 data: 0.0425 max mem: 9377 +Train: [38] [4800/6250] eta: 0:04:20 lr: 0.000090 grad: 0.1216 (0.1243) loss: 0.7831 (0.7933) time: 0.1307 data: 0.0463 max mem: 9377 +Train: [38] [4900/6250] eta: 0:04:03 lr: 0.000090 grad: 0.1195 (0.1243) loss: 0.7900 (0.7933) time: 0.1208 data: 0.0167 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:44 lr: 0.000090 grad: 0.1275 (0.1243) loss: 0.7907 (0.7931) time: 0.1606 data: 0.0708 max mem: 9377 +Train: [38] [5100/6250] eta: 0:03:26 lr: 0.000090 grad: 0.1105 (0.1243) loss: 0.7937 (0.7930) time: 0.1472 data: 0.0550 max mem: 9377 +Train: [38] [5200/6250] eta: 0:03:07 lr: 0.000090 grad: 0.1311 (0.1244) loss: 0.7861 (0.7929) time: 0.1768 data: 0.0892 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:49 lr: 0.000090 grad: 0.1296 (0.1244) loss: 0.7777 (0.7928) time: 0.1385 data: 0.0522 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:31 lr: 0.000090 grad: 0.1188 (0.1245) loss: 0.7961 (0.7927) time: 0.1201 data: 0.0336 max mem: 9377 +Train: [38] [5500/6250] eta: 0:02:13 lr: 0.000090 grad: 0.1402 (0.1246) loss: 0.7931 (0.7925) time: 0.1968 data: 0.1158 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:55 lr: 0.000090 grad: 0.1308 (0.1247) loss: 0.7818 (0.7924) time: 0.1828 data: 0.1029 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:37 lr: 0.000090 grad: 0.1245 (0.1248) loss: 0.7926 (0.7923) time: 0.1200 data: 0.0296 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:19 lr: 0.000090 grad: 0.1142 (0.1247) loss: 0.7957 (0.7922) time: 0.1658 data: 0.0775 max mem: 9377 +Train: [38] [5900/6250] eta: 0:01:01 lr: 0.000090 grad: 0.1231 (0.1248) loss: 0.7799 (0.7921) time: 0.1573 data: 0.0697 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:44 lr: 0.000090 grad: 0.1221 (0.1248) loss: 0.7819 (0.7921) time: 0.2048 data: 0.1184 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:26 lr: 0.000090 grad: 0.1312 (0.1249) loss: 0.7843 (0.7920) time: 0.1935 data: 0.1144 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:08 lr: 0.000089 grad: 0.1229 (0.1250) loss: 0.7812 (0.7919) time: 0.1726 data: 0.0801 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.1225 (0.1250) loss: 0.7849 (0.7918) time: 0.1451 data: 0.0589 max mem: 9377 +Train: [38] Total time: 0:18:21 (0.1763 s / it) +Averaged stats: lr: 0.000089 grad: 0.1225 (0.1250) loss: 0.7849 (0.7918) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:05:42 loss: 0.8270 (0.8270) time: 5.5303 data: 5.4990 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8277 (0.8277) time: 0.1170 data: 0.0918 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:13 (0.2249 s / it) +Averaged stats (hcp-train-subset): loss: 0.8277 (0.8277) +Eval (hcp-val): [38] [ 0/62] eta: 0:05:10 loss: 0.8449 (0.8449) time: 5.0109 data: 4.9805 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8484 (0.8491) time: 0.1244 data: 0.0994 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:13 (0.2254 s / it) +Averaged stats (hcp-val): loss: 0.8484 (0.8491) +Eval (nsd-val): [38] [ 0/62] eta: 0:03:14 loss: 0.8069 (0.8069) time: 3.1437 data: 3.0837 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8177 (0.8203) time: 0.1329 data: 0.1076 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (nsd-val): loss: 0.8177 (0.8203) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 9:17:42 lr: 0.000089 grad: 0.0910 (0.0910) loss: 0.8340 (0.8340) time: 5.3541 data: 5.1955 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:22:50 lr: 0.000089 grad: 0.1647 (0.1773) loss: 0.8011 (0.8144) time: 0.1322 data: 0.0004 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:19:48 lr: 0.000089 grad: 0.1581 (0.1777) loss: 0.7974 (0.8037) time: 0.1427 data: 0.0498 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:18:36 lr: 0.000089 grad: 0.1298 (0.1693) loss: 0.7951 (0.7984) time: 0.1953 data: 0.0940 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:17:43 lr: 0.000089 grad: 0.1206 (0.1590) loss: 0.7939 (0.7965) time: 0.2093 data: 0.1231 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:16:55 lr: 0.000089 grad: 0.1108 (0.1513) loss: 0.7984 (0.7966) time: 0.1503 data: 0.0482 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:16:32 lr: 0.000089 grad: 0.1147 (0.1461) loss: 0.8036 (0.7969) time: 0.1749 data: 0.0713 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:16:00 lr: 0.000089 grad: 0.1224 (0.1433) loss: 0.7912 (0.7966) time: 0.1507 data: 0.0605 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:15:29 lr: 0.000089 grad: 0.1139 (0.1407) loss: 0.7944 (0.7967) time: 0.1478 data: 0.0547 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:15:09 lr: 0.000089 grad: 0.1238 (0.1386) loss: 0.8018 (0.7972) time: 0.1860 data: 0.0949 max mem: 9377 +Train: [39] [1000/6250] eta: 0:14:45 lr: 0.000089 grad: 0.1217 (0.1371) loss: 0.7952 (0.7970) time: 0.1318 data: 0.0451 max mem: 9377 +Train: [39] [1100/6250] eta: 0:14:45 lr: 0.000089 grad: 0.1152 (0.1355) loss: 0.8009 (0.7967) time: 0.3459 data: 0.2236 max mem: 9377 +Train: [39] [1200/6250] eta: 0:14:22 lr: 0.000089 grad: 0.1245 (0.1345) loss: 0.7864 (0.7964) time: 0.1816 data: 0.1018 max mem: 9377 +Train: [39] [1300/6250] eta: 0:14:05 lr: 0.000089 grad: 0.1192 (0.1336) loss: 0.7890 (0.7960) time: 0.1501 data: 0.0673 max mem: 9377 +Train: [39] [1400/6250] eta: 0:14:58 lr: 0.000089 grad: 0.1160 (0.1328) loss: 0.8005 (0.7956) time: 0.1146 data: 0.0122 max mem: 9377 +Train: [39] [1500/6250] eta: 0:15:13 lr: 0.000089 grad: 0.1170 (0.1321) loss: 0.7902 (0.7952) time: 0.1393 data: 0.0498 max mem: 9377 +Train: [39] [1600/6250] eta: 0:14:42 lr: 0.000089 grad: 0.1251 (0.1318) loss: 0.7846 (0.7948) time: 0.1713 data: 0.0784 max mem: 9377 +Train: [39] [1700/6250] eta: 0:14:16 lr: 0.000089 grad: 0.1243 (0.1315) loss: 0.7860 (0.7942) time: 0.2071 data: 0.1141 max mem: 9377 +Train: [39] [1800/6250] eta: 0:14:00 lr: 0.000089 grad: 0.1220 (0.1311) loss: 0.7844 (0.7939) time: 0.1182 data: 0.0004 max mem: 9377 +Train: [39] [1900/6250] eta: 0:13:31 lr: 0.000089 grad: 0.1231 (0.1307) loss: 0.7901 (0.7935) time: 0.1520 data: 0.0486 max mem: 9377 +Train: [39] [2000/6250] eta: 0:13:09 lr: 0.000089 grad: 0.1262 (0.1305) loss: 0.7898 (0.7930) time: 0.1874 data: 0.0958 max mem: 9377 +Train: [39] [2100/6250] eta: 0:12:42 lr: 0.000089 grad: 0.1178 (0.1301) loss: 0.7865 (0.7927) time: 0.1346 data: 0.0347 max mem: 9377 +Train: [39] [2200/6250] eta: 0:12:18 lr: 0.000089 grad: 0.1297 (0.1299) loss: 0.7911 (0.7923) time: 0.1641 data: 0.0750 max mem: 9377 +Train: [39] [2300/6250] eta: 0:11:58 lr: 0.000089 grad: 0.1219 (0.1296) loss: 0.7913 (0.7920) time: 0.2056 data: 0.0992 max mem: 9377 +Train: [39] [2400/6250] eta: 0:11:43 lr: 0.000089 grad: 0.1245 (0.1293) loss: 0.7842 (0.7917) time: 0.2095 data: 0.1248 max mem: 9377 +Train: [39] [2500/6250] eta: 0:11:19 lr: 0.000089 grad: 0.1234 (0.1293) loss: 0.7930 (0.7915) time: 0.1399 data: 0.0494 max mem: 9377 +Train: [39] [2600/6250] eta: 0:10:58 lr: 0.000089 grad: 0.1224 (0.1291) loss: 0.7786 (0.7912) time: 0.1908 data: 0.1058 max mem: 9377 +Train: [39] [2700/6250] eta: 0:10:36 lr: 0.000089 grad: 0.1232 (0.1290) loss: 0.7853 (0.7910) time: 0.1463 data: 0.0530 max mem: 9377 +Train: [39] [2800/6250] eta: 0:10:15 lr: 0.000089 grad: 0.1199 (0.1290) loss: 0.7877 (0.7908) time: 0.1644 data: 0.0729 max mem: 9377 +Train: [39] [2900/6250] eta: 0:09:55 lr: 0.000089 grad: 0.1229 (0.1289) loss: 0.7890 (0.7907) time: 0.1516 data: 0.0584 max mem: 9377 +Train: [39] [3000/6250] eta: 0:09:34 lr: 0.000089 grad: 0.1254 (0.1288) loss: 0.7945 (0.7907) time: 0.1619 data: 0.0724 max mem: 9377 +Train: [39] [3100/6250] eta: 0:09:15 lr: 0.000089 grad: 0.1341 (0.1288) loss: 0.7838 (0.7906) time: 0.1826 data: 0.0883 max mem: 9377 +Train: [39] [3200/6250] eta: 0:08:55 lr: 0.000089 grad: 0.1244 (0.1286) loss: 0.7913 (0.7904) time: 0.1137 data: 0.0005 max mem: 9377 +Train: [39] [3300/6250] eta: 0:08:36 lr: 0.000088 grad: 0.1207 (0.1285) loss: 0.7898 (0.7904) time: 0.1414 data: 0.0515 max mem: 9377 +Train: [39] [3400/6250] eta: 0:08:17 lr: 0.000088 grad: 0.1147 (0.1284) loss: 0.7965 (0.7904) time: 0.1233 data: 0.0288 max mem: 9377 +Train: [39] [3500/6250] eta: 0:07:59 lr: 0.000088 grad: 0.1289 (0.1283) loss: 0.7908 (0.7904) time: 0.1988 data: 0.1084 max mem: 9377 +Train: [39] [3600/6250] eta: 0:07:40 lr: 0.000088 grad: 0.1262 (0.1282) loss: 0.7895 (0.7903) time: 0.1579 data: 0.0762 max mem: 9377 +Train: [39] [3700/6250] eta: 0:07:23 lr: 0.000088 grad: 0.1310 (0.1282) loss: 0.7947 (0.7902) time: 0.1091 data: 0.0104 max mem: 9377 +Train: [39] [3800/6250] eta: 0:07:05 lr: 0.000088 grad: 0.1245 (0.1282) loss: 0.7904 (0.7901) time: 0.1805 data: 0.0769 max mem: 9377 +Train: [39] [3900/6250] eta: 0:06:47 lr: 0.000088 grad: 0.1220 (0.1283) loss: 0.7892 (0.7901) time: 0.1655 data: 0.0781 max mem: 9377 +Train: [39] [4000/6250] eta: 0:06:29 lr: 0.000088 grad: 0.1279 (0.1283) loss: 0.7792 (0.7900) time: 0.2180 data: 0.1412 max mem: 9377 +Train: [39] [4100/6250] eta: 0:06:11 lr: 0.000088 grad: 0.1248 (0.1283) loss: 0.7742 (0.7898) time: 0.1488 data: 0.0721 max mem: 9377 +Train: [39] [4200/6250] eta: 0:05:53 lr: 0.000088 grad: 0.1247 (0.1282) loss: 0.7761 (0.7897) time: 0.1424 data: 0.0582 max mem: 9377 +Train: [39] [4300/6250] eta: 0:05:35 lr: 0.000088 grad: 0.1232 (0.1282) loss: 0.7835 (0.7895) time: 0.1404 data: 0.0499 max mem: 9377 +Train: [39] [4400/6250] eta: 0:05:18 lr: 0.000088 grad: 0.1150 (0.1281) loss: 0.7858 (0.7895) time: 0.1703 data: 0.0894 max mem: 9377 +Train: [39] [4500/6250] eta: 0:05:00 lr: 0.000088 grad: 0.1172 (0.1279) loss: 0.7868 (0.7895) time: 0.1953 data: 0.1042 max mem: 9377 +Train: [39] [4600/6250] eta: 0:04:43 lr: 0.000088 grad: 0.1206 (0.1279) loss: 0.7845 (0.7895) time: 0.1679 data: 0.0769 max mem: 9377 +Train: [39] [4700/6250] eta: 0:04:25 lr: 0.000088 grad: 0.1157 (0.1277) loss: 0.7911 (0.7895) time: 0.1689 data: 0.0710 max mem: 9377 +Train: [39] [4800/6250] eta: 0:04:08 lr: 0.000088 grad: 0.1222 (0.1276) loss: 0.7861 (0.7893) time: 0.1850 data: 0.0946 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:50 lr: 0.000088 grad: 0.1198 (0.1276) loss: 0.7997 (0.7893) time: 0.1415 data: 0.0550 max mem: 9377 +Train: [39] [5000/6250] eta: 0:03:32 lr: 0.000088 grad: 0.1201 (0.1275) loss: 0.7893 (0.7893) time: 0.1402 data: 0.0527 max mem: 9377 +Train: [39] [5100/6250] eta: 0:03:15 lr: 0.000088 grad: 0.1199 (0.1275) loss: 0.7970 (0.7893) time: 0.1270 data: 0.0415 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:58 lr: 0.000088 grad: 0.1268 (0.1275) loss: 0.7847 (0.7893) time: 0.2160 data: 0.1373 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:41 lr: 0.000088 grad: 0.1185 (0.1274) loss: 0.7977 (0.7893) time: 0.1335 data: 0.0437 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:24 lr: 0.000088 grad: 0.1248 (0.1273) loss: 0.7933 (0.7894) time: 0.1895 data: 0.1060 max mem: 9377 +Train: [39] [5500/6250] eta: 0:02:06 lr: 0.000088 grad: 0.1265 (0.1272) loss: 0.7920 (0.7894) time: 0.1299 data: 0.0500 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:49 lr: 0.000088 grad: 0.1196 (0.1272) loss: 0.7961 (0.7895) time: 0.1844 data: 0.0974 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:32 lr: 0.000088 grad: 0.1170 (0.1271) loss: 0.7924 (0.7896) time: 0.1641 data: 0.0764 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:15 lr: 0.000088 grad: 0.1197 (0.1270) loss: 0.7919 (0.7897) time: 0.1195 data: 0.0200 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:58 lr: 0.000088 grad: 0.1216 (0.1269) loss: 0.7978 (0.7898) time: 0.1790 data: 0.0954 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:42 lr: 0.000088 grad: 0.1236 (0.1268) loss: 0.7950 (0.7899) time: 0.1683 data: 0.0733 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:25 lr: 0.000088 grad: 0.1232 (0.1268) loss: 0.7945 (0.7900) time: 0.1701 data: 0.0863 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:08 lr: 0.000088 grad: 0.1080 (0.1267) loss: 0.8014 (0.7900) time: 0.1747 data: 0.0675 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.1242 (0.1267) loss: 0.7912 (0.7901) time: 0.1476 data: 0.0508 max mem: 9377 +Train: [39] Total time: 0:17:32 (0.1684 s / it) +Averaged stats: lr: 0.000088 grad: 0.1242 (0.1267) loss: 0.7912 (0.7901) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:04:18 loss: 0.8286 (0.8286) time: 4.1743 data: 4.1084 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8281 (0.8259) time: 0.1018 data: 0.0770 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-train-subset): loss: 0.8281 (0.8259) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [39] [ 0/62] eta: 0:03:50 loss: 0.8540 (0.8540) time: 3.7193 data: 3.6381 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8452 (0.8472) time: 0.1377 data: 0.1111 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (hcp-val): loss: 0.8452 (0.8472) +Making plots (hcp-val): example=55 +Eval (nsd-val): [39] [ 0/62] eta: 0:05:06 loss: 0.8136 (0.8136) time: 4.9498 data: 4.9009 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8213 (0.8211) time: 0.1349 data: 0.1058 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (nsd-val): loss: 0.8213 (0.8211) +Making plots (nsd-val): example=49 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 15:46:20 lr: 0.000088 grad: 0.1056 (0.1056) loss: 0.8724 (0.8724) time: 9.0848 data: 8.9843 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:27:21 lr: 0.000088 grad: 0.1443 (0.2045) loss: 0.7988 (0.8031) time: 0.1812 data: 0.0585 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:26:08 lr: 0.000088 grad: 0.1572 (0.1892) loss: 0.7987 (0.7967) time: 0.2131 data: 0.1179 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:26:21 lr: 0.000088 grad: 0.1363 (0.1762) loss: 0.8055 (0.7968) time: 0.1452 data: 0.0003 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:28:26 lr: 0.000087 grad: 0.1266 (0.1652) loss: 0.7926 (0.7970) time: 0.1200 data: 0.0003 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:26:41 lr: 0.000087 grad: 0.1322 (0.1589) loss: 0.7921 (0.7967) time: 0.1853 data: 0.0929 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:25:04 lr: 0.000087 grad: 0.1324 (0.1555) loss: 0.7817 (0.7952) time: 0.1317 data: 0.0003 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:23:55 lr: 0.000087 grad: 0.1273 (0.1522) loss: 0.7875 (0.7939) time: 0.0898 data: 0.0002 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:23:00 lr: 0.000087 grad: 0.1261 (0.1492) loss: 0.7859 (0.7930) time: 0.3838 data: 0.2929 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:21:34 lr: 0.000087 grad: 0.1205 (0.1462) loss: 0.7940 (0.7929) time: 0.1676 data: 0.0648 max mem: 9377 +Train: [40] [1000/6250] eta: 0:20:25 lr: 0.000087 grad: 0.1176 (0.1439) loss: 0.7897 (0.7929) time: 0.1624 data: 0.0803 max mem: 9377 +Train: [40] [1100/6250] eta: 0:19:32 lr: 0.000087 grad: 0.1216 (0.1419) loss: 0.7950 (0.7930) time: 0.1316 data: 0.0465 max mem: 9377 +Train: [40] [1200/6250] eta: 0:18:48 lr: 0.000087 grad: 0.1198 (0.1403) loss: 0.7856 (0.7930) time: 0.0885 data: 0.0002 max mem: 9377 +Train: [40] [1300/6250] eta: 0:17:59 lr: 0.000087 grad: 0.1166 (0.1390) loss: 0.8002 (0.7930) time: 0.1173 data: 0.0387 max mem: 9377 +Train: [40] [1400/6250] eta: 0:17:19 lr: 0.000087 grad: 0.1176 (0.1378) loss: 0.7968 (0.7931) time: 0.1258 data: 0.0469 max mem: 9377 +Train: [40] [1500/6250] eta: 0:16:37 lr: 0.000087 grad: 0.1194 (0.1369) loss: 0.7957 (0.7930) time: 0.1587 data: 0.0594 max mem: 9377 +Train: [40] [1600/6250] eta: 0:16:05 lr: 0.000087 grad: 0.1236 (0.1362) loss: 0.7888 (0.7929) time: 0.1315 data: 0.0441 max mem: 9377 +Train: [40] [1700/6250] eta: 0:15:34 lr: 0.000087 grad: 0.1194 (0.1355) loss: 0.7965 (0.7929) time: 0.1665 data: 0.0697 max mem: 9377 +Train: [40] [1800/6250] eta: 0:15:06 lr: 0.000087 grad: 0.1188 (0.1349) loss: 0.7886 (0.7927) time: 0.1458 data: 0.0401 max mem: 9377 +Train: [40] [1900/6250] eta: 0:14:37 lr: 0.000087 grad: 0.1194 (0.1343) loss: 0.7894 (0.7924) time: 0.1725 data: 0.0848 max mem: 9377 +Train: [40] [2000/6250] eta: 0:14:08 lr: 0.000087 grad: 0.1168 (0.1340) loss: 0.7899 (0.7922) time: 0.1866 data: 0.0965 max mem: 9377 +Train: [40] [2100/6250] eta: 0:13:39 lr: 0.000087 grad: 0.1228 (0.1335) loss: 0.7871 (0.7921) time: 0.1978 data: 0.1111 max mem: 9377 +Train: [40] [2200/6250] eta: 0:13:13 lr: 0.000087 grad: 0.1229 (0.1330) loss: 0.7831 (0.7919) time: 0.1040 data: 0.0225 max mem: 9377 +Train: [40] [2300/6250] eta: 0:12:54 lr: 0.000087 grad: 0.1200 (0.1327) loss: 0.7846 (0.7917) time: 0.0928 data: 0.0003 max mem: 9377 +Train: [40] [2400/6250] eta: 0:12:36 lr: 0.000087 grad: 0.1272 (0.1325) loss: 0.7837 (0.7915) time: 0.2700 data: 0.1817 max mem: 9377 +Train: [40] [2500/6250] eta: 0:12:11 lr: 0.000087 grad: 0.1288 (0.1323) loss: 0.7846 (0.7914) time: 0.1211 data: 0.0005 max mem: 9377 +Train: [40] [2600/6250] eta: 0:11:51 lr: 0.000087 grad: 0.1321 (0.1322) loss: 0.7855 (0.7912) time: 0.2638 data: 0.1738 max mem: 9377 +Train: [40] [2700/6250] eta: 0:11:27 lr: 0.000087 grad: 0.1199 (0.1321) loss: 0.7938 (0.7911) time: 0.1265 data: 0.0375 max mem: 9377 +Train: [40] [2800/6250] eta: 0:11:03 lr: 0.000087 grad: 0.1232 (0.1320) loss: 0.7868 (0.7908) time: 0.1091 data: 0.0211 max mem: 9377 +Train: [40] [2900/6250] eta: 0:10:40 lr: 0.000087 grad: 0.1249 (0.1318) loss: 0.7823 (0.7906) time: 0.1544 data: 0.0618 max mem: 9377 +Train: [40] [3000/6250] eta: 0:10:18 lr: 0.000087 grad: 0.1328 (0.1318) loss: 0.7824 (0.7904) time: 0.1287 data: 0.0370 max mem: 9377 +Train: [40] [3100/6250] eta: 0:09:59 lr: 0.000087 grad: 0.1279 (0.1317) loss: 0.7931 (0.7902) time: 0.2811 data: 0.1908 max mem: 9377 +Train: [40] [3200/6250] eta: 0:09:35 lr: 0.000087 grad: 0.1270 (0.1316) loss: 0.7923 (0.7901) time: 0.1454 data: 0.0460 max mem: 9377 +Train: [40] [3300/6250] eta: 0:09:15 lr: 0.000087 grad: 0.1282 (0.1315) loss: 0.7837 (0.7900) time: 0.1808 data: 0.0984 max mem: 9377 +Train: [40] [3400/6250] eta: 0:08:53 lr: 0.000087 grad: 0.1248 (0.1315) loss: 0.7904 (0.7898) time: 0.1399 data: 0.0456 max mem: 9377 +Train: [40] [3500/6250] eta: 0:08:31 lr: 0.000087 grad: 0.1292 (0.1316) loss: 0.7774 (0.7895) time: 0.1492 data: 0.0565 max mem: 9377 +Train: [40] [3600/6250] eta: 0:08:11 lr: 0.000087 grad: 0.1315 (0.1316) loss: 0.7744 (0.7894) time: 0.1226 data: 0.0418 max mem: 9377 +Train: [40] [3700/6250] eta: 0:07:51 lr: 0.000086 grad: 0.1274 (0.1314) loss: 0.7900 (0.7894) time: 0.1712 data: 0.0780 max mem: 9377 +Train: [40] [3800/6250] eta: 0:07:30 lr: 0.000086 grad: 0.1311 (0.1314) loss: 0.7765 (0.7892) time: 0.1339 data: 0.0380 max mem: 9377 +Train: [40] [3900/6250] eta: 0:07:10 lr: 0.000086 grad: 0.1271 (0.1315) loss: 0.7910 (0.7892) time: 0.1575 data: 0.0738 max mem: 9377 +Train: [40] [4000/6250] eta: 0:06:50 lr: 0.000086 grad: 0.1285 (0.1314) loss: 0.7893 (0.7891) time: 0.1080 data: 0.0126 max mem: 9377 +Train: [40] [4100/6250] eta: 0:06:31 lr: 0.000086 grad: 0.1205 (0.1313) loss: 0.7949 (0.7891) time: 0.1898 data: 0.1082 max mem: 9377 +Train: [40] [4200/6250] eta: 0:06:12 lr: 0.000086 grad: 0.1205 (0.1311) loss: 0.7882 (0.7892) time: 0.1505 data: 0.0638 max mem: 9377 +Train: [40] [4300/6250] eta: 0:05:53 lr: 0.000086 grad: 0.1227 (0.1311) loss: 0.7933 (0.7891) time: 0.1314 data: 0.0388 max mem: 9377 +Train: [40] [4400/6250] eta: 0:05:33 lr: 0.000086 grad: 0.1344 (0.1311) loss: 0.7844 (0.7890) time: 0.1345 data: 0.0530 max mem: 9377 +Train: [40] [4500/6250] eta: 0:05:15 lr: 0.000086 grad: 0.1256 (0.1311) loss: 0.7913 (0.7889) time: 0.2146 data: 0.1331 max mem: 9377 +Train: [40] [4600/6250] eta: 0:04:56 lr: 0.000086 grad: 0.1236 (0.1310) loss: 0.7757 (0.7887) time: 0.1919 data: 0.1023 max mem: 9377 +Train: [40] [4700/6250] eta: 0:04:37 lr: 0.000086 grad: 0.1324 (0.1310) loss: 0.7860 (0.7886) time: 0.1716 data: 0.0792 max mem: 9377 +Train: [40] [4800/6250] eta: 0:04:19 lr: 0.000086 grad: 0.1211 (0.1309) loss: 0.7801 (0.7885) time: 0.1983 data: 0.1128 max mem: 9377 +Train: [40] [4900/6250] eta: 0:04:01 lr: 0.000086 grad: 0.1295 (0.1308) loss: 0.7854 (0.7883) time: 0.1689 data: 0.0756 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:42 lr: 0.000086 grad: 0.1230 (0.1308) loss: 0.7823 (0.7883) time: 0.1225 data: 0.0271 max mem: 9377 +Train: [40] [5100/6250] eta: 0:03:24 lr: 0.000086 grad: 0.1331 (0.1307) loss: 0.7756 (0.7881) time: 0.1827 data: 0.0931 max mem: 9377 +Train: [40] [5200/6250] eta: 0:03:06 lr: 0.000086 grad: 0.1259 (0.1307) loss: 0.7847 (0.7879) time: 0.1440 data: 0.0512 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:48 lr: 0.000086 grad: 0.1269 (0.1306) loss: 0.7834 (0.7877) time: 0.1314 data: 0.0316 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:30 lr: 0.000086 grad: 0.1305 (0.1307) loss: 0.7720 (0.7875) time: 0.1396 data: 0.0484 max mem: 9377 +Train: [40] [5500/6250] eta: 0:02:12 lr: 0.000086 grad: 0.1317 (0.1307) loss: 0.7757 (0.7874) time: 0.1589 data: 0.0764 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:54 lr: 0.000086 grad: 0.1290 (0.1307) loss: 0.7813 (0.7873) time: 0.1622 data: 0.0797 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:36 lr: 0.000086 grad: 0.1214 (0.1307) loss: 0.7895 (0.7872) time: 0.1617 data: 0.0691 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:18 lr: 0.000086 grad: 0.1286 (0.1306) loss: 0.7786 (0.7872) time: 0.1722 data: 0.0880 max mem: 9377 +Train: [40] [5900/6250] eta: 0:01:01 lr: 0.000086 grad: 0.1267 (0.1305) loss: 0.7846 (0.7872) time: 0.1737 data: 0.0916 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:43 lr: 0.000086 grad: 0.1233 (0.1305) loss: 0.7828 (0.7871) time: 0.1708 data: 0.0858 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:26 lr: 0.000086 grad: 0.1324 (0.1305) loss: 0.7736 (0.7871) time: 0.1676 data: 0.0840 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:08 lr: 0.000086 grad: 0.1268 (0.1305) loss: 0.7946 (0.7870) time: 0.2237 data: 0.1314 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.1229 (0.1305) loss: 0.7927 (0.7870) time: 0.1476 data: 0.0569 max mem: 9377 +Train: [40] Total time: 0:18:11 (0.1747 s / it) +Averaged stats: lr: 0.000086 grad: 0.1229 (0.1305) loss: 0.7927 (0.7870) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:06:12 loss: 0.8248 (0.8248) time: 6.0002 data: 5.9703 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8237 (0.8242) time: 0.1185 data: 0.0939 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (hcp-train-subset): loss: 0.8237 (0.8242) +Eval (hcp-val): [40] [ 0/62] eta: 0:05:34 loss: 0.8473 (0.8473) time: 5.3880 data: 5.3586 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8468 (0.8472) time: 0.1295 data: 0.1046 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (hcp-val): loss: 0.8468 (0.8472) +Eval (nsd-val): [40] [ 0/62] eta: 0:04:22 loss: 0.8101 (0.8101) time: 4.2312 data: 4.1538 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8157 (0.8193) time: 0.1396 data: 0.1126 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (nsd-val): loss: 0.8157 (0.8193) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 9:03:30 lr: 0.000086 grad: 0.1329 (0.1329) loss: 0.8482 (0.8482) time: 5.2177 data: 5.0277 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:24:04 lr: 0.000086 grad: 0.1593 (0.1918) loss: 0.7960 (0.8086) time: 0.2119 data: 0.1079 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:19:50 lr: 0.000086 grad: 0.1626 (0.1881) loss: 0.7772 (0.7939) time: 0.1030 data: 0.0002 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:18:42 lr: 0.000086 grad: 0.1498 (0.1731) loss: 0.7920 (0.7916) time: 0.1477 data: 0.0497 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:18:22 lr: 0.000086 grad: 0.1241 (0.1662) loss: 0.7928 (0.7910) time: 0.2695 data: 0.1600 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:17:59 lr: 0.000086 grad: 0.1248 (0.1598) loss: 0.7935 (0.7909) time: 0.1963 data: 0.0986 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:17:05 lr: 0.000086 grad: 0.1381 (0.1557) loss: 0.7994 (0.7906) time: 0.1528 data: 0.0667 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:16:39 lr: 0.000085 grad: 0.1204 (0.1525) loss: 0.7989 (0.7909) time: 0.2132 data: 0.1265 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:15:58 lr: 0.000085 grad: 0.1280 (0.1496) loss: 0.7898 (0.7902) time: 0.1412 data: 0.0583 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:15:36 lr: 0.000085 grad: 0.1293 (0.1474) loss: 0.7906 (0.7903) time: 0.1912 data: 0.1049 max mem: 9377 +Train: [41] [1000/6250] eta: 0:15:26 lr: 0.000085 grad: 0.1302 (0.1459) loss: 0.7796 (0.7901) time: 0.1332 data: 0.0496 max mem: 9377 +Train: [41] [1100/6250] eta: 0:15:09 lr: 0.000085 grad: 0.1212 (0.1447) loss: 0.7876 (0.7898) time: 0.1858 data: 0.0905 max mem: 9377 +Train: [41] [1200/6250] eta: 0:14:47 lr: 0.000085 grad: 0.1244 (0.1433) loss: 0.7864 (0.7895) time: 0.1745 data: 0.0962 max mem: 9377 +Train: [41] [1300/6250] eta: 0:14:17 lr: 0.000085 grad: 0.1258 (0.1424) loss: 0.7806 (0.7889) time: 0.1491 data: 0.0579 max mem: 9377 +Train: [41] [1400/6250] eta: 0:13:54 lr: 0.000085 grad: 0.1294 (0.1413) loss: 0.7818 (0.7886) time: 0.1339 data: 0.0468 max mem: 9377 +Train: [41] [1500/6250] eta: 0:13:32 lr: 0.000085 grad: 0.1208 (0.1405) loss: 0.7823 (0.7884) time: 0.1399 data: 0.0544 max mem: 9377 +Train: [41] [1600/6250] eta: 0:13:15 lr: 0.000085 grad: 0.1244 (0.1396) loss: 0.7804 (0.7881) time: 0.2025 data: 0.1123 max mem: 9377 +Train: [41] [1700/6250] eta: 0:12:54 lr: 0.000085 grad: 0.1265 (0.1391) loss: 0.7820 (0.7879) time: 0.1041 data: 0.0096 max mem: 9377 +Train: [41] [1800/6250] eta: 0:12:32 lr: 0.000085 grad: 0.1224 (0.1384) loss: 0.7911 (0.7879) time: 0.1449 data: 0.0659 max mem: 9377 +Train: [41] [1900/6250] eta: 0:12:14 lr: 0.000085 grad: 0.1174 (0.1380) loss: 0.7947 (0.7878) time: 0.1690 data: 0.0812 max mem: 9377 +Train: [41] [2000/6250] eta: 0:11:55 lr: 0.000085 grad: 0.1203 (0.1372) loss: 0.7894 (0.7880) time: 0.1437 data: 0.0562 max mem: 9377 +Train: [41] [2100/6250] eta: 0:11:39 lr: 0.000085 grad: 0.1186 (0.1366) loss: 0.7933 (0.7882) time: 0.1710 data: 0.0898 max mem: 9377 +Train: [41] [2200/6250] eta: 0:11:20 lr: 0.000085 grad: 0.1204 (0.1360) loss: 0.7933 (0.7884) time: 0.1614 data: 0.0711 max mem: 9377 +Train: [41] [2300/6250] eta: 0:11:03 lr: 0.000085 grad: 0.1212 (0.1355) loss: 0.7912 (0.7887) time: 0.1667 data: 0.0567 max mem: 9377 +Train: [41] [2400/6250] eta: 0:10:46 lr: 0.000085 grad: 0.1119 (0.1348) loss: 0.8008 (0.7890) time: 0.1483 data: 0.0615 max mem: 9377 +Train: [41] [2500/6250] eta: 0:10:29 lr: 0.000085 grad: 0.1206 (0.1343) loss: 0.8014 (0.7894) time: 0.1734 data: 0.0747 max mem: 9377 +Train: [41] [2600/6250] eta: 0:10:12 lr: 0.000085 grad: 0.1160 (0.1336) loss: 0.7950 (0.7897) time: 0.2196 data: 0.1332 max mem: 9377 +Train: [41] [2700/6250] eta: 0:09:56 lr: 0.000085 grad: 0.1299 (0.1333) loss: 0.7930 (0.7898) time: 0.0873 data: 0.0002 max mem: 9377 +Train: [41] [2800/6250] eta: 0:09:36 lr: 0.000085 grad: 0.1141 (0.1330) loss: 0.8052 (0.7900) time: 0.1472 data: 0.0481 max mem: 9377 +Train: [41] [2900/6250] eta: 0:09:19 lr: 0.000085 grad: 0.1273 (0.1327) loss: 0.7870 (0.7901) time: 0.1622 data: 0.0772 max mem: 9377 +Train: [41] [3000/6250] eta: 0:09:01 lr: 0.000085 grad: 0.1230 (0.1323) loss: 0.7833 (0.7902) time: 0.1315 data: 0.0400 max mem: 9377 +Train: [41] [3100/6250] eta: 0:08:44 lr: 0.000085 grad: 0.1315 (0.1321) loss: 0.7815 (0.7901) time: 0.1285 data: 0.0499 max mem: 9377 +Train: [41] [3200/6250] eta: 0:08:26 lr: 0.000085 grad: 0.1316 (0.1318) loss: 0.7813 (0.7900) time: 0.1334 data: 0.0450 max mem: 9377 +Train: [41] [3300/6250] eta: 0:08:09 lr: 0.000085 grad: 0.1269 (0.1318) loss: 0.7913 (0.7899) time: 0.1382 data: 0.0526 max mem: 9377 +Train: [41] [3400/6250] eta: 0:07:51 lr: 0.000085 grad: 0.1225 (0.1317) loss: 0.7852 (0.7899) time: 0.1102 data: 0.0296 max mem: 9377 +Train: [41] [3500/6250] eta: 0:07:34 lr: 0.000085 grad: 0.1216 (0.1315) loss: 0.7923 (0.7900) time: 0.1128 data: 0.0192 max mem: 9377 +Train: [41] [3600/6250] eta: 0:07:18 lr: 0.000085 grad: 0.1255 (0.1313) loss: 0.7910 (0.7900) time: 0.2003 data: 0.1173 max mem: 9377 +Train: [41] [3700/6250] eta: 0:07:01 lr: 0.000085 grad: 0.1210 (0.1312) loss: 0.7890 (0.7901) time: 0.2057 data: 0.1241 max mem: 9377 +Train: [41] [3800/6250] eta: 0:06:43 lr: 0.000085 grad: 0.1247 (0.1310) loss: 0.7953 (0.7902) time: 0.1258 data: 0.0334 max mem: 9377 +Train: [41] [3900/6250] eta: 0:06:26 lr: 0.000084 grad: 0.1331 (0.1310) loss: 0.7820 (0.7901) time: 0.1496 data: 0.0618 max mem: 9377 +Train: [41] [4000/6250] eta: 0:06:10 lr: 0.000084 grad: 0.1351 (0.1309) loss: 0.7840 (0.7900) time: 0.1686 data: 0.0809 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:53 lr: 0.000084 grad: 0.1290 (0.1308) loss: 0.7883 (0.7900) time: 0.1841 data: 0.1001 max mem: 9377 +Train: [41] [4200/6250] eta: 0:05:37 lr: 0.000084 grad: 0.1285 (0.1307) loss: 0.7902 (0.7900) time: 0.1803 data: 0.0948 max mem: 9377 +Train: [41] [4300/6250] eta: 0:05:20 lr: 0.000084 grad: 0.1279 (0.1308) loss: 0.7929 (0.7900) time: 0.1476 data: 0.0486 max mem: 9377 +Train: [41] [4400/6250] eta: 0:05:03 lr: 0.000084 grad: 0.1268 (0.1307) loss: 0.7885 (0.7899) time: 0.1659 data: 0.0700 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:46 lr: 0.000084 grad: 0.1301 (0.1308) loss: 0.7817 (0.7897) time: 0.1378 data: 0.0460 max mem: 9377 +Train: [41] [4600/6250] eta: 0:04:31 lr: 0.000084 grad: 0.1198 (0.1308) loss: 0.7857 (0.7896) time: 0.3147 data: 0.2400 max mem: 9377 +Train: [41] [4700/6250] eta: 0:04:14 lr: 0.000084 grad: 0.1299 (0.1308) loss: 0.7871 (0.7895) time: 0.1264 data: 0.0381 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:57 lr: 0.000084 grad: 0.1278 (0.1308) loss: 0.7850 (0.7894) time: 0.1303 data: 0.0390 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:40 lr: 0.000084 grad: 0.1301 (0.1308) loss: 0.7799 (0.7893) time: 0.1529 data: 0.0575 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:24 lr: 0.000084 grad: 0.1262 (0.1308) loss: 0.7795 (0.7891) time: 0.1781 data: 0.0974 max mem: 9377 +Train: [41] [5100/6250] eta: 0:03:08 lr: 0.000084 grad: 0.1276 (0.1308) loss: 0.7946 (0.7890) time: 0.1727 data: 0.0822 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:51 lr: 0.000084 grad: 0.1282 (0.1308) loss: 0.7866 (0.7889) time: 0.1525 data: 0.0557 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:35 lr: 0.000084 grad: 0.1300 (0.1308) loss: 0.7810 (0.7887) time: 0.1635 data: 0.0719 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:19 lr: 0.000084 grad: 0.1273 (0.1308) loss: 0.7889 (0.7886) time: 0.1407 data: 0.0425 max mem: 9377 +Train: [41] [5500/6250] eta: 0:02:02 lr: 0.000084 grad: 0.1286 (0.1308) loss: 0.7872 (0.7886) time: 0.1600 data: 0.0626 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:46 lr: 0.000084 grad: 0.1229 (0.1308) loss: 0.7875 (0.7885) time: 0.1655 data: 0.0781 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:30 lr: 0.000084 grad: 0.1331 (0.1308) loss: 0.7708 (0.7884) time: 0.0887 data: 0.0002 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:13 lr: 0.000084 grad: 0.1313 (0.1309) loss: 0.7757 (0.7883) time: 0.0928 data: 0.0002 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:57 lr: 0.000084 grad: 0.1274 (0.1310) loss: 0.7815 (0.7882) time: 0.1261 data: 0.0347 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:41 lr: 0.000084 grad: 0.1272 (0.1309) loss: 0.7919 (0.7882) time: 0.2658 data: 0.1286 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:24 lr: 0.000084 grad: 0.1348 (0.1310) loss: 0.7863 (0.7881) time: 0.1677 data: 0.0631 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:08 lr: 0.000084 grad: 0.1306 (0.1310) loss: 0.7872 (0.7881) time: 0.1200 data: 0.0003 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.1376 (0.1311) loss: 0.7858 (0.7880) time: 0.1412 data: 0.0003 max mem: 9377 +Train: [41] Total time: 0:17:35 (0.1689 s / it) +Averaged stats: lr: 0.000084 grad: 0.1376 (0.1311) loss: 0.7858 (0.7880) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:04:33 loss: 0.8327 (0.8327) time: 4.4047 data: 4.3054 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8266 (0.8258) time: 0.1342 data: 0.1071 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:15 (0.2565 s / it) +Averaged stats (hcp-train-subset): loss: 0.8266 (0.8258) +Eval (hcp-val): [41] [ 0/62] eta: 0:05:42 loss: 0.8431 (0.8431) time: 5.5178 data: 5.4877 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8466 (0.8475) time: 0.1130 data: 0.0882 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:14 (0.2333 s / it) +Averaged stats (hcp-val): loss: 0.8466 (0.8475) +Eval (nsd-val): [41] [ 0/62] eta: 0:04:55 loss: 0.8107 (0.8107) time: 4.7639 data: 4.7348 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8209 (0.8230) time: 0.1350 data: 0.1098 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8230) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [42] [ 0/6250] eta: 19:26:59 lr: 0.000084 grad: 0.1744 (0.1744) loss: 0.8364 (0.8364) time: 11.2031 data: 11.0814 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:40:35 lr: 0.000084 grad: 0.1642 (0.2013) loss: 0.8084 (0.8051) time: 0.2686 data: 0.0405 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:28:27 lr: 0.000084 grad: 0.1457 (0.1838) loss: 0.7858 (0.7989) time: 0.1933 data: 0.0922 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:28:19 lr: 0.000084 grad: 0.1485 (0.1712) loss: 0.7890 (0.7970) time: 0.1689 data: 0.0394 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:26:02 lr: 0.000084 grad: 0.1356 (0.1645) loss: 0.7898 (0.7947) time: 0.1172 data: 0.0006 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:23:35 lr: 0.000084 grad: 0.1296 (0.1606) loss: 0.7870 (0.7926) time: 0.1746 data: 0.0708 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:21:45 lr: 0.000084 grad: 0.1474 (0.1590) loss: 0.7891 (0.7909) time: 0.1769 data: 0.0844 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:21:20 lr: 0.000084 grad: 0.1259 (0.1555) loss: 0.7951 (0.7906) time: 0.4897 data: 0.3901 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:20:12 lr: 0.000084 grad: 0.1265 (0.1517) loss: 0.7976 (0.7907) time: 0.1979 data: 0.1043 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:19:43 lr: 0.000083 grad: 0.1310 (0.1489) loss: 0.7777 (0.7905) time: 0.1600 data: 0.0588 max mem: 9377 +Train: [42] [1000/6250] eta: 0:19:13 lr: 0.000083 grad: 0.1245 (0.1464) loss: 0.7944 (0.7906) time: 0.0941 data: 0.0003 max mem: 9377 +Train: [42] [1100/6250] eta: 0:18:22 lr: 0.000083 grad: 0.1261 (0.1446) loss: 0.7808 (0.7907) time: 0.1953 data: 0.1085 max mem: 9377 +Train: [42] [1200/6250] eta: 0:17:39 lr: 0.000083 grad: 0.1276 (0.1432) loss: 0.7859 (0.7903) time: 0.2225 data: 0.1421 max mem: 9377 +Train: [42] [1300/6250] eta: 0:16:58 lr: 0.000083 grad: 0.1257 (0.1419) loss: 0.7752 (0.7900) time: 0.1241 data: 0.0397 max mem: 9377 +Train: [42] [1400/6250] eta: 0:16:16 lr: 0.000083 grad: 0.1228 (0.1410) loss: 0.7776 (0.7896) time: 0.1326 data: 0.0526 max mem: 9377 +Train: [42] [1500/6250] eta: 0:16:18 lr: 0.000083 grad: 0.1227 (0.1399) loss: 0.7908 (0.7893) time: 0.2022 data: 0.1095 max mem: 9377 +Train: [42] [1600/6250] eta: 0:16:20 lr: 0.000083 grad: 0.1223 (0.1392) loss: 0.7883 (0.7889) time: 0.7191 data: 0.6247 max mem: 9377 +Train: [42] [1700/6250] eta: 0:15:43 lr: 0.000083 grad: 0.1263 (0.1388) loss: 0.7722 (0.7884) time: 0.1540 data: 0.0642 max mem: 9377 +Train: [42] [1800/6250] eta: 0:15:09 lr: 0.000083 grad: 0.1309 (0.1384) loss: 0.7807 (0.7879) time: 0.1430 data: 0.0594 max mem: 9377 +Train: [42] [1900/6250] eta: 0:14:40 lr: 0.000083 grad: 0.1418 (0.1383) loss: 0.7785 (0.7875) time: 0.1296 data: 0.0466 max mem: 9377 +Train: [42] [2000/6250] eta: 0:14:10 lr: 0.000083 grad: 0.1224 (0.1381) loss: 0.7841 (0.7871) time: 0.1715 data: 0.0814 max mem: 9377 +Train: [42] [2100/6250] eta: 0:13:42 lr: 0.000083 grad: 0.1288 (0.1378) loss: 0.7825 (0.7870) time: 0.1489 data: 0.0647 max mem: 9377 +Train: [42] [2200/6250] eta: 0:13:16 lr: 0.000083 grad: 0.1219 (0.1374) loss: 0.7932 (0.7869) time: 0.1251 data: 0.0345 max mem: 9377 +Train: [42] [2300/6250] eta: 0:12:50 lr: 0.000083 grad: 0.1270 (0.1370) loss: 0.7852 (0.7871) time: 0.1761 data: 0.0762 max mem: 9377 +Train: [42] [2400/6250] eta: 0:12:25 lr: 0.000083 grad: 0.1225 (0.1368) loss: 0.7843 (0.7870) time: 0.1594 data: 0.0657 max mem: 9377 +Train: [42] [2500/6250] eta: 0:11:59 lr: 0.000083 grad: 0.1194 (0.1363) loss: 0.7918 (0.7872) time: 0.1223 data: 0.0284 max mem: 9377 +Train: [42] [2600/6250] eta: 0:11:33 lr: 0.000083 grad: 0.1306 (0.1361) loss: 0.7777 (0.7872) time: 0.1365 data: 0.0376 max mem: 9377 +Train: [42] [2700/6250] eta: 0:11:10 lr: 0.000083 grad: 0.1311 (0.1359) loss: 0.7938 (0.7872) time: 0.1660 data: 0.0712 max mem: 9377 +Train: [42] [2800/6250] eta: 0:10:47 lr: 0.000083 grad: 0.1238 (0.1355) loss: 0.7895 (0.7873) time: 0.1264 data: 0.0356 max mem: 9377 +Train: [42] [2900/6250] eta: 0:10:29 lr: 0.000083 grad: 0.1227 (0.1353) loss: 0.7838 (0.7874) time: 0.2489 data: 0.1671 max mem: 9377 +Train: [42] [3000/6250] eta: 0:10:05 lr: 0.000083 grad: 0.1285 (0.1351) loss: 0.7921 (0.7875) time: 0.1642 data: 0.0754 max mem: 9377 +Train: [42] [3100/6250] eta: 0:09:44 lr: 0.000083 grad: 0.1307 (0.1348) loss: 0.7835 (0.7877) time: 0.1939 data: 0.1081 max mem: 9377 +Train: [42] [3200/6250] eta: 0:09:22 lr: 0.000083 grad: 0.1274 (0.1346) loss: 0.7955 (0.7877) time: 0.1372 data: 0.0501 max mem: 9377 +Train: [42] [3300/6250] eta: 0:09:01 lr: 0.000083 grad: 0.1243 (0.1344) loss: 0.7911 (0.7878) time: 0.1119 data: 0.0244 max mem: 9377 +Train: [42] [3400/6250] eta: 0:08:41 lr: 0.000083 grad: 0.1267 (0.1343) loss: 0.7946 (0.7878) time: 0.2155 data: 0.1316 max mem: 9377 +Train: [42] [3500/6250] eta: 0:08:20 lr: 0.000083 grad: 0.1337 (0.1342) loss: 0.7908 (0.7878) time: 0.1478 data: 0.0540 max mem: 9377 +Train: [42] [3600/6250] eta: 0:07:59 lr: 0.000083 grad: 0.1302 (0.1342) loss: 0.7914 (0.7878) time: 0.1653 data: 0.0760 max mem: 9377 +Train: [42] [3700/6250] eta: 0:07:39 lr: 0.000083 grad: 0.1296 (0.1343) loss: 0.7941 (0.7877) time: 0.1618 data: 0.0764 max mem: 9377 +Train: [42] [3800/6250] eta: 0:07:19 lr: 0.000083 grad: 0.1353 (0.1343) loss: 0.7834 (0.7877) time: 0.1298 data: 0.0375 max mem: 9377 +Train: [42] [3900/6250] eta: 0:07:01 lr: 0.000083 grad: 0.1310 (0.1342) loss: 0.7871 (0.7877) time: 0.1496 data: 0.0627 max mem: 9377 +Train: [42] [4000/6250] eta: 0:06:41 lr: 0.000083 grad: 0.1283 (0.1341) loss: 0.7947 (0.7877) time: 0.1528 data: 0.0713 max mem: 9377 +Train: [42] [4100/6250] eta: 0:06:22 lr: 0.000082 grad: 0.1256 (0.1340) loss: 0.7867 (0.7877) time: 0.1228 data: 0.0363 max mem: 9377 +Train: [42] [4200/6250] eta: 0:06:04 lr: 0.000082 grad: 0.1242 (0.1339) loss: 0.7825 (0.7877) time: 0.1425 data: 0.0553 max mem: 9377 +Train: [42] [4300/6250] eta: 0:05:45 lr: 0.000082 grad: 0.1283 (0.1339) loss: 0.7899 (0.7876) time: 0.1791 data: 0.0961 max mem: 9377 +Train: [42] [4400/6250] eta: 0:05:27 lr: 0.000082 grad: 0.1319 (0.1339) loss: 0.7858 (0.7876) time: 0.1744 data: 0.0845 max mem: 9377 +Train: [42] [4500/6250] eta: 0:05:08 lr: 0.000082 grad: 0.1263 (0.1338) loss: 0.7801 (0.7875) time: 0.1510 data: 0.0637 max mem: 9377 +Train: [42] [4600/6250] eta: 0:04:50 lr: 0.000082 grad: 0.1263 (0.1338) loss: 0.7817 (0.7875) time: 0.2248 data: 0.1364 max mem: 9377 +Train: [42] [4700/6250] eta: 0:04:32 lr: 0.000082 grad: 0.1213 (0.1337) loss: 0.7874 (0.7875) time: 0.1523 data: 0.0597 max mem: 9377 +Train: [42] [4800/6250] eta: 0:04:14 lr: 0.000082 grad: 0.1302 (0.1337) loss: 0.7896 (0.7875) time: 0.2524 data: 0.1722 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:56 lr: 0.000082 grad: 0.1338 (0.1336) loss: 0.7848 (0.7874) time: 0.1483 data: 0.0624 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:38 lr: 0.000082 grad: 0.1347 (0.1336) loss: 0.7877 (0.7875) time: 0.1873 data: 0.1032 max mem: 9377 +Train: [42] [5100/6250] eta: 0:03:20 lr: 0.000082 grad: 0.1214 (0.1336) loss: 0.7912 (0.7875) time: 0.1451 data: 0.0582 max mem: 9377 +Train: [42] [5200/6250] eta: 0:03:03 lr: 0.000082 grad: 0.1310 (0.1336) loss: 0.7903 (0.7875) time: 0.1540 data: 0.0691 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:45 lr: 0.000082 grad: 0.1311 (0.1335) loss: 0.7894 (0.7876) time: 0.1236 data: 0.0267 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:28 lr: 0.000082 grad: 0.1300 (0.1335) loss: 0.7832 (0.7876) time: 0.1929 data: 0.1022 max mem: 9377 +Train: [42] [5500/6250] eta: 0:02:10 lr: 0.000082 grad: 0.1358 (0.1336) loss: 0.7859 (0.7876) time: 0.1009 data: 0.0003 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:53 lr: 0.000082 grad: 0.1363 (0.1337) loss: 0.7724 (0.7876) time: 0.2072 data: 0.1165 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:35 lr: 0.000082 grad: 0.1312 (0.1337) loss: 0.7881 (0.7876) time: 0.1047 data: 0.0072 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:17 lr: 0.000082 grad: 0.1325 (0.1336) loss: 0.7892 (0.7876) time: 0.1547 data: 0.0731 max mem: 9377 +Train: [42] [5900/6250] eta: 0:01:00 lr: 0.000082 grad: 0.1226 (0.1336) loss: 0.8048 (0.7877) time: 0.1011 data: 0.0084 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:43 lr: 0.000082 grad: 0.1301 (0.1335) loss: 0.7872 (0.7878) time: 0.1524 data: 0.0586 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:25 lr: 0.000082 grad: 0.1222 (0.1335) loss: 0.7949 (0.7879) time: 0.1665 data: 0.0798 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:08 lr: 0.000082 grad: 0.1238 (0.1335) loss: 0.7874 (0.7879) time: 0.2778 data: 0.1844 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.1342 (0.1335) loss: 0.7904 (0.7879) time: 0.2258 data: 0.1229 max mem: 9377 +Train: [42] Total time: 0:18:14 (0.1751 s / it) +Averaged stats: lr: 0.000082 grad: 0.1342 (0.1335) loss: 0.7904 (0.7879) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:05:36 loss: 0.8274 (0.8274) time: 5.4291 data: 5.3977 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8286 (0.8238) time: 0.1121 data: 0.0871 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (hcp-train-subset): loss: 0.8286 (0.8238) +Eval (hcp-val): [42] [ 0/62] eta: 0:06:12 loss: 0.8426 (0.8426) time: 6.0020 data: 5.9714 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8456 (0.8467) time: 0.1220 data: 0.0970 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:14 (0.2266 s / it) +Averaged stats (hcp-val): loss: 0.8456 (0.8467) +Eval (nsd-val): [42] [ 0/62] eta: 0:05:04 loss: 0.8131 (0.8131) time: 4.9119 data: 4.8787 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8262 (0.8260) time: 0.1418 data: 0.1159 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (nsd-val): loss: 0.8262 (0.8260) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 10:24:46 lr: 0.000082 grad: 0.3476 (0.3476) loss: 0.8832 (0.8832) time: 5.9979 data: 5.8408 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:23:25 lr: 0.000082 grad: 0.1545 (0.2039) loss: 0.7937 (0.8020) time: 0.2092 data: 0.1021 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:20:21 lr: 0.000082 grad: 0.1398 (0.1792) loss: 0.7931 (0.7972) time: 0.1782 data: 0.0875 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:18:46 lr: 0.000082 grad: 0.1476 (0.1675) loss: 0.7920 (0.7957) time: 0.1163 data: 0.0132 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:18:03 lr: 0.000082 grad: 0.1278 (0.1605) loss: 0.7946 (0.7938) time: 0.1190 data: 0.0003 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:17:22 lr: 0.000082 grad: 0.1282 (0.1556) loss: 0.7881 (0.7930) time: 0.1768 data: 0.0781 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:16:38 lr: 0.000082 grad: 0.1256 (0.1520) loss: 0.7967 (0.7925) time: 0.1492 data: 0.0490 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:16:00 lr: 0.000082 grad: 0.1191 (0.1480) loss: 0.7896 (0.7923) time: 0.1710 data: 0.0667 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:15:27 lr: 0.000082 grad: 0.1149 (0.1450) loss: 0.7943 (0.7923) time: 0.1303 data: 0.0199 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:15:45 lr: 0.000082 grad: 0.1191 (0.1426) loss: 0.7981 (0.7927) time: 0.1955 data: 0.1027 max mem: 9377 +Train: [43] [1000/6250] eta: 0:15:30 lr: 0.000081 grad: 0.1217 (0.1408) loss: 0.7993 (0.7928) time: 0.1000 data: 0.0004 max mem: 9377 +Train: [43] [1100/6250] eta: 0:15:06 lr: 0.000081 grad: 0.1238 (0.1393) loss: 0.7888 (0.7926) time: 0.1687 data: 0.0802 max mem: 9377 +Train: [43] [1200/6250] eta: 0:14:36 lr: 0.000081 grad: 0.1196 (0.1380) loss: 0.7960 (0.7925) time: 0.1516 data: 0.0561 max mem: 9377 +Train: [43] [1300/6250] eta: 0:14:13 lr: 0.000081 grad: 0.1183 (0.1369) loss: 0.7895 (0.7925) time: 0.1257 data: 0.0297 max mem: 9377 +Train: [43] [1400/6250] eta: 0:13:48 lr: 0.000081 grad: 0.1200 (0.1358) loss: 0.7879 (0.7921) time: 0.1631 data: 0.0721 max mem: 9377 +Train: [43] [1500/6250] eta: 0:13:29 lr: 0.000081 grad: 0.1142 (0.1348) loss: 0.7942 (0.7920) time: 0.2223 data: 0.1372 max mem: 9377 +Train: [43] [1600/6250] eta: 0:13:04 lr: 0.000081 grad: 0.1169 (0.1341) loss: 0.7925 (0.7918) time: 0.1078 data: 0.0002 max mem: 9377 +Train: [43] [1700/6250] eta: 0:12:42 lr: 0.000081 grad: 0.1224 (0.1334) loss: 0.7801 (0.7915) time: 0.1404 data: 0.0527 max mem: 9377 +Train: [43] [1800/6250] eta: 0:12:22 lr: 0.000081 grad: 0.1226 (0.1329) loss: 0.7883 (0.7913) time: 0.1254 data: 0.0285 max mem: 9377 +Train: [43] [1900/6250] eta: 0:12:04 lr: 0.000081 grad: 0.1260 (0.1326) loss: 0.7821 (0.7910) time: 0.1882 data: 0.1021 max mem: 9377 +Train: [43] [2000/6250] eta: 0:11:50 lr: 0.000081 grad: 0.1213 (0.1322) loss: 0.7864 (0.7910) time: 0.2062 data: 0.1270 max mem: 9377 +Train: [43] [2100/6250] eta: 0:11:30 lr: 0.000081 grad: 0.1189 (0.1319) loss: 0.7972 (0.7909) time: 0.1278 data: 0.0498 max mem: 9377 +Train: [43] [2200/6250] eta: 0:11:11 lr: 0.000081 grad: 0.1334 (0.1316) loss: 0.7900 (0.7908) time: 0.1167 data: 0.0332 max mem: 9377 +Train: [43] [2300/6250] eta: 0:10:56 lr: 0.000081 grad: 0.1298 (0.1313) loss: 0.7750 (0.7906) time: 0.2531 data: 0.1707 max mem: 9377 +Train: [43] [2400/6250] eta: 0:10:38 lr: 0.000081 grad: 0.1206 (0.1312) loss: 0.7837 (0.7905) time: 0.1550 data: 0.0731 max mem: 9377 +Train: [43] [2500/6250] eta: 0:10:22 lr: 0.000081 grad: 0.1243 (0.1312) loss: 0.7913 (0.7903) time: 0.1873 data: 0.1003 max mem: 9377 +Train: [43] [2600/6250] eta: 0:10:04 lr: 0.000081 grad: 0.1247 (0.1313) loss: 0.7859 (0.7901) time: 0.1504 data: 0.0579 max mem: 9377 +Train: [43] [2700/6250] eta: 0:09:45 lr: 0.000081 grad: 0.1335 (0.1313) loss: 0.7769 (0.7899) time: 0.1609 data: 0.0654 max mem: 9377 +Train: [43] [2800/6250] eta: 0:09:27 lr: 0.000081 grad: 0.1270 (0.1314) loss: 0.7841 (0.7896) time: 0.1653 data: 0.0739 max mem: 9377 +Train: [43] [2900/6250] eta: 0:09:10 lr: 0.000081 grad: 0.1388 (0.1317) loss: 0.7733 (0.7892) time: 0.1366 data: 0.0511 max mem: 9377 +Train: [43] [3000/6250] eta: 0:08:51 lr: 0.000081 grad: 0.1402 (0.1319) loss: 0.7765 (0.7888) time: 0.1245 data: 0.0371 max mem: 9377 +Train: [43] [3100/6250] eta: 0:08:35 lr: 0.000081 grad: 0.1297 (0.1320) loss: 0.7858 (0.7884) time: 0.2110 data: 0.1183 max mem: 9377 +Train: [43] [3200/6250] eta: 0:08:17 lr: 0.000081 grad: 0.1303 (0.1320) loss: 0.7862 (0.7882) time: 0.1576 data: 0.0669 max mem: 9377 +Train: [43] [3300/6250] eta: 0:08:00 lr: 0.000081 grad: 0.1405 (0.1321) loss: 0.7671 (0.7879) time: 0.1356 data: 0.0428 max mem: 9377 +Train: [43] [3400/6250] eta: 0:07:43 lr: 0.000081 grad: 0.1361 (0.1323) loss: 0.7718 (0.7876) time: 0.1015 data: 0.0164 max mem: 9377 +Train: [43] [3500/6250] eta: 0:07:26 lr: 0.000081 grad: 0.1343 (0.1324) loss: 0.7812 (0.7873) time: 0.1872 data: 0.1042 max mem: 9377 +Train: [43] [3600/6250] eta: 0:07:09 lr: 0.000081 grad: 0.1251 (0.1326) loss: 0.7847 (0.7871) time: 0.1710 data: 0.0756 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:52 lr: 0.000081 grad: 0.1304 (0.1328) loss: 0.7698 (0.7868) time: 0.1498 data: 0.0600 max mem: 9377 +Train: [43] [3800/6250] eta: 0:06:36 lr: 0.000081 grad: 0.1391 (0.1330) loss: 0.7700 (0.7866) time: 0.1068 data: 0.0099 max mem: 9377 +Train: [43] [3900/6250] eta: 0:06:20 lr: 0.000081 grad: 0.1399 (0.1331) loss: 0.7739 (0.7863) time: 0.1891 data: 0.1026 max mem: 9377 +Train: [43] [4000/6250] eta: 0:06:04 lr: 0.000081 grad: 0.1335 (0.1332) loss: 0.7807 (0.7860) time: 0.1916 data: 0.1090 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:48 lr: 0.000081 grad: 0.1284 (0.1332) loss: 0.7749 (0.7858) time: 0.1601 data: 0.0769 max mem: 9377 +Train: [43] [4200/6250] eta: 0:05:37 lr: 0.000080 grad: 0.1309 (0.1333) loss: 0.7844 (0.7856) time: 0.2281 data: 0.1248 max mem: 9377 +Train: [43] [4300/6250] eta: 0:05:21 lr: 0.000080 grad: 0.1317 (0.1333) loss: 0.7793 (0.7856) time: 0.1619 data: 0.0651 max mem: 9377 +Train: [43] [4400/6250] eta: 0:05:04 lr: 0.000080 grad: 0.1282 (0.1333) loss: 0.7984 (0.7856) time: 0.1138 data: 0.0271 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:48 lr: 0.000080 grad: 0.1313 (0.1334) loss: 0.7822 (0.7855) time: 0.1968 data: 0.1096 max mem: 9377 +Train: [43] [4600/6250] eta: 0:04:37 lr: 0.000080 grad: 0.1333 (0.1335) loss: 0.7847 (0.7855) time: 0.1030 data: 0.0003 max mem: 9377 +Train: [43] [4700/6250] eta: 0:04:19 lr: 0.000080 grad: 0.1254 (0.1334) loss: 0.7847 (0.7854) time: 0.1805 data: 0.0907 max mem: 9377 +Train: [43] [4800/6250] eta: 0:04:03 lr: 0.000080 grad: 0.1262 (0.1334) loss: 0.7789 (0.7854) time: 0.3090 data: 0.1975 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:46 lr: 0.000080 grad: 0.1282 (0.1334) loss: 0.7840 (0.7853) time: 0.2330 data: 0.1281 max mem: 9377 +Train: [43] [5000/6250] eta: 0:03:29 lr: 0.000080 grad: 0.1331 (0.1334) loss: 0.7775 (0.7852) time: 0.2197 data: 0.1379 max mem: 9377 +Train: [43] [5100/6250] eta: 0:03:12 lr: 0.000080 grad: 0.1331 (0.1334) loss: 0.7799 (0.7852) time: 0.1576 data: 0.0684 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:56 lr: 0.000080 grad: 0.1390 (0.1335) loss: 0.7853 (0.7852) time: 0.1027 data: 0.0191 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:39 lr: 0.000080 grad: 0.1283 (0.1335) loss: 0.7848 (0.7852) time: 0.1183 data: 0.0203 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:22 lr: 0.000080 grad: 0.1297 (0.1334) loss: 0.7798 (0.7851) time: 0.1794 data: 0.0939 max mem: 9377 +Train: [43] [5500/6250] eta: 0:02:05 lr: 0.000080 grad: 0.1267 (0.1334) loss: 0.7806 (0.7850) time: 0.1952 data: 0.1173 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:48 lr: 0.000080 grad: 0.1366 (0.1334) loss: 0.7811 (0.7850) time: 0.1529 data: 0.0709 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:32 lr: 0.000080 grad: 0.1283 (0.1334) loss: 0.7833 (0.7850) time: 0.1610 data: 0.0758 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:15 lr: 0.000080 grad: 0.1318 (0.1334) loss: 0.7796 (0.7849) time: 0.1864 data: 0.1038 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:58 lr: 0.000080 grad: 0.1305 (0.1334) loss: 0.7778 (0.7849) time: 0.1133 data: 0.0003 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:42 lr: 0.000080 grad: 0.1257 (0.1333) loss: 0.7828 (0.7848) time: 0.1918 data: 0.0514 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:25 lr: 0.000080 grad: 0.1367 (0.1333) loss: 0.7754 (0.7848) time: 0.1629 data: 0.0449 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:08 lr: 0.000080 grad: 0.1325 (0.1334) loss: 0.7848 (0.7847) time: 0.1701 data: 0.0927 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.1347 (0.1334) loss: 0.7790 (0.7847) time: 0.1549 data: 0.0733 max mem: 9377 +Train: [43] Total time: 0:17:46 (0.1707 s / it) +Averaged stats: lr: 0.000080 grad: 0.1347 (0.1334) loss: 0.7790 (0.7847) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:03:35 loss: 0.8309 (0.8309) time: 3.4732 data: 3.4038 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8261 (0.8251) time: 0.1155 data: 0.0889 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (hcp-train-subset): loss: 0.8261 (0.8251) +Eval (hcp-val): [43] [ 0/62] eta: 0:04:42 loss: 0.8470 (0.8470) time: 4.5548 data: 4.5253 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8477 (0.8497) time: 0.1480 data: 0.1224 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-val): loss: 0.8477 (0.8497) +Eval (nsd-val): [43] [ 0/62] eta: 0:03:45 loss: 0.8129 (0.8129) time: 3.6373 data: 3.5546 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8242 (0.8243) time: 0.1371 data: 0.1093 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (nsd-val): loss: 0.8242 (0.8243) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 9:18:20 lr: 0.000080 grad: 0.0811 (0.0811) loss: 0.8531 (0.8531) time: 5.3601 data: 5.0620 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:41:58 lr: 0.000080 grad: 0.1568 (0.1877) loss: 0.8068 (0.8030) time: 1.1031 data: 0.9680 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:34:03 lr: 0.000080 grad: 0.1617 (0.1821) loss: 0.7823 (0.7982) time: 0.1625 data: 0.0005 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:30:13 lr: 0.000080 grad: 0.1386 (0.1731) loss: 0.7828 (0.7946) time: 0.1726 data: 0.0370 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:30:13 lr: 0.000080 grad: 0.1377 (0.1657) loss: 0.7748 (0.7929) time: 0.5302 data: 0.3585 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:27:17 lr: 0.000080 grad: 0.1278 (0.1605) loss: 0.7910 (0.7925) time: 0.1477 data: 0.0298 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:26:21 lr: 0.000080 grad: 0.1259 (0.1562) loss: 0.7992 (0.7922) time: 0.1507 data: 0.0241 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:24:11 lr: 0.000080 grad: 0.1283 (0.1525) loss: 0.7902 (0.7919) time: 0.1427 data: 0.0437 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:22:36 lr: 0.000080 grad: 0.1299 (0.1495) loss: 0.7880 (0.7914) time: 0.1631 data: 0.0772 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:21:22 lr: 0.000080 grad: 0.1217 (0.1473) loss: 0.7794 (0.7906) time: 0.1569 data: 0.0545 max mem: 9377 +Train: [44] [1000/6250] eta: 0:20:19 lr: 0.000080 grad: 0.1296 (0.1456) loss: 0.7826 (0.7893) time: 0.0954 data: 0.0002 max mem: 9377 +Train: [44] [1100/6250] eta: 0:19:27 lr: 0.000079 grad: 0.1423 (0.1445) loss: 0.7742 (0.7881) time: 0.1888 data: 0.1077 max mem: 9377 +Train: [44] [1200/6250] eta: 0:18:31 lr: 0.000079 grad: 0.1255 (0.1437) loss: 0.7725 (0.7868) time: 0.1789 data: 0.0920 max mem: 9377 +Train: [44] [1300/6250] eta: 0:17:45 lr: 0.000079 grad: 0.1343 (0.1429) loss: 0.7704 (0.7856) time: 0.1047 data: 0.0104 max mem: 9377 +Train: [44] [1400/6250] eta: 0:17:08 lr: 0.000079 grad: 0.1325 (0.1422) loss: 0.7670 (0.7849) time: 0.1683 data: 0.0810 max mem: 9377 +Train: [44] [1500/6250] eta: 0:16:33 lr: 0.000079 grad: 0.1366 (0.1414) loss: 0.7755 (0.7843) time: 0.1744 data: 0.0812 max mem: 9377 +Train: [44] [1600/6250] eta: 0:15:55 lr: 0.000079 grad: 0.1336 (0.1409) loss: 0.7728 (0.7837) time: 0.1671 data: 0.0790 max mem: 9377 +Train: [44] [1700/6250] eta: 0:15:24 lr: 0.000079 grad: 0.1434 (0.1404) loss: 0.7611 (0.7832) time: 0.2160 data: 0.1286 max mem: 9377 +Train: [44] [1800/6250] eta: 0:14:52 lr: 0.000079 grad: 0.1290 (0.1400) loss: 0.7697 (0.7826) time: 0.1987 data: 0.1120 max mem: 9377 +Train: [44] [1900/6250] eta: 0:14:20 lr: 0.000079 grad: 0.1473 (0.1397) loss: 0.7772 (0.7820) time: 0.1303 data: 0.0388 max mem: 9377 +Train: [44] [2000/6250] eta: 0:13:52 lr: 0.000079 grad: 0.1299 (0.1400) loss: 0.7753 (0.7814) time: 0.0923 data: 0.0002 max mem: 9377 +Train: [44] [2100/6250] eta: 0:13:26 lr: 0.000079 grad: 0.1416 (0.1401) loss: 0.7632 (0.7809) time: 0.1870 data: 0.1026 max mem: 9377 +Train: [44] [2200/6250] eta: 0:13:00 lr: 0.000079 grad: 0.1375 (0.1400) loss: 0.7824 (0.7806) time: 0.1485 data: 0.0654 max mem: 9377 +Train: [44] [2300/6250] eta: 0:12:36 lr: 0.000079 grad: 0.1385 (0.1399) loss: 0.7664 (0.7801) time: 0.1633 data: 0.0864 max mem: 9377 +Train: [44] [2400/6250] eta: 0:12:12 lr: 0.000079 grad: 0.1284 (0.1397) loss: 0.7765 (0.7800) time: 0.1980 data: 0.1190 max mem: 9377 +Train: [44] [2500/6250] eta: 0:11:49 lr: 0.000079 grad: 0.1347 (0.1396) loss: 0.7724 (0.7799) time: 0.1790 data: 0.0895 max mem: 9377 +Train: [44] [2600/6250] eta: 0:11:28 lr: 0.000079 grad: 0.1457 (0.1395) loss: 0.7742 (0.7796) time: 0.1787 data: 0.0856 max mem: 9377 +Train: [44] [2700/6250] eta: 0:11:07 lr: 0.000079 grad: 0.1351 (0.1395) loss: 0.7786 (0.7795) time: 0.1980 data: 0.1118 max mem: 9377 +Train: [44] [2800/6250] eta: 0:10:45 lr: 0.000079 grad: 0.1384 (0.1394) loss: 0.7632 (0.7794) time: 0.1741 data: 0.0847 max mem: 9377 +Train: [44] [2900/6250] eta: 0:10:22 lr: 0.000079 grad: 0.1278 (0.1392) loss: 0.7773 (0.7792) time: 0.1610 data: 0.0646 max mem: 9377 +Train: [44] [3000/6250] eta: 0:10:00 lr: 0.000079 grad: 0.1308 (0.1390) loss: 0.7807 (0.7791) time: 0.1570 data: 0.0652 max mem: 9377 +Train: [44] [3100/6250] eta: 0:09:40 lr: 0.000079 grad: 0.1327 (0.1387) loss: 0.7736 (0.7792) time: 0.1420 data: 0.0514 max mem: 9377 +Train: [44] [3200/6250] eta: 0:09:24 lr: 0.000079 grad: 0.1237 (0.1384) loss: 0.7807 (0.7792) time: 0.1331 data: 0.0347 max mem: 9377 +Train: [44] [3300/6250] eta: 0:09:04 lr: 0.000079 grad: 0.1350 (0.1382) loss: 0.7827 (0.7791) time: 0.2423 data: 0.1515 max mem: 9377 +Train: [44] [3400/6250] eta: 0:08:44 lr: 0.000079 grad: 0.1332 (0.1380) loss: 0.7820 (0.7792) time: 0.1161 data: 0.0167 max mem: 9377 +Train: [44] [3500/6250] eta: 0:08:26 lr: 0.000079 grad: 0.1342 (0.1379) loss: 0.7813 (0.7792) time: 0.3636 data: 0.2769 max mem: 9377 +Train: [44] [3600/6250] eta: 0:08:08 lr: 0.000079 grad: 0.1320 (0.1377) loss: 0.7805 (0.7793) time: 0.1489 data: 0.0588 max mem: 9377 +Train: [44] [3700/6250] eta: 0:07:47 lr: 0.000079 grad: 0.1329 (0.1376) loss: 0.7790 (0.7793) time: 0.1625 data: 0.0721 max mem: 9377 +Train: [44] [3800/6250] eta: 0:07:28 lr: 0.000079 grad: 0.1409 (0.1376) loss: 0.7767 (0.7792) time: 0.2480 data: 0.1579 max mem: 9377 +Train: [44] [3900/6250] eta: 0:07:08 lr: 0.000079 grad: 0.1256 (0.1375) loss: 0.7755 (0.7791) time: 0.1193 data: 0.0104 max mem: 9377 +Train: [44] [4000/6250] eta: 0:06:48 lr: 0.000079 grad: 0.1267 (0.1375) loss: 0.7775 (0.7791) time: 0.1239 data: 0.0397 max mem: 9377 +Train: [44] [4100/6250] eta: 0:06:29 lr: 0.000079 grad: 0.1292 (0.1374) loss: 0.7832 (0.7791) time: 0.1593 data: 0.0763 max mem: 9377 +Train: [44] [4200/6250] eta: 0:06:10 lr: 0.000078 grad: 0.1341 (0.1374) loss: 0.7736 (0.7791) time: 0.1956 data: 0.1115 max mem: 9377 +Train: [44] [4300/6250] eta: 0:05:51 lr: 0.000078 grad: 0.1320 (0.1373) loss: 0.7793 (0.7792) time: 0.1410 data: 0.0413 max mem: 9377 +Train: [44] [4400/6250] eta: 0:05:34 lr: 0.000078 grad: 0.1292 (0.1372) loss: 0.7993 (0.7793) time: 0.1197 data: 0.0314 max mem: 9377 +Train: [44] [4500/6250] eta: 0:05:15 lr: 0.000078 grad: 0.1303 (0.1372) loss: 0.7831 (0.7793) time: 0.0991 data: 0.0177 max mem: 9377 +Train: [44] [4600/6250] eta: 0:04:56 lr: 0.000078 grad: 0.1309 (0.1371) loss: 0.7857 (0.7794) time: 0.2275 data: 0.1444 max mem: 9377 +Train: [44] [4700/6250] eta: 0:04:39 lr: 0.000078 grad: 0.1345 (0.1371) loss: 0.7787 (0.7795) time: 0.3223 data: 0.2318 max mem: 9377 +Train: [44] [4800/6250] eta: 0:04:20 lr: 0.000078 grad: 0.1322 (0.1370) loss: 0.7764 (0.7796) time: 0.1518 data: 0.0638 max mem: 9377 +Train: [44] [4900/6250] eta: 0:04:02 lr: 0.000078 grad: 0.1346 (0.1370) loss: 0.7818 (0.7798) time: 0.2270 data: 0.1461 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:43 lr: 0.000078 grad: 0.1281 (0.1369) loss: 0.7957 (0.7799) time: 0.1465 data: 0.0642 max mem: 9377 +Train: [44] [5100/6250] eta: 0:03:25 lr: 0.000078 grad: 0.1341 (0.1368) loss: 0.7825 (0.7800) time: 0.2220 data: 0.1354 max mem: 9377 +Train: [44] [5200/6250] eta: 0:03:06 lr: 0.000078 grad: 0.1401 (0.1367) loss: 0.7788 (0.7802) time: 0.1362 data: 0.0500 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:48 lr: 0.000078 grad: 0.1145 (0.1366) loss: 0.7902 (0.7804) time: 0.1092 data: 0.0134 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:30 lr: 0.000078 grad: 0.1267 (0.1365) loss: 0.8024 (0.7807) time: 0.1738 data: 0.0862 max mem: 9377 +Train: [44] [5500/6250] eta: 0:02:12 lr: 0.000078 grad: 0.1279 (0.1364) loss: 0.7973 (0.7809) time: 0.1523 data: 0.0597 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:54 lr: 0.000078 grad: 0.1271 (0.1364) loss: 0.7978 (0.7810) time: 0.1537 data: 0.0741 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:36 lr: 0.000078 grad: 0.1225 (0.1363) loss: 0.7951 (0.7813) time: 0.1228 data: 0.0442 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:19 lr: 0.000078 grad: 0.1242 (0.1361) loss: 0.7901 (0.7815) time: 0.3208 data: 0.2249 max mem: 9377 +Train: [44] [5900/6250] eta: 0:01:01 lr: 0.000078 grad: 0.1291 (0.1360) loss: 0.7888 (0.7817) time: 0.2313 data: 0.1172 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:44 lr: 0.000078 grad: 0.1231 (0.1359) loss: 0.8011 (0.7820) time: 0.1760 data: 0.0909 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:26 lr: 0.000078 grad: 0.1283 (0.1359) loss: 0.7892 (0.7822) time: 0.1249 data: 0.0227 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:08 lr: 0.000078 grad: 0.1291 (0.1358) loss: 0.7969 (0.7824) time: 0.1347 data: 0.0436 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.1257 (0.1358) loss: 0.7985 (0.7825) time: 0.1328 data: 0.0325 max mem: 9377 +Train: [44] Total time: 0:18:35 (0.1785 s / it) +Averaged stats: lr: 0.000078 grad: 0.1257 (0.1358) loss: 0.7985 (0.7825) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:04:11 loss: 0.8326 (0.8326) time: 4.0619 data: 3.9692 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8253 (0.8226) time: 0.1083 data: 0.0816 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:14 (0.2302 s / it) +Averaged stats (hcp-train-subset): loss: 0.8253 (0.8226) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [44] [ 0/62] eta: 0:03:58 loss: 0.8467 (0.8467) time: 3.8507 data: 3.7730 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8455 (0.8466) time: 0.1515 data: 0.1260 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:15 (0.2430 s / it) +Averaged stats (hcp-val): loss: 0.8455 (0.8466) +Making plots (hcp-val): example=17 +Eval (nsd-val): [44] [ 0/62] eta: 0:08:00 loss: 0.8134 (0.8134) time: 7.7487 data: 7.7170 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8209 (0.8219) time: 0.1154 data: 0.0889 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:16 (0.2648 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8219) +Making plots (nsd-val): example=60 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 8:04:14 lr: 0.000078 grad: 0.1035 (0.1035) loss: 0.8807 (0.8807) time: 4.6487 data: 4.2269 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:22:33 lr: 0.000078 grad: 0.1987 (0.2029) loss: 0.7928 (0.8019) time: 0.1564 data: 0.0497 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:19:32 lr: 0.000078 grad: 0.1702 (0.1906) loss: 0.7824 (0.7970) time: 0.1437 data: 0.0414 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:18:27 lr: 0.000078 grad: 0.1599 (0.1827) loss: 0.7738 (0.7900) time: 0.1633 data: 0.0540 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:17:23 lr: 0.000078 grad: 0.1452 (0.1754) loss: 0.7796 (0.7882) time: 0.1247 data: 0.0376 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:16:30 lr: 0.000078 grad: 0.1353 (0.1692) loss: 0.7877 (0.7870) time: 0.1127 data: 0.0228 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:15:58 lr: 0.000078 grad: 0.1427 (0.1644) loss: 0.7778 (0.7863) time: 0.1615 data: 0.0734 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:15:38 lr: 0.000078 grad: 0.1239 (0.1604) loss: 0.7883 (0.7865) time: 0.1741 data: 0.0794 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:15:15 lr: 0.000078 grad: 0.1325 (0.1566) loss: 0.7866 (0.7869) time: 0.1699 data: 0.0696 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:14:57 lr: 0.000078 grad: 0.1328 (0.1543) loss: 0.7861 (0.7868) time: 0.1757 data: 0.0913 max mem: 9377 +Train: [45] [1000/6250] eta: 0:14:28 lr: 0.000078 grad: 0.1229 (0.1521) loss: 0.7952 (0.7871) time: 0.1410 data: 0.0586 max mem: 9377 +Train: [45] [1100/6250] eta: 0:14:09 lr: 0.000077 grad: 0.1208 (0.1498) loss: 0.7934 (0.7871) time: 0.1750 data: 0.0861 max mem: 9377 +Train: [45] [1200/6250] eta: 0:13:47 lr: 0.000077 grad: 0.1287 (0.1482) loss: 0.7863 (0.7872) time: 0.1340 data: 0.0511 max mem: 9377 +Train: [45] [1300/6250] eta: 0:13:29 lr: 0.000077 grad: 0.1250 (0.1472) loss: 0.7870 (0.7868) time: 0.1538 data: 0.0657 max mem: 9377 +Train: [45] [1400/6250] eta: 0:13:08 lr: 0.000077 grad: 0.1339 (0.1462) loss: 0.7792 (0.7866) time: 0.1569 data: 0.0665 max mem: 9377 +Train: [45] [1500/6250] eta: 0:12:51 lr: 0.000077 grad: 0.1395 (0.1453) loss: 0.7703 (0.7863) time: 0.1506 data: 0.0676 max mem: 9377 +Train: [45] [1600/6250] eta: 0:12:37 lr: 0.000077 grad: 0.1220 (0.1447) loss: 0.7813 (0.7858) time: 0.2238 data: 0.1365 max mem: 9377 +Train: [45] [1700/6250] eta: 0:12:18 lr: 0.000077 grad: 0.1236 (0.1441) loss: 0.7840 (0.7854) time: 0.1949 data: 0.1096 max mem: 9377 +Train: [45] [1800/6250] eta: 0:11:59 lr: 0.000077 grad: 0.1301 (0.1435) loss: 0.7844 (0.7852) time: 0.1535 data: 0.0667 max mem: 9377 +Train: [45] [1900/6250] eta: 0:11:42 lr: 0.000077 grad: 0.1296 (0.1429) loss: 0.7709 (0.7850) time: 0.1590 data: 0.0703 max mem: 9377 +Train: [45] [2000/6250] eta: 0:11:24 lr: 0.000077 grad: 0.1352 (0.1425) loss: 0.7810 (0.7847) time: 0.1193 data: 0.0279 max mem: 9377 +Train: [45] [2100/6250] eta: 0:11:07 lr: 0.000077 grad: 0.1298 (0.1422) loss: 0.7786 (0.7844) time: 0.1476 data: 0.0659 max mem: 9377 +Train: [45] [2200/6250] eta: 0:10:52 lr: 0.000077 grad: 0.1302 (0.1416) loss: 0.7831 (0.7843) time: 0.1423 data: 0.0593 max mem: 9377 +Train: [45] [2300/6250] eta: 0:10:38 lr: 0.000077 grad: 0.1336 (0.1413) loss: 0.7830 (0.7843) time: 0.1465 data: 0.0616 max mem: 9377 +Train: [45] [2400/6250] eta: 0:10:25 lr: 0.000077 grad: 0.1286 (0.1408) loss: 0.7865 (0.7844) time: 0.2729 data: 0.1872 max mem: 9377 +Train: [45] [2500/6250] eta: 0:10:11 lr: 0.000077 grad: 0.1286 (0.1406) loss: 0.7830 (0.7844) time: 0.1170 data: 0.0206 max mem: 9377 +Train: [45] [2600/6250] eta: 0:09:56 lr: 0.000077 grad: 0.1309 (0.1405) loss: 0.7797 (0.7841) time: 0.1992 data: 0.1123 max mem: 9377 +Train: [45] [2700/6250] eta: 0:10:05 lr: 0.000077 grad: 0.1257 (0.1403) loss: 0.7863 (0.7841) time: 0.8928 data: 0.8031 max mem: 9377 +Train: [45] [2800/6250] eta: 0:09:50 lr: 0.000077 grad: 0.1347 (0.1402) loss: 0.7882 (0.7842) time: 0.1922 data: 0.0982 max mem: 9377 +Train: [45] [2900/6250] eta: 0:09:34 lr: 0.000077 grad: 0.1294 (0.1400) loss: 0.7884 (0.7842) time: 0.2916 data: 0.1591 max mem: 9377 +Train: [45] [3000/6250] eta: 0:09:19 lr: 0.000077 grad: 0.1359 (0.1399) loss: 0.7858 (0.7843) time: 0.3247 data: 0.2312 max mem: 9377 +Train: [45] [3100/6250] eta: 0:09:04 lr: 0.000077 grad: 0.1316 (0.1397) loss: 0.7868 (0.7844) time: 0.2897 data: 0.1546 max mem: 9377 +Train: [45] [3200/6250] eta: 0:08:48 lr: 0.000077 grad: 0.1295 (0.1397) loss: 0.7829 (0.7844) time: 0.2165 data: 0.1228 max mem: 9377 +Train: [45] [3300/6250] eta: 0:08:33 lr: 0.000077 grad: 0.1310 (0.1395) loss: 0.7879 (0.7843) time: 0.2153 data: 0.1189 max mem: 9377 +Train: [45] [3400/6250] eta: 0:08:14 lr: 0.000077 grad: 0.1302 (0.1393) loss: 0.7809 (0.7843) time: 0.1404 data: 0.0256 max mem: 9377 +Train: [45] [3500/6250] eta: 0:08:03 lr: 0.000077 grad: 0.1284 (0.1392) loss: 0.7870 (0.7842) time: 0.1389 data: 0.0464 max mem: 9377 +Train: [45] [3600/6250] eta: 0:07:45 lr: 0.000077 grad: 0.1316 (0.1390) loss: 0.7821 (0.7842) time: 0.0976 data: 0.0002 max mem: 9377 +Train: [45] [3700/6250] eta: 0:07:27 lr: 0.000077 grad: 0.1300 (0.1389) loss: 0.7862 (0.7842) time: 0.1458 data: 0.0608 max mem: 9377 +Train: [45] [3800/6250] eta: 0:07:11 lr: 0.000077 grad: 0.1265 (0.1388) loss: 0.7891 (0.7842) time: 0.3121 data: 0.2196 max mem: 9377 +Train: [45] [3900/6250] eta: 0:06:55 lr: 0.000077 grad: 0.1293 (0.1386) loss: 0.7898 (0.7842) time: 0.1212 data: 0.0339 max mem: 9377 +Train: [45] [4000/6250] eta: 0:06:35 lr: 0.000077 grad: 0.1254 (0.1384) loss: 0.7825 (0.7843) time: 0.1360 data: 0.0511 max mem: 9377 +Train: [45] [4100/6250] eta: 0:06:17 lr: 0.000077 grad: 0.1263 (0.1382) loss: 0.7910 (0.7844) time: 0.1557 data: 0.0693 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:58 lr: 0.000076 grad: 0.1244 (0.1380) loss: 0.7804 (0.7844) time: 0.1457 data: 0.0603 max mem: 9377 +Train: [45] [4300/6250] eta: 0:05:40 lr: 0.000076 grad: 0.1313 (0.1378) loss: 0.7888 (0.7845) time: 0.1760 data: 0.0889 max mem: 9377 +Train: [45] [4400/6250] eta: 0:05:21 lr: 0.000076 grad: 0.1355 (0.1377) loss: 0.7920 (0.7846) time: 0.1810 data: 0.1081 max mem: 9377 +Train: [45] [4500/6250] eta: 0:05:03 lr: 0.000076 grad: 0.1263 (0.1375) loss: 0.7906 (0.7846) time: 0.1401 data: 0.0463 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:45 lr: 0.000076 grad: 0.1332 (0.1373) loss: 0.7914 (0.7848) time: 0.1469 data: 0.0616 max mem: 9377 +Train: [45] [4700/6250] eta: 0:04:28 lr: 0.000076 grad: 0.1411 (0.1372) loss: 0.7983 (0.7850) time: 0.2693 data: 0.1836 max mem: 9377 +Train: [45] [4800/6250] eta: 0:04:09 lr: 0.000076 grad: 0.1310 (0.1371) loss: 0.7816 (0.7850) time: 0.1608 data: 0.0667 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:52 lr: 0.000076 grad: 0.1300 (0.1370) loss: 0.7866 (0.7851) time: 0.1445 data: 0.0605 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:34 lr: 0.000076 grad: 0.1268 (0.1368) loss: 0.7844 (0.7851) time: 0.1440 data: 0.0492 max mem: 9377 +Train: [45] [5100/6250] eta: 0:03:17 lr: 0.000076 grad: 0.1342 (0.1368) loss: 0.7815 (0.7851) time: 0.2115 data: 0.1173 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:59 lr: 0.000076 grad: 0.1283 (0.1368) loss: 0.7756 (0.7850) time: 0.1856 data: 0.1000 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:42 lr: 0.000076 grad: 0.1354 (0.1367) loss: 0.7895 (0.7850) time: 0.1540 data: 0.0621 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:25 lr: 0.000076 grad: 0.1378 (0.1368) loss: 0.7755 (0.7850) time: 0.1231 data: 0.0344 max mem: 9377 +Train: [45] [5500/6250] eta: 0:02:08 lr: 0.000076 grad: 0.1425 (0.1368) loss: 0.7802 (0.7849) time: 0.1772 data: 0.0917 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:51 lr: 0.000076 grad: 0.1333 (0.1368) loss: 0.7768 (0.7849) time: 0.5927 data: 0.4878 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:34 lr: 0.000076 grad: 0.1361 (0.1368) loss: 0.7834 (0.7848) time: 0.1700 data: 0.0800 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:17 lr: 0.000076 grad: 0.1367 (0.1368) loss: 0.7778 (0.7847) time: 0.1560 data: 0.0625 max mem: 9377 +Train: [45] [5900/6250] eta: 0:01:00 lr: 0.000076 grad: 0.1381 (0.1368) loss: 0.7752 (0.7846) time: 0.2045 data: 0.1168 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:42 lr: 0.000076 grad: 0.1332 (0.1369) loss: 0.7782 (0.7846) time: 0.1503 data: 0.0625 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:25 lr: 0.000076 grad: 0.1318 (0.1370) loss: 0.7802 (0.7844) time: 0.1200 data: 0.0242 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:08 lr: 0.000076 grad: 0.1395 (0.1370) loss: 0.7833 (0.7843) time: 0.1597 data: 0.0698 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.1387 (0.1370) loss: 0.7715 (0.7842) time: 0.1422 data: 0.0509 max mem: 9377 +Train: [45] Total time: 0:17:54 (0.1720 s / it) +Averaged stats: lr: 0.000076 grad: 0.1387 (0.1370) loss: 0.7715 (0.7842) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:05:23 loss: 0.8250 (0.8250) time: 5.2244 data: 5.1764 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8242 (0.8225) time: 0.1396 data: 0.1147 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:15 (0.2471 s / it) +Averaged stats (hcp-train-subset): loss: 0.8242 (0.8225) +Eval (hcp-val): [45] [ 0/62] eta: 0:06:11 loss: 0.8498 (0.8498) time: 5.9865 data: 5.9506 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8461 (0.8476) time: 0.1134 data: 0.0883 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:14 (0.2381 s / it) +Averaged stats (hcp-val): loss: 0.8461 (0.8476) +Eval (nsd-val): [45] [ 0/62] eta: 0:05:16 loss: 0.8188 (0.8188) time: 5.1099 data: 5.0793 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8243 (0.8256) time: 0.1489 data: 0.1217 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:14 (0.2339 s / it) +Averaged stats (nsd-val): loss: 0.8243 (0.8256) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 11:56:57 lr: 0.000076 grad: 0.3511 (0.3511) loss: 0.6732 (0.6732) time: 6.8828 data: 6.7165 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:24:31 lr: 0.000076 grad: 0.1962 (0.2183) loss: 0.7826 (0.7959) time: 0.1360 data: 0.0298 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:20:42 lr: 0.000076 grad: 0.1519 (0.1913) loss: 0.7899 (0.7919) time: 0.1830 data: 0.0797 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:18:43 lr: 0.000076 grad: 0.1395 (0.1799) loss: 0.7862 (0.7897) time: 0.1423 data: 0.0534 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:17:34 lr: 0.000076 grad: 0.1559 (0.1723) loss: 0.7666 (0.7870) time: 0.1799 data: 0.0917 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:16:49 lr: 0.000076 grad: 0.1665 (0.1707) loss: 0.7783 (0.7851) time: 0.1641 data: 0.0669 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:16:21 lr: 0.000076 grad: 0.1418 (0.1666) loss: 0.7635 (0.7830) time: 0.2018 data: 0.1006 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:15:41 lr: 0.000076 grad: 0.1244 (0.1620) loss: 0.7878 (0.7827) time: 0.1439 data: 0.0362 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:15:10 lr: 0.000076 grad: 0.1397 (0.1587) loss: 0.7724 (0.7821) time: 0.1606 data: 0.0642 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:14:47 lr: 0.000076 grad: 0.1321 (0.1565) loss: 0.7818 (0.7819) time: 0.1551 data: 0.0535 max mem: 9377 +Train: [46] [1000/6250] eta: 0:14:22 lr: 0.000076 grad: 0.1356 (0.1546) loss: 0.7755 (0.7818) time: 0.1489 data: 0.0519 max mem: 9377 +Train: [46] [1100/6250] eta: 0:13:58 lr: 0.000075 grad: 0.1313 (0.1530) loss: 0.7778 (0.7811) time: 0.1459 data: 0.0540 max mem: 9377 +Train: [46] [1200/6250] eta: 0:13:37 lr: 0.000075 grad: 0.1355 (0.1515) loss: 0.7818 (0.7810) time: 0.1285 data: 0.0358 max mem: 9377 +Train: [46] [1300/6250] eta: 0:13:22 lr: 0.000075 grad: 0.1352 (0.1501) loss: 0.7761 (0.7807) time: 0.1450 data: 0.0599 max mem: 9377 +Train: [46] [1400/6250] eta: 0:13:22 lr: 0.000075 grad: 0.1304 (0.1491) loss: 0.7757 (0.7804) time: 0.1060 data: 0.0002 max mem: 9377 +Train: [46] [1500/6250] eta: 0:13:04 lr: 0.000075 grad: 0.1329 (0.1482) loss: 0.7762 (0.7802) time: 0.2097 data: 0.1333 max mem: 9377 +Train: [46] [1600/6250] eta: 0:12:56 lr: 0.000075 grad: 0.1375 (0.1476) loss: 0.7752 (0.7801) time: 0.3332 data: 0.2231 max mem: 9377 +Train: [46] [1700/6250] eta: 0:13:19 lr: 0.000075 grad: 0.1338 (0.1468) loss: 0.7832 (0.7802) time: 0.2576 data: 0.1632 max mem: 9377 +Train: [46] [1800/6250] eta: 0:13:19 lr: 0.000075 grad: 0.1283 (0.1464) loss: 0.7750 (0.7801) time: 0.1619 data: 0.0294 max mem: 9377 +Train: [46] [1900/6250] eta: 0:13:00 lr: 0.000075 grad: 0.1352 (0.1457) loss: 0.7744 (0.7801) time: 0.1159 data: 0.0094 max mem: 9377 +Train: [46] [2000/6250] eta: 0:12:39 lr: 0.000075 grad: 0.1290 (0.1451) loss: 0.7847 (0.7803) time: 0.1544 data: 0.0504 max mem: 9377 +Train: [46] [2100/6250] eta: 0:12:17 lr: 0.000075 grad: 0.1317 (0.1449) loss: 0.7787 (0.7802) time: 0.1222 data: 0.0359 max mem: 9377 +Train: [46] [2200/6250] eta: 0:12:07 lr: 0.000075 grad: 0.1359 (0.1444) loss: 0.7742 (0.7800) time: 0.1360 data: 0.0498 max mem: 9377 +Train: [46] [2300/6250] eta: 0:11:44 lr: 0.000075 grad: 0.1351 (0.1442) loss: 0.7855 (0.7801) time: 0.1496 data: 0.0541 max mem: 9377 +Train: [46] [2400/6250] eta: 0:11:29 lr: 0.000075 grad: 0.1322 (0.1438) loss: 0.7823 (0.7802) time: 0.1818 data: 0.0569 max mem: 9377 +Train: [46] [2500/6250] eta: 0:11:06 lr: 0.000075 grad: 0.1362 (0.1436) loss: 0.7818 (0.7801) time: 0.1725 data: 0.0932 max mem: 9377 +Train: [46] [2600/6250] eta: 0:10:45 lr: 0.000075 grad: 0.1249 (0.1432) loss: 0.7814 (0.7801) time: 0.1504 data: 0.0597 max mem: 9377 +Train: [46] [2700/6250] eta: 0:10:24 lr: 0.000075 grad: 0.1305 (0.1430) loss: 0.7752 (0.7800) time: 0.1746 data: 0.0891 max mem: 9377 +Train: [46] [2800/6250] eta: 0:10:07 lr: 0.000075 grad: 0.1319 (0.1428) loss: 0.7788 (0.7801) time: 0.2058 data: 0.1243 max mem: 9377 +Train: [46] [2900/6250] eta: 0:09:47 lr: 0.000075 grad: 0.1341 (0.1426) loss: 0.7780 (0.7801) time: 0.1333 data: 0.0548 max mem: 9377 +Train: [46] [3000/6250] eta: 0:09:26 lr: 0.000075 grad: 0.1385 (0.1425) loss: 0.7732 (0.7800) time: 0.1376 data: 0.0606 max mem: 9377 +Train: [46] [3100/6250] eta: 0:09:09 lr: 0.000075 grad: 0.1357 (0.1423) loss: 0.7840 (0.7800) time: 0.2062 data: 0.1122 max mem: 9377 +Train: [46] [3200/6250] eta: 0:08:51 lr: 0.000075 grad: 0.1364 (0.1421) loss: 0.7690 (0.7800) time: 0.1602 data: 0.0745 max mem: 9377 +Train: [46] [3300/6250] eta: 0:08:34 lr: 0.000075 grad: 0.1371 (0.1421) loss: 0.7674 (0.7799) time: 0.1879 data: 0.1009 max mem: 9377 +Train: [46] [3400/6250] eta: 0:08:20 lr: 0.000075 grad: 0.1423 (0.1421) loss: 0.7629 (0.7797) time: 0.4083 data: 0.3141 max mem: 9377 +Train: [46] [3500/6250] eta: 0:08:00 lr: 0.000075 grad: 0.1367 (0.1421) loss: 0.7750 (0.7796) time: 0.1466 data: 0.0486 max mem: 9377 +Train: [46] [3600/6250] eta: 0:07:40 lr: 0.000075 grad: 0.1367 (0.1419) loss: 0.7858 (0.7796) time: 0.1060 data: 0.0115 max mem: 9377 +Train: [46] [3700/6250] eta: 0:07:22 lr: 0.000075 grad: 0.1456 (0.1419) loss: 0.7793 (0.7797) time: 0.1611 data: 0.0638 max mem: 9377 +Train: [46] [3800/6250] eta: 0:07:08 lr: 0.000075 grad: 0.1348 (0.1418) loss: 0.7831 (0.7797) time: 0.1700 data: 0.0651 max mem: 9377 +Train: [46] [3900/6250] eta: 0:06:58 lr: 0.000075 grad: 0.1402 (0.1417) loss: 0.7767 (0.7796) time: 0.1796 data: 0.0806 max mem: 9377 +Train: [46] [4000/6250] eta: 0:06:39 lr: 0.000075 grad: 0.1430 (0.1417) loss: 0.7770 (0.7796) time: 0.1339 data: 0.0399 max mem: 9377 +Train: [46] [4100/6250] eta: 0:06:24 lr: 0.000075 grad: 0.1280 (0.1415) loss: 0.7888 (0.7797) time: 0.5050 data: 0.4101 max mem: 9377 +Train: [46] [4200/6250] eta: 0:06:05 lr: 0.000074 grad: 0.1388 (0.1414) loss: 0.7725 (0.7797) time: 0.2013 data: 0.1084 max mem: 9377 +Train: [46] [4300/6250] eta: 0:05:46 lr: 0.000074 grad: 0.1327 (0.1413) loss: 0.7826 (0.7797) time: 0.1909 data: 0.0998 max mem: 9377 +Train: [46] [4400/6250] eta: 0:05:27 lr: 0.000074 grad: 0.1296 (0.1412) loss: 0.7807 (0.7798) time: 0.1529 data: 0.0726 max mem: 9377 +Train: [46] [4500/6250] eta: 0:05:08 lr: 0.000074 grad: 0.1308 (0.1410) loss: 0.7892 (0.7800) time: 0.1273 data: 0.0452 max mem: 9377 +Train: [46] [4600/6250] eta: 0:04:50 lr: 0.000074 grad: 0.1379 (0.1409) loss: 0.7823 (0.7800) time: 0.0989 data: 0.0003 max mem: 9377 +Train: [46] [4700/6250] eta: 0:04:32 lr: 0.000074 grad: 0.1372 (0.1408) loss: 0.7874 (0.7802) time: 0.1664 data: 0.0817 max mem: 9377 +Train: [46] [4800/6250] eta: 0:04:13 lr: 0.000074 grad: 0.1356 (0.1407) loss: 0.7840 (0.7803) time: 0.1120 data: 0.0178 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:55 lr: 0.000074 grad: 0.1339 (0.1406) loss: 0.7802 (0.7804) time: 0.1200 data: 0.0302 max mem: 9377 +Train: [46] [5000/6250] eta: 0:03:38 lr: 0.000074 grad: 0.1243 (0.1406) loss: 0.7876 (0.7804) time: 0.1996 data: 0.1141 max mem: 9377 +Train: [46] [5100/6250] eta: 0:03:20 lr: 0.000074 grad: 0.1267 (0.1405) loss: 0.7857 (0.7806) time: 0.1846 data: 0.0965 max mem: 9377 +Train: [46] [5200/6250] eta: 0:03:02 lr: 0.000074 grad: 0.1247 (0.1404) loss: 0.7959 (0.7806) time: 0.1789 data: 0.0986 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:45 lr: 0.000074 grad: 0.1327 (0.1404) loss: 0.7870 (0.7807) time: 0.1624 data: 0.0727 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:27 lr: 0.000074 grad: 0.1316 (0.1403) loss: 0.7820 (0.7808) time: 0.1587 data: 0.0730 max mem: 9377 +Train: [46] [5500/6250] eta: 0:02:09 lr: 0.000074 grad: 0.1305 (0.1402) loss: 0.7813 (0.7808) time: 0.1558 data: 0.0757 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:52 lr: 0.000074 grad: 0.1325 (0.1401) loss: 0.7919 (0.7809) time: 0.1218 data: 0.0355 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:34 lr: 0.000074 grad: 0.1348 (0.1400) loss: 0.7912 (0.7810) time: 0.1814 data: 0.0949 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:17 lr: 0.000074 grad: 0.1312 (0.1399) loss: 0.7784 (0.7811) time: 0.1481 data: 0.0588 max mem: 9377 +Train: [46] [5900/6250] eta: 0:01:00 lr: 0.000074 grad: 0.1380 (0.1399) loss: 0.7841 (0.7812) time: 0.1782 data: 0.0947 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:42 lr: 0.000074 grad: 0.1328 (0.1398) loss: 0.7972 (0.7813) time: 0.2325 data: 0.1416 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:25 lr: 0.000074 grad: 0.1352 (0.1397) loss: 0.7889 (0.7814) time: 0.1471 data: 0.0542 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:08 lr: 0.000074 grad: 0.1366 (0.1397) loss: 0.7917 (0.7815) time: 0.1326 data: 0.0456 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.1275 (0.1396) loss: 0.7925 (0.7815) time: 0.1723 data: 0.0872 max mem: 9377 +Train: [46] Total time: 0:17:58 (0.1726 s / it) +Averaged stats: lr: 0.000074 grad: 0.1275 (0.1396) loss: 0.7925 (0.7815) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:05:20 loss: 0.8259 (0.8259) time: 5.1654 data: 5.1348 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8221 (0.8225) time: 0.1670 data: 0.1415 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (hcp-train-subset): loss: 0.8221 (0.8225) +Eval (hcp-val): [46] [ 0/62] eta: 0:03:53 loss: 0.8459 (0.8459) time: 3.7707 data: 3.7016 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8465 (0.8482) time: 0.1667 data: 0.1392 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:14 (0.2361 s / it) +Averaged stats (hcp-val): loss: 0.8465 (0.8482) +Eval (nsd-val): [46] [ 0/62] eta: 0:03:44 loss: 0.8145 (0.8145) time: 3.6171 data: 3.5366 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8243 (0.8254) time: 0.1224 data: 0.0958 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (nsd-val): loss: 0.8243 (0.8254) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 9:33:10 lr: 0.000074 grad: 0.0820 (0.0820) loss: 0.8662 (0.8662) time: 5.5025 data: 5.3109 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:24:41 lr: 0.000074 grad: 0.1662 (0.2034) loss: 0.8082 (0.8027) time: 0.1892 data: 0.0735 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:20:30 lr: 0.000074 grad: 0.2175 (0.2030) loss: 0.7819 (0.7938) time: 0.1811 data: 0.0857 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:19:24 lr: 0.000074 grad: 0.1464 (0.1907) loss: 0.7800 (0.7894) time: 0.1926 data: 0.0813 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:18:25 lr: 0.000074 grad: 0.1521 (0.1814) loss: 0.7665 (0.7872) time: 0.1679 data: 0.0633 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:17:21 lr: 0.000074 grad: 0.1391 (0.1749) loss: 0.7824 (0.7860) time: 0.1364 data: 0.0179 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:16:41 lr: 0.000074 grad: 0.1339 (0.1703) loss: 0.7945 (0.7855) time: 0.1195 data: 0.0147 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:16:09 lr: 0.000074 grad: 0.1299 (0.1657) loss: 0.7712 (0.7856) time: 0.1568 data: 0.0535 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:15:50 lr: 0.000074 grad: 0.1370 (0.1621) loss: 0.7853 (0.7852) time: 0.2330 data: 0.1235 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:15:35 lr: 0.000074 grad: 0.1341 (0.1599) loss: 0.7839 (0.7850) time: 0.2054 data: 0.1015 max mem: 9377 +Train: [47] [1000/6250] eta: 0:15:04 lr: 0.000073 grad: 0.1419 (0.1583) loss: 0.7684 (0.7843) time: 0.0899 data: 0.0002 max mem: 9377 +Train: [47] [1100/6250] eta: 0:14:44 lr: 0.000073 grad: 0.1310 (0.1564) loss: 0.7907 (0.7840) time: 0.1759 data: 0.0828 max mem: 9377 +Train: [47] [1200/6250] eta: 0:14:19 lr: 0.000073 grad: 0.1404 (0.1550) loss: 0.7694 (0.7834) time: 0.1120 data: 0.0221 max mem: 9377 +Train: [47] [1300/6250] eta: 0:14:01 lr: 0.000073 grad: 0.1315 (0.1536) loss: 0.7893 (0.7832) time: 0.1755 data: 0.0902 max mem: 9377 +Train: [47] [1400/6250] eta: 0:13:40 lr: 0.000073 grad: 0.1352 (0.1530) loss: 0.7806 (0.7825) time: 0.1844 data: 0.0944 max mem: 9377 +Train: [47] [1500/6250] eta: 0:13:18 lr: 0.000073 grad: 0.1383 (0.1523) loss: 0.7772 (0.7821) time: 0.1838 data: 0.0997 max mem: 9377 +Train: [47] [1600/6250] eta: 0:13:06 lr: 0.000073 grad: 0.1325 (0.1513) loss: 0.7771 (0.7820) time: 0.3185 data: 0.2294 max mem: 9377 +Train: [47] [1700/6250] eta: 0:12:44 lr: 0.000073 grad: 0.1414 (0.1509) loss: 0.7711 (0.7814) time: 0.1155 data: 0.0298 max mem: 9377 +Train: [47] [1800/6250] eta: 0:12:28 lr: 0.000073 grad: 0.1397 (0.1506) loss: 0.7712 (0.7811) time: 0.1746 data: 0.0939 max mem: 9377 +Train: [47] [1900/6250] eta: 0:12:07 lr: 0.000073 grad: 0.1401 (0.1500) loss: 0.7797 (0.7808) time: 0.1207 data: 0.0292 max mem: 9377 +Train: [47] [2000/6250] eta: 0:11:50 lr: 0.000073 grad: 0.1375 (0.1494) loss: 0.7693 (0.7805) time: 0.2045 data: 0.1204 max mem: 9377 +Train: [47] [2100/6250] eta: 0:11:32 lr: 0.000073 grad: 0.1409 (0.1489) loss: 0.7659 (0.7805) time: 0.1363 data: 0.0395 max mem: 9377 +Train: [47] [2200/6250] eta: 0:11:17 lr: 0.000073 grad: 0.1434 (0.1485) loss: 0.7737 (0.7803) time: 0.1160 data: 0.0279 max mem: 9377 +Train: [47] [2300/6250] eta: 0:10:59 lr: 0.000073 grad: 0.1417 (0.1483) loss: 0.7719 (0.7802) time: 0.1790 data: 0.0900 max mem: 9377 +Train: [47] [2400/6250] eta: 0:10:40 lr: 0.000073 grad: 0.1332 (0.1479) loss: 0.7765 (0.7800) time: 0.1517 data: 0.0544 max mem: 9377 +Train: [47] [2500/6250] eta: 0:10:23 lr: 0.000073 grad: 0.1271 (0.1475) loss: 0.7793 (0.7799) time: 0.2248 data: 0.1321 max mem: 9377 +Train: [47] [2600/6250] eta: 0:10:04 lr: 0.000073 grad: 0.1389 (0.1474) loss: 0.7759 (0.7797) time: 0.1629 data: 0.0844 max mem: 9377 +Train: [47] [2700/6250] eta: 0:09:47 lr: 0.000073 grad: 0.1433 (0.1471) loss: 0.7682 (0.7795) time: 0.1451 data: 0.0605 max mem: 9377 +Train: [47] [2800/6250] eta: 0:09:29 lr: 0.000073 grad: 0.1410 (0.1469) loss: 0.7799 (0.7795) time: 0.1100 data: 0.0003 max mem: 9377 +Train: [47] [2900/6250] eta: 0:09:12 lr: 0.000073 grad: 0.1309 (0.1465) loss: 0.7794 (0.7795) time: 0.0976 data: 0.0102 max mem: 9377 +Train: [47] [3000/6250] eta: 0:08:55 lr: 0.000073 grad: 0.1334 (0.1462) loss: 0.7738 (0.7794) time: 0.1675 data: 0.0765 max mem: 9377 +Train: [47] [3100/6250] eta: 0:08:39 lr: 0.000073 grad: 0.1441 (0.1461) loss: 0.7784 (0.7792) time: 0.1301 data: 0.0526 max mem: 9377 +Train: [47] [3200/6250] eta: 0:08:22 lr: 0.000073 grad: 0.1380 (0.1459) loss: 0.7733 (0.7789) time: 0.1513 data: 0.0602 max mem: 9377 +Train: [47] [3300/6250] eta: 0:08:04 lr: 0.000073 grad: 0.1389 (0.1457) loss: 0.7745 (0.7787) time: 0.1777 data: 0.0923 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:47 lr: 0.000073 grad: 0.1375 (0.1455) loss: 0.7683 (0.7785) time: 0.1717 data: 0.0877 max mem: 9377 +Train: [47] [3500/6250] eta: 0:07:31 lr: 0.000073 grad: 0.1361 (0.1453) loss: 0.7746 (0.7785) time: 0.1488 data: 0.0582 max mem: 9377 +Train: [47] [3600/6250] eta: 0:07:15 lr: 0.000073 grad: 0.1369 (0.1452) loss: 0.7821 (0.7783) time: 0.1401 data: 0.0537 max mem: 9377 +Train: [47] [3700/6250] eta: 0:07:00 lr: 0.000073 grad: 0.1484 (0.1452) loss: 0.7784 (0.7782) time: 0.2376 data: 0.1479 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:45 lr: 0.000073 grad: 0.1429 (0.1453) loss: 0.7728 (0.7780) time: 0.1342 data: 0.0353 max mem: 9377 +Train: [47] [3900/6250] eta: 0:06:29 lr: 0.000073 grad: 0.1328 (0.1452) loss: 0.7713 (0.7778) time: 0.1815 data: 0.0749 max mem: 9377 +Train: [47] [4000/6250] eta: 0:06:14 lr: 0.000073 grad: 0.1424 (0.1452) loss: 0.7796 (0.7777) time: 0.3546 data: 0.2528 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:59 lr: 0.000072 grad: 0.1367 (0.1451) loss: 0.7841 (0.7777) time: 0.2221 data: 0.1341 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:41 lr: 0.000072 grad: 0.1361 (0.1450) loss: 0.7703 (0.7776) time: 0.1635 data: 0.0744 max mem: 9377 +Train: [47] [4300/6250] eta: 0:05:24 lr: 0.000072 grad: 0.1371 (0.1448) loss: 0.7741 (0.7776) time: 0.0997 data: 0.0035 max mem: 9377 +Train: [47] [4400/6250] eta: 0:05:07 lr: 0.000072 grad: 0.1379 (0.1445) loss: 0.7786 (0.7777) time: 0.1595 data: 0.0657 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:53 lr: 0.000072 grad: 0.1369 (0.1443) loss: 0.7777 (0.7778) time: 0.1153 data: 0.0233 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:35 lr: 0.000072 grad: 0.1367 (0.1442) loss: 0.7885 (0.7778) time: 0.1664 data: 0.0870 max mem: 9377 +Train: [47] [4700/6250] eta: 0:04:18 lr: 0.000072 grad: 0.1338 (0.1441) loss: 0.7808 (0.7778) time: 0.1385 data: 0.0534 max mem: 9377 +Train: [47] [4800/6250] eta: 0:04:01 lr: 0.000072 grad: 0.1329 (0.1439) loss: 0.7870 (0.7779) time: 0.0986 data: 0.0005 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:44 lr: 0.000072 grad: 0.1316 (0.1438) loss: 0.7803 (0.7779) time: 0.1479 data: 0.0690 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:27 lr: 0.000072 grad: 0.1342 (0.1438) loss: 0.7778 (0.7779) time: 0.1248 data: 0.0207 max mem: 9377 +Train: [47] [5100/6250] eta: 0:03:10 lr: 0.000072 grad: 0.1339 (0.1438) loss: 0.7834 (0.7779) time: 0.1279 data: 0.0376 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:54 lr: 0.000072 grad: 0.1346 (0.1437) loss: 0.7756 (0.7779) time: 0.1390 data: 0.0570 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:37 lr: 0.000072 grad: 0.1372 (0.1436) loss: 0.7769 (0.7780) time: 0.1859 data: 0.1030 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:20 lr: 0.000072 grad: 0.1371 (0.1436) loss: 0.7840 (0.7781) time: 0.1825 data: 0.1008 max mem: 9377 +Train: [47] [5500/6250] eta: 0:02:04 lr: 0.000072 grad: 0.1440 (0.1435) loss: 0.7807 (0.7782) time: 0.1905 data: 0.1012 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:47 lr: 0.000072 grad: 0.1444 (0.1435) loss: 0.7709 (0.7783) time: 0.1620 data: 0.0719 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:30 lr: 0.000072 grad: 0.1446 (0.1435) loss: 0.7808 (0.7783) time: 0.1482 data: 0.0678 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:14 lr: 0.000072 grad: 0.1412 (0.1435) loss: 0.7743 (0.7783) time: 0.2358 data: 0.1562 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:57 lr: 0.000072 grad: 0.1411 (0.1435) loss: 0.7913 (0.7784) time: 0.1030 data: 0.0002 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:41 lr: 0.000072 grad: 0.1339 (0.1435) loss: 0.7881 (0.7785) time: 0.1493 data: 0.0605 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:24 lr: 0.000072 grad: 0.1335 (0.1434) loss: 0.7905 (0.7786) time: 0.2112 data: 0.1219 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:08 lr: 0.000072 grad: 0.1410 (0.1435) loss: 0.7892 (0.7787) time: 0.0916 data: 0.0063 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.1360 (0.1434) loss: 0.7837 (0.7788) time: 0.1572 data: 0.0676 max mem: 9377 +Train: [47] Total time: 0:17:14 (0.1655 s / it) +Averaged stats: lr: 0.000072 grad: 0.1360 (0.1434) loss: 0.7837 (0.7788) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:03:48 loss: 0.8291 (0.8291) time: 3.6782 data: 3.6039 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8218 (0.8227) time: 0.1404 data: 0.1152 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (hcp-train-subset): loss: 0.8218 (0.8227) +Eval (hcp-val): [47] [ 0/62] eta: 0:06:12 loss: 0.8410 (0.8410) time: 6.0039 data: 5.9737 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8440 (0.8462) time: 0.1932 data: 0.1681 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:15 (0.2561 s / it) +Averaged stats (hcp-val): loss: 0.8440 (0.8462) +Eval (nsd-val): [47] [ 0/62] eta: 0:04:47 loss: 0.8141 (0.8141) time: 4.6410 data: 4.6106 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8207 (0.8219) time: 0.1316 data: 0.1046 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (nsd-val): loss: 0.8207 (0.8219) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 12:25:38 lr: 0.000072 grad: 0.2190 (0.2190) loss: 0.8046 (0.8046) time: 7.1582 data: 7.0113 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:26:51 lr: 0.000072 grad: 0.1729 (0.2042) loss: 0.7768 (0.7987) time: 0.2158 data: 0.1071 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:43:49 lr: 0.000072 grad: 0.1682 (0.1922) loss: 0.7839 (0.7920) time: 0.1650 data: 0.0294 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:42:58 lr: 0.000072 grad: 0.1551 (0.1863) loss: 0.7711 (0.7855) time: 0.1784 data: 0.0008 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:39:35 lr: 0.000072 grad: 0.1673 (0.1821) loss: 0.7720 (0.7824) time: 0.2022 data: 0.0823 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:35:42 lr: 0.000072 grad: 0.1389 (0.1748) loss: 0.7812 (0.7816) time: 0.1964 data: 0.0736 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:32:23 lr: 0.000072 grad: 0.1448 (0.1695) loss: 0.7777 (0.7820) time: 0.1555 data: 0.0631 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:30:24 lr: 0.000072 grad: 0.1457 (0.1662) loss: 0.7795 (0.7821) time: 0.4589 data: 0.3004 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:28:35 lr: 0.000072 grad: 0.1412 (0.1636) loss: 0.7726 (0.7813) time: 0.1309 data: 0.0243 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:28:38 lr: 0.000071 grad: 0.1371 (0.1610) loss: 0.7843 (0.7813) time: 0.9804 data: 0.8186 max mem: 9377 +Train: [48] [1000/6250] eta: 0:28:48 lr: 0.000071 grad: 0.1349 (0.1589) loss: 0.7825 (0.7814) time: 0.3047 data: 0.1030 max mem: 9377 +Train: [48] [1100/6250] eta: 0:27:03 lr: 0.000071 grad: 0.1363 (0.1569) loss: 0.7791 (0.7818) time: 0.1353 data: 0.0016 max mem: 9377 +Train: [48] [1200/6250] eta: 0:26:08 lr: 0.000071 grad: 0.1348 (0.1554) loss: 0.7826 (0.7817) time: 0.1614 data: 0.0651 max mem: 9377 +Train: [48] [1300/6250] eta: 0:24:42 lr: 0.000071 grad: 0.1402 (0.1542) loss: 0.7795 (0.7812) time: 0.2218 data: 0.1337 max mem: 9377 +Train: [48] [1400/6250] eta: 0:23:18 lr: 0.000071 grad: 0.1409 (0.1534) loss: 0.7764 (0.7809) time: 0.1182 data: 0.0280 max mem: 9377 +Train: [48] [1500/6250] eta: 0:22:08 lr: 0.000071 grad: 0.1426 (0.1526) loss: 0.7813 (0.7808) time: 0.0967 data: 0.0003 max mem: 9377 +Train: [48] [1600/6250] eta: 0:21:03 lr: 0.000071 grad: 0.1359 (0.1517) loss: 0.7785 (0.7808) time: 0.1403 data: 0.0578 max mem: 9377 +Train: [48] [1700/6250] eta: 0:20:11 lr: 0.000071 grad: 0.1396 (0.1511) loss: 0.7825 (0.7806) time: 0.2435 data: 0.1581 max mem: 9377 +Train: [48] [1800/6250] eta: 0:19:16 lr: 0.000071 grad: 0.1443 (0.1508) loss: 0.7648 (0.7802) time: 0.1088 data: 0.0202 max mem: 9377 +Train: [48] [1900/6250] eta: 0:18:29 lr: 0.000071 grad: 0.1362 (0.1502) loss: 0.7769 (0.7799) time: 0.1836 data: 0.0916 max mem: 9377 +Train: [48] [2000/6250] eta: 0:17:41 lr: 0.000071 grad: 0.1329 (0.1496) loss: 0.7603 (0.7798) time: 0.1153 data: 0.0085 max mem: 9377 +Train: [48] [2100/6250] eta: 0:17:22 lr: 0.000071 grad: 0.1429 (0.1494) loss: 0.7738 (0.7795) time: 0.4464 data: 0.3471 max mem: 9377 +Train: [48] [2200/6250] eta: 0:16:37 lr: 0.000071 grad: 0.1362 (0.1491) loss: 0.7797 (0.7795) time: 0.1355 data: 0.0400 max mem: 9377 +Train: [48] [2300/6250] eta: 0:15:59 lr: 0.000071 grad: 0.1399 (0.1489) loss: 0.7685 (0.7794) time: 0.1150 data: 0.0297 max mem: 9377 +Train: [48] [2400/6250] eta: 0:15:20 lr: 0.000071 grad: 0.1444 (0.1484) loss: 0.7704 (0.7792) time: 0.1724 data: 0.0863 max mem: 9377 +Train: [48] [2500/6250] eta: 0:14:45 lr: 0.000071 grad: 0.1422 (0.1484) loss: 0.7719 (0.7791) time: 0.1035 data: 0.0002 max mem: 9377 +Train: [48] [2600/6250] eta: 0:14:11 lr: 0.000071 grad: 0.1484 (0.1481) loss: 0.7806 (0.7790) time: 0.1693 data: 0.0785 max mem: 9377 +Train: [48] [2700/6250] eta: 0:13:36 lr: 0.000071 grad: 0.1387 (0.1479) loss: 0.7814 (0.7790) time: 0.1546 data: 0.0654 max mem: 9377 +Train: [48] [2800/6250] eta: 0:13:05 lr: 0.000071 grad: 0.1336 (0.1476) loss: 0.7839 (0.7790) time: 0.1688 data: 0.0888 max mem: 9377 +Train: [48] [2900/6250] eta: 0:12:33 lr: 0.000071 grad: 0.1423 (0.1475) loss: 0.7728 (0.7789) time: 0.1454 data: 0.0591 max mem: 9377 +Train: [48] [3000/6250] eta: 0:12:04 lr: 0.000071 grad: 0.1386 (0.1473) loss: 0.7791 (0.7789) time: 0.1925 data: 0.1069 max mem: 9377 +Train: [48] [3100/6250] eta: 0:11:33 lr: 0.000071 grad: 0.1377 (0.1472) loss: 0.7876 (0.7788) time: 0.0959 data: 0.0117 max mem: 9377 +Train: [48] [3200/6250] eta: 0:11:05 lr: 0.000071 grad: 0.1280 (0.1469) loss: 0.7810 (0.7790) time: 0.1301 data: 0.0435 max mem: 9377 +Train: [48] [3300/6250] eta: 0:10:39 lr: 0.000071 grad: 0.1425 (0.1467) loss: 0.7779 (0.7790) time: 0.1494 data: 0.0431 max mem: 9377 +Train: [48] [3400/6250] eta: 0:10:11 lr: 0.000071 grad: 0.1375 (0.1464) loss: 0.7916 (0.7791) time: 0.1611 data: 0.0809 max mem: 9377 +Train: [48] [3500/6250] eta: 0:09:46 lr: 0.000071 grad: 0.1372 (0.1462) loss: 0.7772 (0.7792) time: 0.1484 data: 0.0644 max mem: 9377 +Train: [48] [3600/6250] eta: 0:09:21 lr: 0.000071 grad: 0.1379 (0.1460) loss: 0.7881 (0.7791) time: 0.1337 data: 0.0509 max mem: 9377 +Train: [48] [3700/6250] eta: 0:08:56 lr: 0.000071 grad: 0.1359 (0.1459) loss: 0.7798 (0.7791) time: 0.1364 data: 0.0407 max mem: 9377 +Train: [48] [3800/6250] eta: 0:08:32 lr: 0.000071 grad: 0.1314 (0.1458) loss: 0.7839 (0.7792) time: 0.1589 data: 0.0691 max mem: 9377 +Train: [48] [3900/6250] eta: 0:08:08 lr: 0.000070 grad: 0.1385 (0.1457) loss: 0.7715 (0.7791) time: 0.1496 data: 0.0654 max mem: 9377 +Train: [48] [4000/6250] eta: 0:07:44 lr: 0.000070 grad: 0.1420 (0.1456) loss: 0.7782 (0.7791) time: 0.1764 data: 0.0867 max mem: 9377 +Train: [48] [4100/6250] eta: 0:07:20 lr: 0.000070 grad: 0.1418 (0.1456) loss: 0.7675 (0.7790) time: 0.1522 data: 0.0681 max mem: 9377 +Train: [48] [4200/6250] eta: 0:06:57 lr: 0.000070 grad: 0.1377 (0.1456) loss: 0.7846 (0.7789) time: 0.1250 data: 0.0405 max mem: 9377 +Train: [48] [4300/6250] eta: 0:06:35 lr: 0.000070 grad: 0.1525 (0.1457) loss: 0.7697 (0.7789) time: 0.2095 data: 0.1242 max mem: 9377 +Train: [48] [4400/6250] eta: 0:06:12 lr: 0.000070 grad: 0.1450 (0.1457) loss: 0.7777 (0.7788) time: 0.1418 data: 0.0603 max mem: 9377 +Train: [48] [4500/6250] eta: 0:05:51 lr: 0.000070 grad: 0.1445 (0.1457) loss: 0.7633 (0.7786) time: 0.2110 data: 0.1226 max mem: 9377 +Train: [48] [4600/6250] eta: 0:05:29 lr: 0.000070 grad: 0.1448 (0.1457) loss: 0.7782 (0.7785) time: 0.1402 data: 0.0523 max mem: 9377 +Train: [48] [4700/6250] eta: 0:05:08 lr: 0.000070 grad: 0.1421 (0.1458) loss: 0.7815 (0.7785) time: 0.1606 data: 0.0710 max mem: 9377 +Train: [48] [4800/6250] eta: 0:04:46 lr: 0.000070 grad: 0.1430 (0.1457) loss: 0.7761 (0.7785) time: 0.1519 data: 0.0676 max mem: 9377 +Train: [48] [4900/6250] eta: 0:04:25 lr: 0.000070 grad: 0.1347 (0.1457) loss: 0.7719 (0.7785) time: 0.1638 data: 0.0745 max mem: 9377 +Train: [48] [5000/6250] eta: 0:04:05 lr: 0.000070 grad: 0.1420 (0.1456) loss: 0.7760 (0.7785) time: 0.1843 data: 0.1001 max mem: 9377 +Train: [48] [5100/6250] eta: 0:03:44 lr: 0.000070 grad: 0.1390 (0.1456) loss: 0.7907 (0.7786) time: 0.1536 data: 0.0752 max mem: 9377 +Train: [48] [5200/6250] eta: 0:03:24 lr: 0.000070 grad: 0.1415 (0.1456) loss: 0.7790 (0.7786) time: 0.2210 data: 0.1351 max mem: 9377 +Train: [48] [5300/6250] eta: 0:03:03 lr: 0.000070 grad: 0.1399 (0.1456) loss: 0.7749 (0.7786) time: 0.1365 data: 0.0514 max mem: 9377 +Train: [48] [5400/6250] eta: 0:02:43 lr: 0.000070 grad: 0.1328 (0.1455) loss: 0.7878 (0.7787) time: 0.1448 data: 0.0551 max mem: 9377 +Train: [48] [5500/6250] eta: 0:02:24 lr: 0.000070 grad: 0.1318 (0.1454) loss: 0.7864 (0.7787) time: 0.1988 data: 0.1167 max mem: 9377 +Train: [48] [5600/6250] eta: 0:02:04 lr: 0.000070 grad: 0.1342 (0.1453) loss: 0.7803 (0.7787) time: 0.1649 data: 0.0778 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:45 lr: 0.000070 grad: 0.1325 (0.1452) loss: 0.7851 (0.7788) time: 0.1568 data: 0.0684 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:25 lr: 0.000070 grad: 0.1380 (0.1450) loss: 0.7854 (0.7790) time: 0.1314 data: 0.0450 max mem: 9377 +Train: [48] [5900/6250] eta: 0:01:06 lr: 0.000070 grad: 0.1288 (0.1449) loss: 0.7908 (0.7791) time: 0.1681 data: 0.0777 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:47 lr: 0.000070 grad: 0.1328 (0.1448) loss: 0.7891 (0.7792) time: 0.1850 data: 0.0910 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:28 lr: 0.000070 grad: 0.1365 (0.1447) loss: 0.7879 (0.7793) time: 0.1815 data: 0.0920 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:09 lr: 0.000070 grad: 0.1511 (0.1447) loss: 0.7780 (0.7793) time: 0.1617 data: 0.0779 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1441 (0.1447) loss: 0.7874 (0.7794) time: 0.1675 data: 0.0814 max mem: 9377 +Train: [48] Total time: 0:19:45 (0.1896 s / it) +Averaged stats: lr: 0.000070 grad: 0.1441 (0.1447) loss: 0.7874 (0.7794) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:04:36 loss: 0.8266 (0.8266) time: 4.4537 data: 4.3736 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8244 (0.8222) time: 0.1290 data: 0.1022 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (hcp-train-subset): loss: 0.8244 (0.8222) +Eval (hcp-val): [48] [ 0/62] eta: 0:03:57 loss: 0.8465 (0.8465) time: 3.8310 data: 3.7684 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8424 (0.8458) time: 0.1441 data: 0.1191 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (hcp-val): loss: 0.8424 (0.8458) +Eval (nsd-val): [48] [ 0/62] eta: 0:03:59 loss: 0.8105 (0.8105) time: 3.8571 data: 3.7717 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8209 (0.8216) time: 0.1266 data: 0.1016 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8216) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 9:25:36 lr: 0.000070 grad: 0.0812 (0.0812) loss: 0.8659 (0.8659) time: 5.4298 data: 5.1762 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:49:23 lr: 0.000070 grad: 0.1720 (0.2196) loss: 0.7960 (0.8001) time: 0.1823 data: 0.0721 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:33:43 lr: 0.000070 grad: 0.1861 (0.2042) loss: 0.7873 (0.7961) time: 0.1513 data: 0.0580 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:27:53 lr: 0.000070 grad: 0.1618 (0.1985) loss: 0.7827 (0.7925) time: 0.1837 data: 0.0930 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:24:19 lr: 0.000070 grad: 0.1628 (0.1942) loss: 0.7798 (0.7884) time: 0.1532 data: 0.0610 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:24:56 lr: 0.000070 grad: 0.1391 (0.1868) loss: 0.7907 (0.7865) time: 0.1711 data: 0.0004 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:24:20 lr: 0.000070 grad: 0.1375 (0.1803) loss: 0.7824 (0.7860) time: 0.1963 data: 0.0758 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:23:13 lr: 0.000069 grad: 0.1398 (0.1749) loss: 0.7792 (0.7854) time: 0.1333 data: 0.0221 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:23:42 lr: 0.000069 grad: 0.1390 (0.1706) loss: 0.7854 (0.7853) time: 0.4448 data: 0.3442 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:22:34 lr: 0.000069 grad: 0.1443 (0.1673) loss: 0.7827 (0.7851) time: 0.2777 data: 0.0950 max mem: 9377 +Train: [49] [1000/6250] eta: 0:22:27 lr: 0.000069 grad: 0.1293 (0.1643) loss: 0.7814 (0.7851) time: 0.1469 data: 0.0228 max mem: 9377 +Train: [49] [1100/6250] eta: 0:21:23 lr: 0.000069 grad: 0.1331 (0.1615) loss: 0.7895 (0.7851) time: 0.2350 data: 0.1223 max mem: 9377 +Train: [49] [1200/6250] eta: 0:20:11 lr: 0.000069 grad: 0.1318 (0.1597) loss: 0.7813 (0.7852) time: 0.1529 data: 0.0562 max mem: 9377 +Train: [49] [1300/6250] eta: 0:19:54 lr: 0.000069 grad: 0.1386 (0.1581) loss: 0.7788 (0.7849) time: 0.1270 data: 0.0003 max mem: 9377 +Train: [49] [1400/6250] eta: 0:19:02 lr: 0.000069 grad: 0.1346 (0.1569) loss: 0.7828 (0.7848) time: 0.1821 data: 0.0923 max mem: 9377 +Train: [49] [1500/6250] eta: 0:18:19 lr: 0.000069 grad: 0.1463 (0.1559) loss: 0.7817 (0.7847) time: 0.1772 data: 0.0804 max mem: 9377 +Train: [49] [1600/6250] eta: 0:17:32 lr: 0.000069 grad: 0.1386 (0.1548) loss: 0.7743 (0.7846) time: 0.0970 data: 0.0084 max mem: 9377 +Train: [49] [1700/6250] eta: 0:16:55 lr: 0.000069 grad: 0.1374 (0.1539) loss: 0.7754 (0.7842) time: 0.1756 data: 0.0850 max mem: 9377 +Train: [49] [1800/6250] eta: 0:16:20 lr: 0.000069 grad: 0.1410 (0.1532) loss: 0.7815 (0.7839) time: 0.1707 data: 0.0767 max mem: 9377 +Train: [49] [1900/6250] eta: 0:15:44 lr: 0.000069 grad: 0.1347 (0.1528) loss: 0.7750 (0.7835) time: 0.1954 data: 0.1127 max mem: 9377 +Train: [49] [2000/6250] eta: 0:15:10 lr: 0.000069 grad: 0.1357 (0.1522) loss: 0.7849 (0.7832) time: 0.1225 data: 0.0411 max mem: 9377 +Train: [49] [2100/6250] eta: 0:14:36 lr: 0.000069 grad: 0.1479 (0.1519) loss: 0.7795 (0.7829) time: 0.1180 data: 0.0303 max mem: 9377 +Train: [49] [2200/6250] eta: 0:14:37 lr: 0.000069 grad: 0.1372 (0.1515) loss: 0.7741 (0.7825) time: 0.1585 data: 0.0439 max mem: 9377 +Train: [49] [2300/6250] eta: 0:14:01 lr: 0.000069 grad: 0.1423 (0.1512) loss: 0.7801 (0.7822) time: 0.1496 data: 0.0702 max mem: 9377 +Train: [49] [2400/6250] eta: 0:13:30 lr: 0.000069 grad: 0.1430 (0.1509) loss: 0.7764 (0.7819) time: 0.1382 data: 0.0471 max mem: 9377 +Train: [49] [2500/6250] eta: 0:13:03 lr: 0.000069 grad: 0.1494 (0.1508) loss: 0.7710 (0.7813) time: 0.2344 data: 0.1420 max mem: 9377 +Train: [49] [2600/6250] eta: 0:12:34 lr: 0.000069 grad: 0.1471 (0.1508) loss: 0.7578 (0.7808) time: 0.2099 data: 0.1104 max mem: 9377 +Train: [49] [2700/6250] eta: 0:12:06 lr: 0.000069 grad: 0.1492 (0.1507) loss: 0.7643 (0.7803) time: 0.1528 data: 0.0648 max mem: 9377 +Train: [49] [2800/6250] eta: 0:11:40 lr: 0.000069 grad: 0.1473 (0.1508) loss: 0.7758 (0.7797) time: 0.2222 data: 0.1442 max mem: 9377 +Train: [49] [2900/6250] eta: 0:11:14 lr: 0.000069 grad: 0.1550 (0.1509) loss: 0.7620 (0.7792) time: 0.1587 data: 0.0761 max mem: 9377 +Train: [49] [3000/6250] eta: 0:10:49 lr: 0.000069 grad: 0.1458 (0.1508) loss: 0.7631 (0.7789) time: 0.2103 data: 0.1224 max mem: 9377 +Train: [49] [3100/6250] eta: 0:10:24 lr: 0.000069 grad: 0.1479 (0.1508) loss: 0.7614 (0.7785) time: 0.1677 data: 0.0835 max mem: 9377 +Train: [49] [3200/6250] eta: 0:10:00 lr: 0.000069 grad: 0.1412 (0.1508) loss: 0.7696 (0.7781) time: 0.1276 data: 0.0379 max mem: 9377 +Train: [49] [3300/6250] eta: 0:09:36 lr: 0.000069 grad: 0.1478 (0.1508) loss: 0.7697 (0.7777) time: 0.1455 data: 0.0586 max mem: 9377 +Train: [49] [3400/6250] eta: 0:09:13 lr: 0.000069 grad: 0.1481 (0.1508) loss: 0.7679 (0.7775) time: 0.1086 data: 0.0111 max mem: 9377 +Train: [49] [3500/6250] eta: 0:08:52 lr: 0.000069 grad: 0.1464 (0.1507) loss: 0.7688 (0.7772) time: 0.1238 data: 0.0332 max mem: 9377 +Train: [49] [3600/6250] eta: 0:08:29 lr: 0.000069 grad: 0.1431 (0.1506) loss: 0.7786 (0.7770) time: 0.1203 data: 0.0404 max mem: 9377 +Train: [49] [3700/6250] eta: 0:08:07 lr: 0.000069 grad: 0.1529 (0.1505) loss: 0.7676 (0.7768) time: 0.1381 data: 0.0596 max mem: 9377 +Train: [49] [3800/6250] eta: 0:07:47 lr: 0.000068 grad: 0.1513 (0.1504) loss: 0.7682 (0.7766) time: 0.1400 data: 0.0474 max mem: 9377 +Train: [49] [3900/6250] eta: 0:07:26 lr: 0.000068 grad: 0.1400 (0.1503) loss: 0.7783 (0.7765) time: 0.1936 data: 0.1039 max mem: 9377 +Train: [49] [4000/6250] eta: 0:07:06 lr: 0.000068 grad: 0.1423 (0.1501) loss: 0.7799 (0.7765) time: 0.1905 data: 0.1110 max mem: 9377 +Train: [49] [4100/6250] eta: 0:06:46 lr: 0.000068 grad: 0.1395 (0.1500) loss: 0.7750 (0.7764) time: 0.1718 data: 0.0760 max mem: 9377 +Train: [49] [4200/6250] eta: 0:06:26 lr: 0.000068 grad: 0.1391 (0.1498) loss: 0.7759 (0.7764) time: 0.1375 data: 0.0415 max mem: 9377 +Train: [49] [4300/6250] eta: 0:06:05 lr: 0.000068 grad: 0.1418 (0.1498) loss: 0.7739 (0.7764) time: 0.1414 data: 0.0455 max mem: 9377 +Train: [49] [4400/6250] eta: 0:05:45 lr: 0.000068 grad: 0.1428 (0.1497) loss: 0.7687 (0.7763) time: 0.0910 data: 0.0011 max mem: 9377 +Train: [49] [4500/6250] eta: 0:05:25 lr: 0.000068 grad: 0.1489 (0.1496) loss: 0.7722 (0.7763) time: 0.1687 data: 0.0690 max mem: 9377 +Train: [49] [4600/6250] eta: 0:05:05 lr: 0.000068 grad: 0.1344 (0.1494) loss: 0.7845 (0.7764) time: 0.1654 data: 0.0726 max mem: 9377 +Train: [49] [4700/6250] eta: 0:04:45 lr: 0.000068 grad: 0.1356 (0.1493) loss: 0.7722 (0.7763) time: 0.1586 data: 0.0684 max mem: 9377 +Train: [49] [4800/6250] eta: 0:04:26 lr: 0.000068 grad: 0.1361 (0.1491) loss: 0.7765 (0.7763) time: 0.1479 data: 0.0689 max mem: 9377 +Train: [49] [4900/6250] eta: 0:04:06 lr: 0.000068 grad: 0.1389 (0.1490) loss: 0.7797 (0.7764) time: 0.1445 data: 0.0477 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:47 lr: 0.000068 grad: 0.1370 (0.1489) loss: 0.7813 (0.7765) time: 0.1369 data: 0.0503 max mem: 9377 +Train: [49] [5100/6250] eta: 0:03:28 lr: 0.000068 grad: 0.1451 (0.1487) loss: 0.7777 (0.7765) time: 0.1463 data: 0.0573 max mem: 9377 +Train: [49] [5200/6250] eta: 0:03:10 lr: 0.000068 grad: 0.1373 (0.1487) loss: 0.7818 (0.7766) time: 0.2314 data: 0.1526 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:51 lr: 0.000068 grad: 0.1331 (0.1485) loss: 0.7867 (0.7767) time: 0.1714 data: 0.0784 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:33 lr: 0.000068 grad: 0.1393 (0.1484) loss: 0.7778 (0.7768) time: 0.2353 data: 0.1501 max mem: 9377 +Train: [49] [5500/6250] eta: 0:02:14 lr: 0.000068 grad: 0.1450 (0.1482) loss: 0.7671 (0.7769) time: 0.1410 data: 0.0577 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:56 lr: 0.000068 grad: 0.1392 (0.1481) loss: 0.7877 (0.7770) time: 0.1142 data: 0.0265 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:38 lr: 0.000068 grad: 0.1448 (0.1480) loss: 0.7733 (0.7771) time: 0.1607 data: 0.0703 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:20 lr: 0.000068 grad: 0.1482 (0.1480) loss: 0.7661 (0.7770) time: 0.0899 data: 0.0002 max mem: 9377 +Train: [49] [5900/6250] eta: 0:01:02 lr: 0.000068 grad: 0.1413 (0.1479) loss: 0.7637 (0.7771) time: 0.1435 data: 0.0493 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:44 lr: 0.000068 grad: 0.1423 (0.1479) loss: 0.7745 (0.7770) time: 0.0893 data: 0.0003 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:26 lr: 0.000068 grad: 0.1441 (0.1478) loss: 0.7809 (0.7770) time: 0.1436 data: 0.0523 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:08 lr: 0.000068 grad: 0.1487 (0.1478) loss: 0.7781 (0.7769) time: 0.5180 data: 0.4284 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.1410 (0.1478) loss: 0.7730 (0.7769) time: 0.1758 data: 0.0775 max mem: 9377 +Train: [49] Total time: 0:18:45 (0.1801 s / it) +Averaged stats: lr: 0.000068 grad: 0.1410 (0.1478) loss: 0.7730 (0.7769) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:06:15 loss: 0.8263 (0.8263) time: 6.0635 data: 6.0332 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8223 (0.8227) time: 0.1212 data: 0.0944 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-train-subset): loss: 0.8223 (0.8227) +Making plots (hcp-train-subset): example=50 +Eval (hcp-val): [49] [ 0/62] eta: 0:05:29 loss: 0.8550 (0.8550) time: 5.3215 data: 5.2742 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8478 (0.8501) time: 0.1383 data: 0.1129 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-val): loss: 0.8478 (0.8501) +Making plots (hcp-val): example=23 +Eval (nsd-val): [49] [ 0/62] eta: 0:04:19 loss: 0.8119 (0.8119) time: 4.1832 data: 4.0934 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8224 (0.8245) time: 0.1272 data: 0.1017 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:13 (0.2234 s / it) +Averaged stats (nsd-val): loss: 0.8224 (0.8245) +Making plots (nsd-val): example=14 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 15:14:43 lr: 0.000068 grad: 0.3759 (0.3759) loss: 0.6279 (0.6279) time: 8.7814 data: 8.6773 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:25:13 lr: 0.000068 grad: 0.1780 (0.2356) loss: 0.8007 (0.7822) time: 0.1780 data: 0.0744 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:21:20 lr: 0.000068 grad: 0.1910 (0.2363) loss: 0.7766 (0.7775) time: 0.1704 data: 0.0775 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:19:36 lr: 0.000068 grad: 0.1790 (0.2222) loss: 0.7754 (0.7768) time: 0.1587 data: 0.0574 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:18:56 lr: 0.000068 grad: 0.1595 (0.2070) loss: 0.7748 (0.7760) time: 0.1688 data: 0.0686 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:18:16 lr: 0.000067 grad: 0.1434 (0.1962) loss: 0.7747 (0.7766) time: 0.1407 data: 0.0459 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:17:38 lr: 0.000067 grad: 0.1369 (0.1884) loss: 0.7786 (0.7769) time: 0.1577 data: 0.0678 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:19:07 lr: 0.000067 grad: 0.1495 (0.1824) loss: 0.7701 (0.7771) time: 0.6064 data: 0.4398 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:18:14 lr: 0.000067 grad: 0.1359 (0.1773) loss: 0.7893 (0.7775) time: 0.1642 data: 0.0663 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:17:35 lr: 0.000067 grad: 0.1321 (0.1731) loss: 0.7814 (0.7780) time: 0.1589 data: 0.0773 max mem: 9377 +Train: [50] [1000/6250] eta: 0:16:53 lr: 0.000067 grad: 0.1445 (0.1701) loss: 0.7737 (0.7782) time: 0.1108 data: 0.0326 max mem: 9377 +Train: [50] [1100/6250] eta: 0:16:20 lr: 0.000067 grad: 0.1484 (0.1678) loss: 0.7735 (0.7782) time: 0.1420 data: 0.0518 max mem: 9377 +Train: [50] [1200/6250] eta: 0:16:01 lr: 0.000067 grad: 0.1326 (0.1655) loss: 0.7879 (0.7786) time: 0.1657 data: 0.0630 max mem: 9377 +Train: [50] [1300/6250] eta: 0:15:30 lr: 0.000067 grad: 0.1403 (0.1640) loss: 0.7762 (0.7782) time: 0.1425 data: 0.0500 max mem: 9377 +Train: [50] [1400/6250] eta: 0:16:04 lr: 0.000067 grad: 0.1402 (0.1626) loss: 0.7830 (0.7782) time: 0.1631 data: 0.0496 max mem: 9377 +Train: [50] [1500/6250] eta: 0:15:53 lr: 0.000067 grad: 0.1428 (0.1614) loss: 0.7732 (0.7782) time: 0.1565 data: 0.0003 max mem: 9377 +Train: [50] [1600/6250] eta: 0:15:52 lr: 0.000067 grad: 0.1470 (0.1602) loss: 0.7708 (0.7783) time: 0.4827 data: 0.2925 max mem: 9377 +Train: [50] [1700/6250] eta: 0:15:30 lr: 0.000067 grad: 0.1321 (0.1590) loss: 0.7877 (0.7785) time: 0.1093 data: 0.0005 max mem: 9377 +Train: [50] [1800/6250] eta: 0:14:56 lr: 0.000067 grad: 0.1375 (0.1585) loss: 0.7814 (0.7784) time: 0.1403 data: 0.0552 max mem: 9377 +Train: [50] [1900/6250] eta: 0:14:24 lr: 0.000067 grad: 0.1401 (0.1576) loss: 0.7809 (0.7785) time: 0.1806 data: 0.0970 max mem: 9377 +Train: [50] [2000/6250] eta: 0:13:54 lr: 0.000067 grad: 0.1346 (0.1568) loss: 0.7857 (0.7785) time: 0.1462 data: 0.0550 max mem: 9377 +Train: [50] [2100/6250] eta: 0:13:26 lr: 0.000067 grad: 0.1397 (0.1562) loss: 0.7753 (0.7786) time: 0.1883 data: 0.1103 max mem: 9377 +Train: [50] [2200/6250] eta: 0:12:59 lr: 0.000067 grad: 0.1473 (0.1557) loss: 0.7733 (0.7784) time: 0.1214 data: 0.0384 max mem: 9377 +Train: [50] [2300/6250] eta: 0:12:34 lr: 0.000067 grad: 0.1379 (0.1551) loss: 0.7730 (0.7782) time: 0.1392 data: 0.0514 max mem: 9377 +Train: [50] [2400/6250] eta: 0:12:10 lr: 0.000067 grad: 0.1454 (0.1546) loss: 0.7745 (0.7781) time: 0.2018 data: 0.1188 max mem: 9377 +Train: [50] [2500/6250] eta: 0:11:44 lr: 0.000067 grad: 0.1341 (0.1542) loss: 0.7789 (0.7780) time: 0.1404 data: 0.0513 max mem: 9377 +Train: [50] [2600/6250] eta: 0:11:22 lr: 0.000067 grad: 0.1450 (0.1539) loss: 0.7633 (0.7778) time: 0.1862 data: 0.1092 max mem: 9377 +Train: [50] [2700/6250] eta: 0:10:58 lr: 0.000067 grad: 0.1435 (0.1535) loss: 0.7736 (0.7777) time: 0.1338 data: 0.0485 max mem: 9377 +Train: [50] [2800/6250] eta: 0:10:38 lr: 0.000067 grad: 0.1369 (0.1531) loss: 0.7861 (0.7778) time: 0.2095 data: 0.1256 max mem: 9377 +Train: [50] [2900/6250] eta: 0:10:16 lr: 0.000067 grad: 0.1429 (0.1527) loss: 0.7834 (0.7778) time: 0.1946 data: 0.1154 max mem: 9377 +Train: [50] [3000/6250] eta: 0:09:54 lr: 0.000067 grad: 0.1398 (0.1527) loss: 0.7800 (0.7777) time: 0.1567 data: 0.0806 max mem: 9377 +Train: [50] [3100/6250] eta: 0:09:33 lr: 0.000067 grad: 0.1366 (0.1523) loss: 0.7798 (0.7777) time: 0.1387 data: 0.0443 max mem: 9377 +Train: [50] [3200/6250] eta: 0:09:13 lr: 0.000067 grad: 0.1373 (0.1520) loss: 0.7809 (0.7778) time: 0.1657 data: 0.0833 max mem: 9377 +Train: [50] [3300/6250] eta: 0:08:53 lr: 0.000067 grad: 0.1478 (0.1517) loss: 0.7785 (0.7778) time: 0.1152 data: 0.0247 max mem: 9377 +Train: [50] [3400/6250] eta: 0:08:34 lr: 0.000067 grad: 0.1409 (0.1515) loss: 0.7777 (0.7779) time: 0.1181 data: 0.0263 max mem: 9377 +Train: [50] [3500/6250] eta: 0:08:15 lr: 0.000067 grad: 0.1382 (0.1512) loss: 0.7808 (0.7779) time: 0.2724 data: 0.1918 max mem: 9377 +Train: [50] [3600/6250] eta: 0:07:55 lr: 0.000066 grad: 0.1420 (0.1510) loss: 0.7720 (0.7780) time: 0.1657 data: 0.0850 max mem: 9377 +Train: [50] [3700/6250] eta: 0:07:36 lr: 0.000066 grad: 0.1324 (0.1507) loss: 0.7803 (0.7779) time: 0.1467 data: 0.0698 max mem: 9377 +Train: [50] [3800/6250] eta: 0:07:16 lr: 0.000066 grad: 0.1379 (0.1505) loss: 0.7838 (0.7780) time: 0.1290 data: 0.0451 max mem: 9377 +Train: [50] [3900/6250] eta: 0:06:59 lr: 0.000066 grad: 0.1383 (0.1503) loss: 0.7742 (0.7780) time: 0.2047 data: 0.1162 max mem: 9377 +Train: [50] [4000/6250] eta: 0:06:52 lr: 0.000066 grad: 0.1387 (0.1502) loss: 0.7766 (0.7779) time: 0.3736 data: 0.2245 max mem: 9377 +Train: [50] [4100/6250] eta: 0:06:33 lr: 0.000066 grad: 0.1488 (0.1501) loss: 0.7720 (0.7778) time: 0.1871 data: 0.0848 max mem: 9377 +Train: [50] [4200/6250] eta: 0:06:17 lr: 0.000066 grad: 0.1460 (0.1501) loss: 0.7740 (0.7777) time: 0.3232 data: 0.1874 max mem: 9377 +Train: [50] [4300/6250] eta: 0:06:03 lr: 0.000066 grad: 0.1466 (0.1500) loss: 0.7734 (0.7777) time: 0.1567 data: 0.0475 max mem: 9377 +Train: [50] [4400/6250] eta: 0:05:45 lr: 0.000066 grad: 0.1458 (0.1500) loss: 0.7739 (0.7777) time: 0.1941 data: 0.0893 max mem: 9377 +Train: [50] [4500/6250] eta: 0:05:28 lr: 0.000066 grad: 0.1409 (0.1501) loss: 0.7763 (0.7777) time: 0.1906 data: 0.0992 max mem: 9377 +Train: [50] [4600/6250] eta: 0:05:16 lr: 0.000066 grad: 0.1412 (0.1500) loss: 0.7754 (0.7777) time: 0.2137 data: 0.1050 max mem: 9377 +Train: [50] [4700/6250] eta: 0:04:56 lr: 0.000066 grad: 0.1395 (0.1499) loss: 0.7802 (0.7777) time: 0.2008 data: 0.1222 max mem: 9377 +Train: [50] [4800/6250] eta: 0:04:36 lr: 0.000066 grad: 0.1451 (0.1498) loss: 0.7806 (0.7776) time: 0.1956 data: 0.1142 max mem: 9377 +Train: [50] [4900/6250] eta: 0:04:16 lr: 0.000066 grad: 0.1464 (0.1498) loss: 0.7787 (0.7776) time: 0.2138 data: 0.1253 max mem: 9377 +Train: [50] [5000/6250] eta: 0:03:57 lr: 0.000066 grad: 0.1459 (0.1497) loss: 0.7702 (0.7776) time: 0.2330 data: 0.1389 max mem: 9377 +Train: [50] [5100/6250] eta: 0:03:37 lr: 0.000066 grad: 0.1404 (0.1497) loss: 0.7791 (0.7775) time: 0.1764 data: 0.0896 max mem: 9377 +Train: [50] [5200/6250] eta: 0:03:18 lr: 0.000066 grad: 0.1526 (0.1497) loss: 0.7583 (0.7774) time: 0.1312 data: 0.0003 max mem: 9377 +Train: [50] [5300/6250] eta: 0:03:00 lr: 0.000066 grad: 0.1434 (0.1497) loss: 0.7696 (0.7773) time: 0.1642 data: 0.0697 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:40 lr: 0.000066 grad: 0.1420 (0.1497) loss: 0.7833 (0.7772) time: 0.1400 data: 0.0539 max mem: 9377 +Train: [50] [5500/6250] eta: 0:02:21 lr: 0.000066 grad: 0.1534 (0.1496) loss: 0.7669 (0.7771) time: 0.1540 data: 0.0697 max mem: 9377 +Train: [50] [5600/6250] eta: 0:02:02 lr: 0.000066 grad: 0.1481 (0.1495) loss: 0.7765 (0.7771) time: 0.1146 data: 0.0246 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:43 lr: 0.000066 grad: 0.1473 (0.1495) loss: 0.7739 (0.7771) time: 0.0958 data: 0.0012 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:24 lr: 0.000066 grad: 0.1520 (0.1495) loss: 0.7723 (0.7771) time: 0.3006 data: 0.2093 max mem: 9377 +Train: [50] [5900/6250] eta: 0:01:05 lr: 0.000066 grad: 0.1439 (0.1495) loss: 0.7674 (0.7770) time: 0.2250 data: 0.1387 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:46 lr: 0.000066 grad: 0.1446 (0.1495) loss: 0.7772 (0.7770) time: 0.2500 data: 0.1637 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:27 lr: 0.000066 grad: 0.1415 (0.1495) loss: 0.7698 (0.7769) time: 0.1862 data: 0.1002 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:09 lr: 0.000066 grad: 0.1454 (0.1495) loss: 0.7759 (0.7770) time: 0.1978 data: 0.1152 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.1471 (0.1495) loss: 0.7819 (0.7770) time: 0.1740 data: 0.0893 max mem: 9377 +Train: [50] Total time: 0:19:26 (0.1866 s / it) +Averaged stats: lr: 0.000066 grad: 0.1471 (0.1495) loss: 0.7819 (0.7770) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:05:09 loss: 0.8247 (0.8247) time: 4.9956 data: 4.9632 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8223 (0.8208) time: 0.1029 data: 0.0782 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:14 (0.2297 s / it) +Averaged stats (hcp-train-subset): loss: 0.8223 (0.8208) +Eval (hcp-val): [50] [ 0/62] eta: 0:03:50 loss: 0.8446 (0.8446) time: 3.7120 data: 3.6149 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8475 (0.8476) time: 0.1126 data: 0.0860 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:13 (0.2252 s / it) +Averaged stats (hcp-val): loss: 0.8475 (0.8476) +Eval (nsd-val): [50] [ 0/62] eta: 0:04:02 loss: 0.8124 (0.8124) time: 3.9107 data: 3.8514 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8225 (0.8239) time: 0.1372 data: 0.1103 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:14 (0.2274 s / it) +Averaged stats (nsd-val): loss: 0.8225 (0.8239) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 9:42:04 lr: 0.000066 grad: 0.1359 (0.1359) loss: 0.8621 (0.8621) time: 5.5879 data: 5.3199 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:23:38 lr: 0.000066 grad: 0.2108 (0.2487) loss: 0.7763 (0.7963) time: 0.1825 data: 0.0629 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:20:19 lr: 0.000066 grad: 0.1425 (0.2141) loss: 0.8001 (0.7951) time: 0.1580 data: 0.0533 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:20:02 lr: 0.000065 grad: 0.1716 (0.2004) loss: 0.7869 (0.7918) time: 0.1430 data: 0.0112 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:18:47 lr: 0.000065 grad: 0.1451 (0.1923) loss: 0.7748 (0.7888) time: 0.1438 data: 0.0491 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:18:39 lr: 0.000065 grad: 0.1494 (0.1845) loss: 0.7797 (0.7871) time: 0.1333 data: 0.0004 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:19:31 lr: 0.000065 grad: 0.1420 (0.1788) loss: 0.7862 (0.7865) time: 0.1672 data: 0.0416 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:19:45 lr: 0.000065 grad: 0.1452 (0.1754) loss: 0.7822 (0.7858) time: 0.1640 data: 0.0789 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:18:52 lr: 0.000065 grad: 0.1526 (0.1721) loss: 0.7727 (0.7848) time: 0.1563 data: 0.0543 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:18:02 lr: 0.000065 grad: 0.1499 (0.1690) loss: 0.7821 (0.7843) time: 0.1423 data: 0.0563 max mem: 9377 +Train: [51] [1000/6250] eta: 0:17:19 lr: 0.000065 grad: 0.1348 (0.1663) loss: 0.7837 (0.7844) time: 0.1149 data: 0.0264 max mem: 9377 +Train: [51] [1100/6250] eta: 0:16:43 lr: 0.000065 grad: 0.1419 (0.1640) loss: 0.7763 (0.7842) time: 0.1193 data: 0.0290 max mem: 9377 +Train: [51] [1200/6250] eta: 0:16:13 lr: 0.000065 grad: 0.1400 (0.1619) loss: 0.7839 (0.7840) time: 0.2079 data: 0.1271 max mem: 9377 +Train: [51] [1300/6250] eta: 0:15:38 lr: 0.000065 grad: 0.1396 (0.1602) loss: 0.7744 (0.7834) time: 0.1507 data: 0.0600 max mem: 9377 +Train: [51] [1400/6250] eta: 0:15:07 lr: 0.000065 grad: 0.1317 (0.1587) loss: 0.7746 (0.7832) time: 0.1431 data: 0.0547 max mem: 9377 +Train: [51] [1500/6250] eta: 0:14:39 lr: 0.000065 grad: 0.1416 (0.1573) loss: 0.7903 (0.7832) time: 0.1372 data: 0.0553 max mem: 9377 +Train: [51] [1600/6250] eta: 0:14:11 lr: 0.000065 grad: 0.1344 (0.1562) loss: 0.7788 (0.7831) time: 0.1554 data: 0.0715 max mem: 9377 +Train: [51] [1700/6250] eta: 0:13:44 lr: 0.000065 grad: 0.1433 (0.1553) loss: 0.7848 (0.7831) time: 0.1556 data: 0.0647 max mem: 9377 +Train: [51] [1800/6250] eta: 0:13:22 lr: 0.000065 grad: 0.1371 (0.1546) loss: 0.7734 (0.7828) time: 0.1865 data: 0.0930 max mem: 9377 +Train: [51] [1900/6250] eta: 0:12:55 lr: 0.000065 grad: 0.1485 (0.1543) loss: 0.7708 (0.7825) time: 0.1313 data: 0.0425 max mem: 9377 +Train: [51] [2000/6250] eta: 0:12:33 lr: 0.000065 grad: 0.1512 (0.1541) loss: 0.7665 (0.7822) time: 0.1480 data: 0.0679 max mem: 9377 +Train: [51] [2100/6250] eta: 0:12:08 lr: 0.000065 grad: 0.1502 (0.1540) loss: 0.7696 (0.7817) time: 0.1336 data: 0.0390 max mem: 9377 +Train: [51] [2200/6250] eta: 0:11:45 lr: 0.000065 grad: 0.1418 (0.1538) loss: 0.7726 (0.7813) time: 0.1123 data: 0.0245 max mem: 9377 +Train: [51] [2300/6250] eta: 0:11:26 lr: 0.000065 grad: 0.1323 (0.1534) loss: 0.7818 (0.7812) time: 0.0965 data: 0.0002 max mem: 9377 +Train: [51] [2400/6250] eta: 0:11:06 lr: 0.000065 grad: 0.1420 (0.1531) loss: 0.7727 (0.7809) time: 0.1899 data: 0.1059 max mem: 9377 +Train: [51] [2500/6250] eta: 0:10:45 lr: 0.000065 grad: 0.1372 (0.1528) loss: 0.7762 (0.7808) time: 0.1604 data: 0.0743 max mem: 9377 +Train: [51] [2600/6250] eta: 0:10:25 lr: 0.000065 grad: 0.1455 (0.1525) loss: 0.7747 (0.7807) time: 0.1412 data: 0.0543 max mem: 9377 +Train: [51] [2700/6250] eta: 0:10:05 lr: 0.000065 grad: 0.1395 (0.1523) loss: 0.7780 (0.7806) time: 0.1259 data: 0.0406 max mem: 9377 +Train: [51] [2800/6250] eta: 0:09:47 lr: 0.000065 grad: 0.1374 (0.1520) loss: 0.7839 (0.7807) time: 0.1771 data: 0.0904 max mem: 9377 +Train: [51] [2900/6250] eta: 0:09:30 lr: 0.000065 grad: 0.1441 (0.1519) loss: 0.7755 (0.7805) time: 0.1435 data: 0.0532 max mem: 9377 +Train: [51] [3000/6250] eta: 0:09:16 lr: 0.000065 grad: 0.1420 (0.1517) loss: 0.7789 (0.7805) time: 0.2061 data: 0.1183 max mem: 9377 +Train: [51] [3100/6250] eta: 0:08:59 lr: 0.000065 grad: 0.1458 (0.1515) loss: 0.7714 (0.7804) time: 0.2070 data: 0.1186 max mem: 9377 +Train: [51] [3200/6250] eta: 0:08:44 lr: 0.000065 grad: 0.1411 (0.1513) loss: 0.7742 (0.7804) time: 0.1419 data: 0.0536 max mem: 9377 +Train: [51] [3300/6250] eta: 0:08:27 lr: 0.000065 grad: 0.1385 (0.1512) loss: 0.7758 (0.7803) time: 0.2838 data: 0.1941 max mem: 9377 +Train: [51] [3400/6250] eta: 0:08:23 lr: 0.000064 grad: 0.1431 (0.1511) loss: 0.7774 (0.7801) time: 0.2373 data: 0.1390 max mem: 9377 +Train: [51] [3500/6250] eta: 0:08:04 lr: 0.000064 grad: 0.1413 (0.1509) loss: 0.7795 (0.7801) time: 0.0969 data: 0.0002 max mem: 9377 +Train: [51] [3600/6250] eta: 0:07:44 lr: 0.000064 grad: 0.1405 (0.1507) loss: 0.7727 (0.7800) time: 0.1035 data: 0.0178 max mem: 9377 +Train: [51] [3700/6250] eta: 0:07:29 lr: 0.000064 grad: 0.1502 (0.1506) loss: 0.7792 (0.7799) time: 0.3826 data: 0.2979 max mem: 9377 +Train: [51] [3800/6250] eta: 0:07:09 lr: 0.000064 grad: 0.1379 (0.1505) loss: 0.7857 (0.7799) time: 0.1358 data: 0.0458 max mem: 9377 +Train: [51] [3900/6250] eta: 0:06:52 lr: 0.000064 grad: 0.1436 (0.1504) loss: 0.7767 (0.7798) time: 0.2008 data: 0.1209 max mem: 9377 +Train: [51] [4000/6250] eta: 0:06:33 lr: 0.000064 grad: 0.1382 (0.1502) loss: 0.7823 (0.7798) time: 0.1308 data: 0.0452 max mem: 9377 +Train: [51] [4100/6250] eta: 0:06:14 lr: 0.000064 grad: 0.1322 (0.1500) loss: 0.7932 (0.7798) time: 0.1283 data: 0.0330 max mem: 9377 +Train: [51] [4200/6250] eta: 0:05:57 lr: 0.000064 grad: 0.1498 (0.1499) loss: 0.7804 (0.7799) time: 0.2827 data: 0.1831 max mem: 9377 +Train: [51] [4300/6250] eta: 0:05:42 lr: 0.000064 grad: 0.1453 (0.1497) loss: 0.7840 (0.7800) time: 0.2281 data: 0.1304 max mem: 9377 +Train: [51] [4400/6250] eta: 0:05:25 lr: 0.000064 grad: 0.1419 (0.1495) loss: 0.7876 (0.7801) time: 0.1125 data: 0.0155 max mem: 9377 +Train: [51] [4500/6250] eta: 0:05:06 lr: 0.000064 grad: 0.1443 (0.1494) loss: 0.7798 (0.7802) time: 0.1517 data: 0.0452 max mem: 9377 +Train: [51] [4600/6250] eta: 0:04:51 lr: 0.000064 grad: 0.1451 (0.1493) loss: 0.7809 (0.7802) time: 0.1722 data: 0.0676 max mem: 9377 +Train: [51] [4700/6250] eta: 0:04:34 lr: 0.000064 grad: 0.1533 (0.1493) loss: 0.7745 (0.7803) time: 0.1035 data: 0.0005 max mem: 9377 +Train: [51] [4800/6250] eta: 0:04:15 lr: 0.000064 grad: 0.1494 (0.1493) loss: 0.7693 (0.7803) time: 0.1385 data: 0.0486 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:57 lr: 0.000064 grad: 0.1456 (0.1492) loss: 0.7830 (0.7803) time: 0.0965 data: 0.0043 max mem: 9377 +Train: [51] [5000/6250] eta: 0:03:39 lr: 0.000064 grad: 0.1418 (0.1491) loss: 0.7899 (0.7803) time: 0.1957 data: 0.0882 max mem: 9377 +Train: [51] [5100/6250] eta: 0:03:22 lr: 0.000064 grad: 0.1447 (0.1491) loss: 0.7807 (0.7804) time: 0.0986 data: 0.0003 max mem: 9377 +Train: [51] [5200/6250] eta: 0:03:04 lr: 0.000064 grad: 0.1413 (0.1489) loss: 0.7895 (0.7804) time: 0.1744 data: 0.0862 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:46 lr: 0.000064 grad: 0.1413 (0.1489) loss: 0.7813 (0.7804) time: 0.1296 data: 0.0416 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:28 lr: 0.000064 grad: 0.1403 (0.1488) loss: 0.7864 (0.7805) time: 0.1504 data: 0.0634 max mem: 9377 +Train: [51] [5500/6250] eta: 0:02:11 lr: 0.000064 grad: 0.1452 (0.1488) loss: 0.7823 (0.7804) time: 0.1455 data: 0.0585 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:53 lr: 0.000064 grad: 0.1420 (0.1488) loss: 0.7842 (0.7804) time: 0.1492 data: 0.0550 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:35 lr: 0.000064 grad: 0.1580 (0.1489) loss: 0.7714 (0.7804) time: 0.1460 data: 0.0533 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:18 lr: 0.000064 grad: 0.1513 (0.1489) loss: 0.7738 (0.7804) time: 0.1299 data: 0.0357 max mem: 9377 +Train: [51] [5900/6250] eta: 0:01:00 lr: 0.000064 grad: 0.1427 (0.1490) loss: 0.7769 (0.7803) time: 0.2032 data: 0.1234 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:43 lr: 0.000064 grad: 0.1464 (0.1490) loss: 0.7792 (0.7802) time: 0.1167 data: 0.0268 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:25 lr: 0.000064 grad: 0.1465 (0.1490) loss: 0.7727 (0.7801) time: 0.2015 data: 0.1176 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:08 lr: 0.000064 grad: 0.1431 (0.1490) loss: 0.7823 (0.7800) time: 0.1942 data: 0.1090 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1535 (0.1491) loss: 0.7782 (0.7799) time: 0.1556 data: 0.0673 max mem: 9377 +Train: [51] Total time: 0:18:06 (0.1738 s / it) +Averaged stats: lr: 0.000064 grad: 0.1535 (0.1491) loss: 0.7782 (0.7799) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:04:09 loss: 0.8239 (0.8239) time: 4.0211 data: 3.9624 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8232 (0.8230) time: 0.1379 data: 0.1124 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-train-subset): loss: 0.8232 (0.8230) +Eval (hcp-val): [51] [ 0/62] eta: 0:04:29 loss: 0.8455 (0.8455) time: 4.3476 data: 4.2729 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8478 (0.8507) time: 0.1547 data: 0.1279 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (hcp-val): loss: 0.8478 (0.8507) +Eval (nsd-val): [51] [ 0/62] eta: 0:05:57 loss: 0.8170 (0.8170) time: 5.7656 data: 5.7309 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8271 (0.8285) time: 0.1329 data: 0.1057 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:14 (0.2373 s / it) +Averaged stats (nsd-val): loss: 0.8271 (0.8285) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 11:04:50 lr: 0.000064 grad: 0.1942 (0.1942) loss: 0.7593 (0.7593) time: 6.3825 data: 6.2227 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:22:44 lr: 0.000063 grad: 0.2334 (0.2536) loss: 0.7816 (0.7876) time: 0.1646 data: 0.0540 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:19:46 lr: 0.000063 grad: 0.1715 (0.2334) loss: 0.7776 (0.7820) time: 0.1568 data: 0.0590 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:18:46 lr: 0.000063 grad: 0.1532 (0.2121) loss: 0.7917 (0.7829) time: 0.2125 data: 0.1244 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:17:49 lr: 0.000063 grad: 0.1496 (0.2013) loss: 0.7955 (0.7832) time: 0.1673 data: 0.0576 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:17:29 lr: 0.000063 grad: 0.1468 (0.1908) loss: 0.7768 (0.7834) time: 0.1748 data: 0.0820 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:16:53 lr: 0.000063 grad: 0.1438 (0.1847) loss: 0.7816 (0.7829) time: 0.1504 data: 0.0508 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:16:17 lr: 0.000063 grad: 0.1525 (0.1811) loss: 0.7786 (0.7819) time: 0.1574 data: 0.0637 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:15:49 lr: 0.000063 grad: 0.1490 (0.1775) loss: 0.7789 (0.7820) time: 0.1859 data: 0.0895 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:15:23 lr: 0.000063 grad: 0.1481 (0.1743) loss: 0.7766 (0.7820) time: 0.1625 data: 0.0715 max mem: 9377 +Train: [52] [1000/6250] eta: 0:14:55 lr: 0.000063 grad: 0.1492 (0.1717) loss: 0.7787 (0.7820) time: 0.1646 data: 0.0749 max mem: 9377 +Train: [52] [1100/6250] eta: 0:14:30 lr: 0.000063 grad: 0.1297 (0.1696) loss: 0.7784 (0.7818) time: 0.1480 data: 0.0621 max mem: 9377 +Train: [52] [1200/6250] eta: 0:14:08 lr: 0.000063 grad: 0.1351 (0.1677) loss: 0.7811 (0.7816) time: 0.1291 data: 0.0439 max mem: 9377 +Train: [52] [1300/6250] eta: 0:13:47 lr: 0.000063 grad: 0.1486 (0.1662) loss: 0.7840 (0.7814) time: 0.1718 data: 0.0896 max mem: 9377 +Train: [52] [1400/6250] eta: 0:13:30 lr: 0.000063 grad: 0.1409 (0.1647) loss: 0.7801 (0.7814) time: 0.2025 data: 0.1167 max mem: 9377 +Train: [52] [1500/6250] eta: 0:13:11 lr: 0.000063 grad: 0.1420 (0.1633) loss: 0.7737 (0.7815) time: 0.2178 data: 0.1309 max mem: 9377 +Train: [52] [1600/6250] eta: 0:12:53 lr: 0.000063 grad: 0.1448 (0.1623) loss: 0.7829 (0.7813) time: 0.1919 data: 0.1092 max mem: 9377 +Train: [52] [1700/6250] eta: 0:12:36 lr: 0.000063 grad: 0.1490 (0.1615) loss: 0.7731 (0.7810) time: 0.1308 data: 0.0334 max mem: 9377 +Train: [52] [1800/6250] eta: 0:12:20 lr: 0.000063 grad: 0.1450 (0.1607) loss: 0.7720 (0.7807) time: 0.1397 data: 0.0391 max mem: 9377 +Train: [52] [1900/6250] eta: 0:12:04 lr: 0.000063 grad: 0.1465 (0.1600) loss: 0.7721 (0.7805) time: 0.1559 data: 0.0701 max mem: 9377 +Train: [52] [2000/6250] eta: 0:11:43 lr: 0.000063 grad: 0.1385 (0.1595) loss: 0.7795 (0.7800) time: 0.1130 data: 0.0275 max mem: 9377 +Train: [52] [2100/6250] eta: 0:11:22 lr: 0.000063 grad: 0.1463 (0.1589) loss: 0.7757 (0.7797) time: 0.1547 data: 0.0738 max mem: 9377 +Train: [52] [2200/6250] eta: 0:11:01 lr: 0.000063 grad: 0.1511 (0.1585) loss: 0.7658 (0.7794) time: 0.1404 data: 0.0567 max mem: 9377 +Train: [52] [2300/6250] eta: 0:10:43 lr: 0.000063 grad: 0.1455 (0.1581) loss: 0.7746 (0.7792) time: 0.1864 data: 0.1100 max mem: 9377 +Train: [52] [2400/6250] eta: 0:10:22 lr: 0.000063 grad: 0.1357 (0.1579) loss: 0.7822 (0.7791) time: 0.1529 data: 0.0671 max mem: 9377 +Train: [52] [2500/6250] eta: 0:10:04 lr: 0.000063 grad: 0.1505 (0.1576) loss: 0.7670 (0.7787) time: 0.1487 data: 0.0518 max mem: 9377 +Train: [52] [2600/6250] eta: 0:09:46 lr: 0.000063 grad: 0.1560 (0.1573) loss: 0.7712 (0.7785) time: 0.1457 data: 0.0629 max mem: 9377 +Train: [52] [2700/6250] eta: 0:09:26 lr: 0.000063 grad: 0.1435 (0.1570) loss: 0.7734 (0.7782) time: 0.1420 data: 0.0603 max mem: 9377 +Train: [52] [2800/6250] eta: 0:09:07 lr: 0.000063 grad: 0.1477 (0.1567) loss: 0.7736 (0.7781) time: 0.1329 data: 0.0537 max mem: 9377 +Train: [52] [2900/6250] eta: 0:08:48 lr: 0.000063 grad: 0.1531 (0.1566) loss: 0.7544 (0.7779) time: 0.1244 data: 0.0398 max mem: 9377 +Train: [52] [3000/6250] eta: 0:08:30 lr: 0.000063 grad: 0.1469 (0.1563) loss: 0.7766 (0.7778) time: 0.1387 data: 0.0603 max mem: 9377 +Train: [52] [3100/6250] eta: 0:08:12 lr: 0.000063 grad: 0.1410 (0.1561) loss: 0.7847 (0.7777) time: 0.1432 data: 0.0629 max mem: 9377 +Train: [52] [3200/6250] eta: 0:07:54 lr: 0.000062 grad: 0.1542 (0.1558) loss: 0.7735 (0.7776) time: 0.1394 data: 0.0528 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:36 lr: 0.000062 grad: 0.1436 (0.1557) loss: 0.7772 (0.7775) time: 0.1380 data: 0.0559 max mem: 9377 +Train: [52] [3400/6250] eta: 0:07:19 lr: 0.000062 grad: 0.1419 (0.1555) loss: 0.7820 (0.7775) time: 0.1288 data: 0.0406 max mem: 9377 +Train: [52] [3500/6250] eta: 0:07:02 lr: 0.000062 grad: 0.1483 (0.1553) loss: 0.7841 (0.7775) time: 0.1316 data: 0.0473 max mem: 9377 +Train: [52] [3600/6250] eta: 0:06:45 lr: 0.000062 grad: 0.1504 (0.1551) loss: 0.7769 (0.7775) time: 0.1245 data: 0.0397 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:28 lr: 0.000062 grad: 0.1496 (0.1550) loss: 0.7742 (0.7774) time: 0.1173 data: 0.0301 max mem: 9377 +Train: [52] [3800/6250] eta: 0:06:11 lr: 0.000062 grad: 0.1468 (0.1549) loss: 0.7784 (0.7773) time: 0.1457 data: 0.0619 max mem: 9377 +Train: [52] [3900/6250] eta: 0:05:55 lr: 0.000062 grad: 0.1564 (0.1549) loss: 0.7812 (0.7772) time: 0.1388 data: 0.0450 max mem: 9377 +Train: [52] [4000/6250] eta: 0:05:38 lr: 0.000062 grad: 0.1510 (0.1549) loss: 0.7660 (0.7770) time: 0.1265 data: 0.0430 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:22 lr: 0.000062 grad: 0.1571 (0.1549) loss: 0.7726 (0.7769) time: 0.1400 data: 0.0592 max mem: 9377 +Train: [52] [4200/6250] eta: 0:05:07 lr: 0.000062 grad: 0.1535 (0.1549) loss: 0.7751 (0.7768) time: 0.1503 data: 0.0728 max mem: 9377 +Train: [52] [4300/6250] eta: 0:04:52 lr: 0.000062 grad: 0.1588 (0.1549) loss: 0.7712 (0.7767) time: 0.1468 data: 0.0696 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:36 lr: 0.000062 grad: 0.1517 (0.1549) loss: 0.7706 (0.7765) time: 0.1318 data: 0.0542 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:21 lr: 0.000062 grad: 0.1564 (0.1549) loss: 0.7677 (0.7763) time: 0.1319 data: 0.0525 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:06 lr: 0.000062 grad: 0.1557 (0.1549) loss: 0.7565 (0.7761) time: 0.1473 data: 0.0685 max mem: 9377 +Train: [52] [4700/6250] eta: 0:03:51 lr: 0.000062 grad: 0.1507 (0.1549) loss: 0.7711 (0.7759) time: 0.1210 data: 0.0384 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:35 lr: 0.000062 grad: 0.1555 (0.1549) loss: 0.7711 (0.7758) time: 0.1267 data: 0.0372 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:20 lr: 0.000062 grad: 0.1500 (0.1548) loss: 0.7667 (0.7757) time: 0.1641 data: 0.0871 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:05 lr: 0.000062 grad: 0.1573 (0.1548) loss: 0.7674 (0.7756) time: 0.1266 data: 0.0528 max mem: 9377 +Train: [52] [5100/6250] eta: 0:02:50 lr: 0.000062 grad: 0.1598 (0.1549) loss: 0.7720 (0.7754) time: 0.1467 data: 0.0614 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:35 lr: 0.000062 grad: 0.1517 (0.1549) loss: 0.7748 (0.7753) time: 0.1367 data: 0.0543 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:20 lr: 0.000062 grad: 0.1485 (0.1549) loss: 0.7699 (0.7752) time: 0.1543 data: 0.0720 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:05 lr: 0.000062 grad: 0.1462 (0.1548) loss: 0.7722 (0.7751) time: 0.1261 data: 0.0461 max mem: 9377 +Train: [52] [5500/6250] eta: 0:01:50 lr: 0.000062 grad: 0.1511 (0.1547) loss: 0.7608 (0.7750) time: 0.1461 data: 0.0676 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:35 lr: 0.000062 grad: 0.1463 (0.1547) loss: 0.7733 (0.7749) time: 0.1539 data: 0.0751 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:20 lr: 0.000062 grad: 0.1534 (0.1546) loss: 0.7674 (0.7749) time: 0.1371 data: 0.0532 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:05 lr: 0.000062 grad: 0.1521 (0.1546) loss: 0.7634 (0.7747) time: 0.1190 data: 0.0312 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:51 lr: 0.000062 grad: 0.1492 (0.1546) loss: 0.7674 (0.7747) time: 0.1346 data: 0.0528 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:36 lr: 0.000062 grad: 0.1437 (0.1545) loss: 0.7626 (0.7746) time: 0.1229 data: 0.0424 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:21 lr: 0.000062 grad: 0.1622 (0.1545) loss: 0.7613 (0.7745) time: 0.1324 data: 0.0540 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.1480 (0.1544) loss: 0.7723 (0.7745) time: 0.1253 data: 0.0467 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1469 (0.1544) loss: 0.7675 (0.7744) time: 0.1201 data: 0.0390 max mem: 9377 +Train: [52] Total time: 0:15:18 (0.1469 s / it) +Averaged stats: lr: 0.000061 grad: 0.1469 (0.1544) loss: 0.7675 (0.7744) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:03:57 loss: 0.8223 (0.8223) time: 3.8227 data: 3.7445 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8216 (0.8205) time: 0.1216 data: 0.0965 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:12 (0.2042 s / it) +Averaged stats (hcp-train-subset): loss: 0.8216 (0.8205) +Eval (hcp-val): [52] [ 0/62] eta: 0:03:17 loss: 0.8490 (0.8490) time: 3.1780 data: 3.1140 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8464 (0.8478) time: 0.1334 data: 0.1068 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:13 (0.2112 s / it) +Averaged stats (hcp-val): loss: 0.8464 (0.8478) +Eval (nsd-val): [52] [ 0/62] eta: 0:03:25 loss: 0.8156 (0.8156) time: 3.3087 data: 3.2393 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8230 (0.8243) time: 0.1266 data: 0.1013 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:12 (0.2021 s / it) +Averaged stats (nsd-val): loss: 0.8230 (0.8243) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 7:57:31 lr: 0.000061 grad: 0.1164 (0.1164) loss: 0.8302 (0.8302) time: 4.5843 data: 4.3627 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:20:08 lr: 0.000061 grad: 0.2052 (0.1944) loss: 0.7956 (0.8141) time: 0.1071 data: 0.0002 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:17:37 lr: 0.000061 grad: 0.1794 (0.1934) loss: 0.7917 (0.8040) time: 0.1731 data: 0.0785 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:16:45 lr: 0.000061 grad: 0.1820 (0.1912) loss: 0.7769 (0.7964) time: 0.1669 data: 0.0864 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:15:39 lr: 0.000061 grad: 0.1716 (0.1886) loss: 0.7697 (0.7900) time: 0.1308 data: 0.0358 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:14:57 lr: 0.000061 grad: 0.1665 (0.1854) loss: 0.7698 (0.7853) time: 0.1331 data: 0.0444 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:14:22 lr: 0.000061 grad: 0.1542 (0.1811) loss: 0.7672 (0.7830) time: 0.1267 data: 0.0432 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:13:50 lr: 0.000061 grad: 0.1359 (0.1767) loss: 0.7658 (0.7817) time: 0.1365 data: 0.0482 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:13:20 lr: 0.000061 grad: 0.1487 (0.1737) loss: 0.7689 (0.7804) time: 0.1039 data: 0.0067 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:12:58 lr: 0.000061 grad: 0.1468 (0.1708) loss: 0.7815 (0.7793) time: 0.1316 data: 0.0422 max mem: 9377 +Train: [53] [1000/6250] eta: 0:12:38 lr: 0.000061 grad: 0.1478 (0.1687) loss: 0.7712 (0.7786) time: 0.1394 data: 0.0605 max mem: 9377 +Train: [53] [1100/6250] eta: 0:12:25 lr: 0.000061 grad: 0.1431 (0.1668) loss: 0.7747 (0.7781) time: 0.1280 data: 0.0443 max mem: 9377 +Train: [53] [1200/6250] eta: 0:12:07 lr: 0.000061 grad: 0.1475 (0.1652) loss: 0.7749 (0.7778) time: 0.1353 data: 0.0606 max mem: 9377 +Train: [53] [1300/6250] eta: 0:11:52 lr: 0.000061 grad: 0.1468 (0.1637) loss: 0.7650 (0.7772) time: 0.1543 data: 0.0742 max mem: 9377 +Train: [53] [1400/6250] eta: 0:11:38 lr: 0.000061 grad: 0.1448 (0.1625) loss: 0.7726 (0.7767) time: 0.1597 data: 0.0769 max mem: 9377 +Train: [53] [1500/6250] eta: 0:11:28 lr: 0.000061 grad: 0.1527 (0.1613) loss: 0.7618 (0.7764) time: 0.1397 data: 0.0485 max mem: 9377 +Train: [53] [1600/6250] eta: 0:11:14 lr: 0.000061 grad: 0.1527 (0.1608) loss: 0.7730 (0.7760) time: 0.1359 data: 0.0603 max mem: 9377 +Train: [53] [1700/6250] eta: 0:10:57 lr: 0.000061 grad: 0.1507 (0.1605) loss: 0.7718 (0.7757) time: 0.1469 data: 0.0698 max mem: 9377 +Train: [53] [1800/6250] eta: 0:10:39 lr: 0.000061 grad: 0.1458 (0.1598) loss: 0.7789 (0.7755) time: 0.1329 data: 0.0428 max mem: 9377 +Train: [53] [1900/6250] eta: 0:10:22 lr: 0.000061 grad: 0.1629 (0.1596) loss: 0.7587 (0.7750) time: 0.1317 data: 0.0460 max mem: 9377 +Train: [53] [2000/6250] eta: 0:10:03 lr: 0.000061 grad: 0.1482 (0.1593) loss: 0.7782 (0.7746) time: 0.1328 data: 0.0431 max mem: 9377 +Train: [53] [2100/6250] eta: 0:09:47 lr: 0.000061 grad: 0.1461 (0.1588) loss: 0.7795 (0.7746) time: 0.1396 data: 0.0533 max mem: 9377 +Train: [53] [2200/6250] eta: 0:09:32 lr: 0.000061 grad: 0.1563 (0.1585) loss: 0.7680 (0.7744) time: 0.1663 data: 0.0888 max mem: 9377 +Train: [53] [2300/6250] eta: 0:09:15 lr: 0.000061 grad: 0.1501 (0.1585) loss: 0.7690 (0.7741) time: 0.1141 data: 0.0313 max mem: 9377 +Train: [53] [2400/6250] eta: 0:09:00 lr: 0.000061 grad: 0.1616 (0.1585) loss: 0.7602 (0.7736) time: 0.1285 data: 0.0400 max mem: 9377 +Train: [53] [2500/6250] eta: 0:08:45 lr: 0.000061 grad: 0.1514 (0.1584) loss: 0.7600 (0.7732) time: 0.1456 data: 0.0681 max mem: 9377 +Train: [53] [2600/6250] eta: 0:08:31 lr: 0.000061 grad: 0.1461 (0.1580) loss: 0.7662 (0.7730) time: 0.1300 data: 0.0465 max mem: 9377 +Train: [53] [2700/6250] eta: 0:08:16 lr: 0.000061 grad: 0.1493 (0.1579) loss: 0.7632 (0.7728) time: 0.1252 data: 0.0445 max mem: 9377 +Train: [53] [2800/6250] eta: 0:08:02 lr: 0.000061 grad: 0.1454 (0.1577) loss: 0.7784 (0.7726) time: 0.1065 data: 0.0141 max mem: 9377 +Train: [53] [2900/6250] eta: 0:07:48 lr: 0.000061 grad: 0.1515 (0.1575) loss: 0.7659 (0.7726) time: 0.1383 data: 0.0542 max mem: 9377 +Train: [53] [3000/6250] eta: 0:07:34 lr: 0.000060 grad: 0.1491 (0.1573) loss: 0.7687 (0.7725) time: 0.1351 data: 0.0513 max mem: 9377 +Train: [53] [3100/6250] eta: 0:07:20 lr: 0.000060 grad: 0.1502 (0.1571) loss: 0.7730 (0.7725) time: 0.1442 data: 0.0597 max mem: 9377 +Train: [53] [3200/6250] eta: 0:07:06 lr: 0.000060 grad: 0.1391 (0.1568) loss: 0.7733 (0.7725) time: 0.1648 data: 0.0911 max mem: 9377 +Train: [53] [3300/6250] eta: 0:06:52 lr: 0.000060 grad: 0.1447 (0.1566) loss: 0.7693 (0.7724) time: 0.1439 data: 0.0695 max mem: 9377 +Train: [53] [3400/6250] eta: 0:06:37 lr: 0.000060 grad: 0.1486 (0.1564) loss: 0.7695 (0.7723) time: 0.0992 data: 0.0143 max mem: 9377 +Train: [53] [3500/6250] eta: 0:06:24 lr: 0.000060 grad: 0.1346 (0.1563) loss: 0.7759 (0.7723) time: 0.1332 data: 0.0499 max mem: 9377 +Train: [53] [3600/6250] eta: 0:06:10 lr: 0.000060 grad: 0.1425 (0.1560) loss: 0.7760 (0.7723) time: 0.1289 data: 0.0527 max mem: 9377 +Train: [53] [3700/6250] eta: 0:05:56 lr: 0.000060 grad: 0.1443 (0.1558) loss: 0.7710 (0.7724) time: 0.1359 data: 0.0470 max mem: 9377 +Train: [53] [3800/6250] eta: 0:05:43 lr: 0.000060 grad: 0.1447 (0.1557) loss: 0.7626 (0.7723) time: 0.1368 data: 0.0490 max mem: 9377 +Train: [53] [3900/6250] eta: 0:05:29 lr: 0.000060 grad: 0.1535 (0.1556) loss: 0.7822 (0.7722) time: 0.1835 data: 0.1035 max mem: 9377 +Train: [53] [4000/6250] eta: 0:05:15 lr: 0.000060 grad: 0.1427 (0.1555) loss: 0.7825 (0.7722) time: 0.1511 data: 0.0687 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:00 lr: 0.000060 grad: 0.1630 (0.1556) loss: 0.7707 (0.7721) time: 0.1414 data: 0.0608 max mem: 9377 +Train: [53] [4200/6250] eta: 0:04:46 lr: 0.000060 grad: 0.1499 (0.1556) loss: 0.7624 (0.7719) time: 0.1526 data: 0.0738 max mem: 9377 +Train: [53] [4300/6250] eta: 0:04:33 lr: 0.000060 grad: 0.1516 (0.1556) loss: 0.7783 (0.7719) time: 0.1555 data: 0.0780 max mem: 9377 +Train: [53] [4400/6250] eta: 0:04:19 lr: 0.000060 grad: 0.1548 (0.1556) loss: 0.7751 (0.7719) time: 0.1374 data: 0.0597 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:05 lr: 0.000060 grad: 0.1629 (0.1557) loss: 0.7693 (0.7717) time: 0.1313 data: 0.0489 max mem: 9377 +Train: [53] [4600/6250] eta: 0:03:51 lr: 0.000060 grad: 0.1461 (0.1557) loss: 0.7796 (0.7718) time: 0.1431 data: 0.0652 max mem: 9377 +Train: [53] [4700/6250] eta: 0:03:38 lr: 0.000060 grad: 0.1491 (0.1557) loss: 0.7703 (0.7717) time: 0.1420 data: 0.0505 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:23 lr: 0.000060 grad: 0.1532 (0.1556) loss: 0.7795 (0.7717) time: 0.1451 data: 0.0611 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:09 lr: 0.000060 grad: 0.1518 (0.1556) loss: 0.7803 (0.7717) time: 0.1342 data: 0.0472 max mem: 9377 +Train: [53] [5000/6250] eta: 0:02:55 lr: 0.000060 grad: 0.1440 (0.1555) loss: 0.7675 (0.7718) time: 0.1308 data: 0.0412 max mem: 9377 +Train: [53] [5100/6250] eta: 0:02:41 lr: 0.000060 grad: 0.1437 (0.1554) loss: 0.7803 (0.7718) time: 0.1504 data: 0.0727 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:27 lr: 0.000060 grad: 0.1448 (0.1552) loss: 0.7788 (0.7719) time: 0.1197 data: 0.0313 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:13 lr: 0.000060 grad: 0.1470 (0.1550) loss: 0.7770 (0.7721) time: 0.1374 data: 0.0542 max mem: 9377 +Train: [53] [5400/6250] eta: 0:01:58 lr: 0.000060 grad: 0.1533 (0.1549) loss: 0.7770 (0.7722) time: 0.1286 data: 0.0501 max mem: 9377 +Train: [53] [5500/6250] eta: 0:01:45 lr: 0.000060 grad: 0.1496 (0.1548) loss: 0.7804 (0.7723) time: 0.1117 data: 0.0174 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:31 lr: 0.000060 grad: 0.1448 (0.1547) loss: 0.7872 (0.7724) time: 0.1002 data: 0.0002 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:17 lr: 0.000060 grad: 0.1431 (0.1547) loss: 0.7854 (0.7724) time: 0.1377 data: 0.0559 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:03 lr: 0.000060 grad: 0.1388 (0.1545) loss: 0.7796 (0.7726) time: 0.1463 data: 0.0650 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:49 lr: 0.000060 grad: 0.1426 (0.1544) loss: 0.7761 (0.7727) time: 0.1423 data: 0.0605 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:35 lr: 0.000059 grad: 0.1438 (0.1543) loss: 0.7745 (0.7727) time: 0.1413 data: 0.0488 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:21 lr: 0.000059 grad: 0.1539 (0.1543) loss: 0.7705 (0.7727) time: 0.1305 data: 0.0491 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:07 lr: 0.000059 grad: 0.1490 (0.1542) loss: 0.7766 (0.7727) time: 0.1435 data: 0.0633 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1495 (0.1542) loss: 0.7643 (0.7726) time: 0.1341 data: 0.0520 max mem: 9377 +Train: [53] Total time: 0:14:42 (0.1413 s / it) +Averaged stats: lr: 0.000059 grad: 0.1495 (0.1542) loss: 0.7643 (0.7726) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:04:47 loss: 0.8254 (0.8254) time: 4.6310 data: 4.6012 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8265 (0.8210) time: 0.1069 data: 0.0823 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:12 (0.2056 s / it) +Averaged stats (hcp-train-subset): loss: 0.8265 (0.8210) +Eval (hcp-val): [53] [ 0/62] eta: 0:04:20 loss: 0.8483 (0.8483) time: 4.1953 data: 4.1466 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8484 (0.8488) time: 0.1137 data: 0.0871 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:12 (0.1997 s / it) +Averaged stats (hcp-val): loss: 0.8484 (0.8488) +Eval (nsd-val): [53] [ 0/62] eta: 0:05:10 loss: 0.8160 (0.8160) time: 5.0028 data: 4.9727 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8232 (0.8230) time: 0.1400 data: 0.1147 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:12 (0.2094 s / it) +Averaged stats (nsd-val): loss: 0.8232 (0.8230) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 9:21:17 lr: 0.000059 grad: 0.1293 (0.1293) loss: 0.8630 (0.8630) time: 5.3885 data: 5.2891 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:21:10 lr: 0.000059 grad: 0.1960 (0.2180) loss: 0.7895 (0.8009) time: 0.1250 data: 0.0209 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:18:22 lr: 0.000059 grad: 0.1883 (0.2121) loss: 0.7883 (0.7946) time: 0.1747 data: 0.0870 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:16:39 lr: 0.000059 grad: 0.1737 (0.2031) loss: 0.7845 (0.7912) time: 0.1426 data: 0.0628 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:15:38 lr: 0.000059 grad: 0.1582 (0.1951) loss: 0.7806 (0.7892) time: 0.1555 data: 0.0662 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:14:43 lr: 0.000059 grad: 0.1699 (0.1894) loss: 0.7679 (0.7872) time: 0.1212 data: 0.0294 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:14:06 lr: 0.000059 grad: 0.1519 (0.1842) loss: 0.7759 (0.7854) time: 0.1042 data: 0.0100 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:13:38 lr: 0.000059 grad: 0.1513 (0.1808) loss: 0.7723 (0.7836) time: 0.1294 data: 0.0423 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:13:10 lr: 0.000059 grad: 0.1621 (0.1779) loss: 0.7671 (0.7823) time: 0.1242 data: 0.0399 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:12:53 lr: 0.000059 grad: 0.1356 (0.1747) loss: 0.7798 (0.7816) time: 0.1564 data: 0.0680 max mem: 9377 +Train: [54] [1000/6250] eta: 0:12:36 lr: 0.000059 grad: 0.1531 (0.1722) loss: 0.7776 (0.7813) time: 0.1560 data: 0.0766 max mem: 9377 +Train: [54] [1100/6250] eta: 0:12:15 lr: 0.000059 grad: 0.1397 (0.1703) loss: 0.7788 (0.7807) time: 0.1189 data: 0.0293 max mem: 9377 +Train: [54] [1200/6250] eta: 0:12:04 lr: 0.000059 grad: 0.1483 (0.1685) loss: 0.7792 (0.7803) time: 0.1229 data: 0.0480 max mem: 9377 +Train: [54] [1300/6250] eta: 0:11:47 lr: 0.000059 grad: 0.1392 (0.1669) loss: 0.7838 (0.7802) time: 0.1398 data: 0.0613 max mem: 9377 +Train: [54] [1400/6250] eta: 0:11:32 lr: 0.000059 grad: 0.1423 (0.1654) loss: 0.7846 (0.7800) time: 0.1463 data: 0.0688 max mem: 9377 +Train: [54] [1500/6250] eta: 0:11:17 lr: 0.000059 grad: 0.1381 (0.1644) loss: 0.7819 (0.7798) time: 0.1316 data: 0.0544 max mem: 9377 +Train: [54] [1600/6250] eta: 0:11:06 lr: 0.000059 grad: 0.1450 (0.1636) loss: 0.7755 (0.7796) time: 0.1435 data: 0.0617 max mem: 9377 +Train: [54] [1700/6250] eta: 0:10:54 lr: 0.000059 grad: 0.1419 (0.1625) loss: 0.7742 (0.7796) time: 0.1393 data: 0.0614 max mem: 9377 +Train: [54] [1800/6250] eta: 0:10:38 lr: 0.000059 grad: 0.1491 (0.1616) loss: 0.7785 (0.7796) time: 0.1595 data: 0.0795 max mem: 9377 +Train: [54] [1900/6250] eta: 0:10:21 lr: 0.000059 grad: 0.1537 (0.1610) loss: 0.7745 (0.7793) time: 0.1379 data: 0.0578 max mem: 9377 +Train: [54] [2000/6250] eta: 0:10:04 lr: 0.000059 grad: 0.1455 (0.1603) loss: 0.7752 (0.7790) time: 0.1229 data: 0.0333 max mem: 9377 +Train: [54] [2100/6250] eta: 0:09:47 lr: 0.000059 grad: 0.1470 (0.1598) loss: 0.7739 (0.7789) time: 0.1284 data: 0.0486 max mem: 9377 +Train: [54] [2200/6250] eta: 0:09:30 lr: 0.000059 grad: 0.1397 (0.1592) loss: 0.7789 (0.7789) time: 0.1150 data: 0.0271 max mem: 9377 +Train: [54] [2300/6250] eta: 0:09:14 lr: 0.000059 grad: 0.1506 (0.1588) loss: 0.7677 (0.7787) time: 0.1222 data: 0.0398 max mem: 9377 +Train: [54] [2400/6250] eta: 0:08:58 lr: 0.000059 grad: 0.1460 (0.1583) loss: 0.7689 (0.7785) time: 0.1361 data: 0.0525 max mem: 9377 +Train: [54] [2500/6250] eta: 0:08:44 lr: 0.000059 grad: 0.1460 (0.1580) loss: 0.7738 (0.7783) time: 0.1420 data: 0.0624 max mem: 9377 +Train: [54] [2600/6250] eta: 0:08:30 lr: 0.000059 grad: 0.1543 (0.1578) loss: 0.7661 (0.7781) time: 0.1013 data: 0.0157 max mem: 9377 +Train: [54] [2700/6250] eta: 0:08:16 lr: 0.000059 grad: 0.1578 (0.1576) loss: 0.7714 (0.7780) time: 0.1316 data: 0.0392 max mem: 9377 +Train: [54] [2800/6250] eta: 0:08:03 lr: 0.000058 grad: 0.1508 (0.1575) loss: 0.7821 (0.7778) time: 0.1672 data: 0.0871 max mem: 9377 +Train: [54] [2900/6250] eta: 0:07:47 lr: 0.000058 grad: 0.1503 (0.1573) loss: 0.7822 (0.7778) time: 0.1319 data: 0.0393 max mem: 9377 +Train: [54] [3000/6250] eta: 0:07:34 lr: 0.000058 grad: 0.1498 (0.1571) loss: 0.7762 (0.7778) time: 0.1406 data: 0.0650 max mem: 9377 +Train: [54] [3100/6250] eta: 0:07:19 lr: 0.000058 grad: 0.1425 (0.1568) loss: 0.7828 (0.7777) time: 0.1426 data: 0.0625 max mem: 9377 +Train: [54] [3200/6250] eta: 0:07:05 lr: 0.000058 grad: 0.1484 (0.1568) loss: 0.7770 (0.7775) time: 0.1414 data: 0.0592 max mem: 9377 +Train: [54] [3300/6250] eta: 0:06:51 lr: 0.000058 grad: 0.1462 (0.1567) loss: 0.7677 (0.7774) time: 0.1452 data: 0.0656 max mem: 9377 +Train: [54] [3400/6250] eta: 0:06:38 lr: 0.000058 grad: 0.1518 (0.1566) loss: 0.7755 (0.7772) time: 0.1520 data: 0.0670 max mem: 9377 +Train: [54] [3500/6250] eta: 0:06:23 lr: 0.000058 grad: 0.1536 (0.1564) loss: 0.7833 (0.7771) time: 0.1547 data: 0.0750 max mem: 9377 +Train: [54] [3600/6250] eta: 0:06:09 lr: 0.000058 grad: 0.1496 (0.1562) loss: 0.7733 (0.7770) time: 0.1335 data: 0.0495 max mem: 9377 +Train: [54] [3700/6250] eta: 0:05:55 lr: 0.000058 grad: 0.1406 (0.1561) loss: 0.7804 (0.7769) time: 0.1236 data: 0.0434 max mem: 9377 +Train: [54] [3800/6250] eta: 0:05:41 lr: 0.000058 grad: 0.1358 (0.1560) loss: 0.7828 (0.7769) time: 0.1422 data: 0.0646 max mem: 9377 +Train: [54] [3900/6250] eta: 0:05:28 lr: 0.000058 grad: 0.1471 (0.1558) loss: 0.7756 (0.7769) time: 0.1451 data: 0.0649 max mem: 9377 +Train: [54] [4000/6250] eta: 0:05:14 lr: 0.000058 grad: 0.1553 (0.1556) loss: 0.7596 (0.7768) time: 0.1223 data: 0.0370 max mem: 9377 +Train: [54] [4100/6250] eta: 0:04:59 lr: 0.000058 grad: 0.1579 (0.1557) loss: 0.7721 (0.7767) time: 0.1271 data: 0.0440 max mem: 9377 +Train: [54] [4200/6250] eta: 0:04:45 lr: 0.000058 grad: 0.1553 (0.1557) loss: 0.7792 (0.7767) time: 0.1393 data: 0.0566 max mem: 9377 +Train: [54] [4300/6250] eta: 0:04:32 lr: 0.000058 grad: 0.1427 (0.1555) loss: 0.7724 (0.7767) time: 0.1314 data: 0.0435 max mem: 9377 +Train: [54] [4400/6250] eta: 0:04:18 lr: 0.000058 grad: 0.1485 (0.1554) loss: 0.7760 (0.7767) time: 0.1601 data: 0.0844 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:04 lr: 0.000058 grad: 0.1471 (0.1553) loss: 0.7746 (0.7766) time: 0.1377 data: 0.0537 max mem: 9377 +Train: [54] [4600/6250] eta: 0:03:50 lr: 0.000058 grad: 0.1389 (0.1551) loss: 0.7753 (0.7766) time: 0.1377 data: 0.0596 max mem: 9377 +Train: [54] [4700/6250] eta: 0:03:36 lr: 0.000058 grad: 0.1545 (0.1551) loss: 0.7662 (0.7764) time: 0.1873 data: 0.1041 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:23 lr: 0.000058 grad: 0.1500 (0.1551) loss: 0.7713 (0.7764) time: 0.1659 data: 0.0859 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:09 lr: 0.000058 grad: 0.1446 (0.1550) loss: 0.7656 (0.7763) time: 0.1437 data: 0.0612 max mem: 9377 +Train: [54] [5000/6250] eta: 0:02:55 lr: 0.000058 grad: 0.1493 (0.1550) loss: 0.7677 (0.7762) time: 0.1369 data: 0.0531 max mem: 9377 +Train: [54] [5100/6250] eta: 0:02:41 lr: 0.000058 grad: 0.1522 (0.1549) loss: 0.7627 (0.7760) time: 0.1369 data: 0.0534 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:26 lr: 0.000058 grad: 0.1504 (0.1548) loss: 0.7762 (0.7759) time: 0.1442 data: 0.0605 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:12 lr: 0.000058 grad: 0.1521 (0.1548) loss: 0.7695 (0.7759) time: 0.1339 data: 0.0489 max mem: 9377 +Train: [54] [5400/6250] eta: 0:01:58 lr: 0.000058 grad: 0.1473 (0.1548) loss: 0.7672 (0.7757) time: 0.1432 data: 0.0588 max mem: 9377 +Train: [54] [5500/6250] eta: 0:01:44 lr: 0.000058 grad: 0.1516 (0.1548) loss: 0.7611 (0.7756) time: 0.1361 data: 0.0502 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:30 lr: 0.000058 grad: 0.1506 (0.1548) loss: 0.7660 (0.7754) time: 0.1486 data: 0.0684 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:16 lr: 0.000058 grad: 0.1534 (0.1548) loss: 0.7692 (0.7752) time: 0.1796 data: 0.0966 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:02 lr: 0.000057 grad: 0.1511 (0.1548) loss: 0.7730 (0.7751) time: 0.2025 data: 0.1157 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:48 lr: 0.000057 grad: 0.1502 (0.1548) loss: 0.7691 (0.7749) time: 0.1582 data: 0.0774 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:34 lr: 0.000057 grad: 0.1570 (0.1549) loss: 0.7625 (0.7747) time: 0.1435 data: 0.0550 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:20 lr: 0.000057 grad: 0.1630 (0.1550) loss: 0.7662 (0.7745) time: 0.1436 data: 0.0613 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:06 lr: 0.000057 grad: 0.1676 (0.1551) loss: 0.7581 (0.7743) time: 0.1446 data: 0.0640 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1638 (0.1551) loss: 0.7615 (0.7742) time: 0.1053 data: 0.0239 max mem: 9377 +Train: [54] Total time: 0:14:34 (0.1399 s / it) +Averaged stats: lr: 0.000057 grad: 0.1638 (0.1551) loss: 0.7615 (0.7742) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:05:23 loss: 0.8237 (0.8237) time: 5.2215 data: 5.1906 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8193 (0.8185) time: 0.1204 data: 0.0951 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:12 (0.2041 s / it) +Averaged stats (hcp-train-subset): loss: 0.8193 (0.8185) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [54] [ 0/62] eta: 0:04:57 loss: 0.8508 (0.8508) time: 4.8056 data: 4.7749 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8456 (0.8481) time: 0.1190 data: 0.0923 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:12 (0.2066 s / it) +Averaged stats (hcp-val): loss: 0.8456 (0.8481) +Making plots (hcp-val): example=49 +Eval (nsd-val): [54] [ 0/62] eta: 0:05:26 loss: 0.8251 (0.8251) time: 5.2635 data: 5.2329 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8298 (0.8326) time: 0.1237 data: 0.0972 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:13 (0.2149 s / it) +Averaged stats (nsd-val): loss: 0.8298 (0.8326) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 8:40:32 lr: 0.000057 grad: 0.1503 (0.1503) loss: 0.8128 (0.8128) time: 4.9972 data: 4.7517 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:21:22 lr: 0.000057 grad: 0.2082 (0.2270) loss: 0.7713 (0.7967) time: 0.1537 data: 0.0407 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:18:10 lr: 0.000057 grad: 0.1788 (0.2200) loss: 0.7861 (0.7881) time: 0.1515 data: 0.0571 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:16:32 lr: 0.000057 grad: 0.1684 (0.2062) loss: 0.7793 (0.7865) time: 0.1322 data: 0.0427 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:15:33 lr: 0.000057 grad: 0.1673 (0.1969) loss: 0.7696 (0.7841) time: 0.1291 data: 0.0423 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:14:48 lr: 0.000057 grad: 0.1670 (0.1906) loss: 0.7724 (0.7829) time: 0.1268 data: 0.0423 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:14:09 lr: 0.000057 grad: 0.1564 (0.1871) loss: 0.7781 (0.7808) time: 0.1324 data: 0.0499 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:13:42 lr: 0.000057 grad: 0.1638 (0.1831) loss: 0.7665 (0.7800) time: 0.1508 data: 0.0639 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:13:42 lr: 0.000057 grad: 0.1700 (0.1805) loss: 0.7676 (0.7787) time: 0.2049 data: 0.1164 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:13:25 lr: 0.000057 grad: 0.1511 (0.1777) loss: 0.7818 (0.7779) time: 0.1841 data: 0.0991 max mem: 9377 +Train: [55] [1000/6250] eta: 0:13:06 lr: 0.000057 grad: 0.1507 (0.1756) loss: 0.7758 (0.7772) time: 0.1527 data: 0.0740 max mem: 9377 +Train: [55] [1100/6250] eta: 0:12:49 lr: 0.000057 grad: 0.1613 (0.1742) loss: 0.7564 (0.7763) time: 0.1177 data: 0.0407 max mem: 9377 +Train: [55] [1200/6250] eta: 0:12:37 lr: 0.000057 grad: 0.1557 (0.1728) loss: 0.7591 (0.7754) time: 0.1584 data: 0.0759 max mem: 9377 +Train: [55] [1300/6250] eta: 0:12:15 lr: 0.000057 grad: 0.1531 (0.1714) loss: 0.7676 (0.7747) time: 0.1242 data: 0.0421 max mem: 9377 +Train: [55] [1400/6250] eta: 0:11:57 lr: 0.000057 grad: 0.1496 (0.1705) loss: 0.7704 (0.7739) time: 0.1398 data: 0.0584 max mem: 9377 +Train: [55] [1500/6250] eta: 0:11:43 lr: 0.000057 grad: 0.1526 (0.1701) loss: 0.7580 (0.7733) time: 0.1569 data: 0.0750 max mem: 9377 +Train: [55] [1600/6250] eta: 0:11:28 lr: 0.000057 grad: 0.1640 (0.1694) loss: 0.7541 (0.7726) time: 0.1568 data: 0.0655 max mem: 9377 +Train: [55] [1700/6250] eta: 0:11:13 lr: 0.000057 grad: 0.1423 (0.1687) loss: 0.7546 (0.7722) time: 0.1418 data: 0.0610 max mem: 9377 +Train: [55] [1800/6250] eta: 0:10:56 lr: 0.000057 grad: 0.1525 (0.1679) loss: 0.7769 (0.7719) time: 0.1606 data: 0.0785 max mem: 9377 +Train: [55] [1900/6250] eta: 0:10:38 lr: 0.000057 grad: 0.1528 (0.1672) loss: 0.7757 (0.7716) time: 0.1506 data: 0.0668 max mem: 9377 +Train: [55] [2000/6250] eta: 0:10:19 lr: 0.000057 grad: 0.1503 (0.1666) loss: 0.7704 (0.7716) time: 0.1401 data: 0.0538 max mem: 9377 +Train: [55] [2100/6250] eta: 0:10:02 lr: 0.000057 grad: 0.1489 (0.1661) loss: 0.7769 (0.7716) time: 0.1423 data: 0.0619 max mem: 9377 +Train: [55] [2200/6250] eta: 0:09:43 lr: 0.000057 grad: 0.1589 (0.1656) loss: 0.7594 (0.7714) time: 0.1216 data: 0.0352 max mem: 9377 +Train: [55] [2300/6250] eta: 0:09:27 lr: 0.000057 grad: 0.1460 (0.1654) loss: 0.7685 (0.7712) time: 0.1378 data: 0.0561 max mem: 9377 +Train: [55] [2400/6250] eta: 0:09:12 lr: 0.000057 grad: 0.1483 (0.1649) loss: 0.7687 (0.7710) time: 0.1505 data: 0.0719 max mem: 9377 +Train: [55] [2500/6250] eta: 0:08:58 lr: 0.000057 grad: 0.1462 (0.1644) loss: 0.7698 (0.7710) time: 0.1491 data: 0.0661 max mem: 9377 +Train: [55] [2600/6250] eta: 0:08:42 lr: 0.000056 grad: 0.1507 (0.1640) loss: 0.7732 (0.7709) time: 0.1157 data: 0.0328 max mem: 9377 +Train: [55] [2700/6250] eta: 0:08:27 lr: 0.000056 grad: 0.1469 (0.1634) loss: 0.7698 (0.7710) time: 0.1411 data: 0.0584 max mem: 9377 +Train: [55] [2800/6250] eta: 0:08:14 lr: 0.000056 grad: 0.1525 (0.1631) loss: 0.7739 (0.7710) time: 0.1348 data: 0.0004 max mem: 9377 +Train: [55] [2900/6250] eta: 0:07:59 lr: 0.000056 grad: 0.1476 (0.1628) loss: 0.7839 (0.7711) time: 0.1441 data: 0.0641 max mem: 9377 +Train: [55] [3000/6250] eta: 0:07:44 lr: 0.000056 grad: 0.1554 (0.1625) loss: 0.7677 (0.7712) time: 0.1339 data: 0.0510 max mem: 9377 +Train: [55] [3100/6250] eta: 0:07:29 lr: 0.000056 grad: 0.1504 (0.1623) loss: 0.7706 (0.7713) time: 0.1402 data: 0.0628 max mem: 9377 +Train: [55] [3200/6250] eta: 0:07:15 lr: 0.000056 grad: 0.1552 (0.1621) loss: 0.7619 (0.7712) time: 0.1425 data: 0.0627 max mem: 9377 +Train: [55] [3300/6250] eta: 0:07:00 lr: 0.000056 grad: 0.1533 (0.1618) loss: 0.7627 (0.7711) time: 0.1376 data: 0.0568 max mem: 9377 +Train: [55] [3400/6250] eta: 0:06:45 lr: 0.000056 grad: 0.1536 (0.1617) loss: 0.7756 (0.7713) time: 0.1222 data: 0.0455 max mem: 9377 +Train: [55] [3500/6250] eta: 0:06:31 lr: 0.000056 grad: 0.1503 (0.1614) loss: 0.7768 (0.7713) time: 0.1338 data: 0.0463 max mem: 9377 +Train: [55] [3600/6250] eta: 0:06:17 lr: 0.000056 grad: 0.1597 (0.1612) loss: 0.7665 (0.7712) time: 0.0979 data: 0.0002 max mem: 9377 +Train: [55] [3700/6250] eta: 0:06:03 lr: 0.000056 grad: 0.1580 (0.1611) loss: 0.7732 (0.7711) time: 0.1371 data: 0.0547 max mem: 9377 +Train: [55] [3800/6250] eta: 0:05:48 lr: 0.000056 grad: 0.1606 (0.1610) loss: 0.7664 (0.7711) time: 0.1447 data: 0.0569 max mem: 9377 +Train: [55] [3900/6250] eta: 0:05:33 lr: 0.000056 grad: 0.1538 (0.1608) loss: 0.7799 (0.7711) time: 0.1320 data: 0.0482 max mem: 9377 +Train: [55] [4000/6250] eta: 0:05:19 lr: 0.000056 grad: 0.1547 (0.1607) loss: 0.7719 (0.7711) time: 0.1538 data: 0.0699 max mem: 9377 +Train: [55] [4100/6250] eta: 0:05:05 lr: 0.000056 grad: 0.1545 (0.1607) loss: 0.7645 (0.7711) time: 0.1144 data: 0.0349 max mem: 9377 +Train: [55] [4200/6250] eta: 0:04:50 lr: 0.000056 grad: 0.1437 (0.1605) loss: 0.7791 (0.7711) time: 0.1414 data: 0.0539 max mem: 9377 +Train: [55] [4300/6250] eta: 0:04:36 lr: 0.000056 grad: 0.1496 (0.1603) loss: 0.7661 (0.7712) time: 0.1408 data: 0.0594 max mem: 9377 +Train: [55] [4400/6250] eta: 0:04:22 lr: 0.000056 grad: 0.1435 (0.1601) loss: 0.7712 (0.7712) time: 0.1437 data: 0.0641 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:08 lr: 0.000056 grad: 0.1484 (0.1600) loss: 0.7746 (0.7713) time: 0.1507 data: 0.0730 max mem: 9377 +Train: [55] [4600/6250] eta: 0:03:53 lr: 0.000056 grad: 0.1507 (0.1598) loss: 0.7752 (0.7715) time: 0.1334 data: 0.0435 max mem: 9377 +Train: [55] [4700/6250] eta: 0:03:39 lr: 0.000056 grad: 0.1477 (0.1597) loss: 0.7698 (0.7715) time: 0.1653 data: 0.0881 max mem: 9377 +Train: [55] [4800/6250] eta: 0:03:25 lr: 0.000056 grad: 0.1500 (0.1597) loss: 0.7764 (0.7715) time: 0.1590 data: 0.0745 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:11 lr: 0.000056 grad: 0.1493 (0.1596) loss: 0.7721 (0.7715) time: 0.1632 data: 0.0833 max mem: 9377 +Train: [55] [5000/6250] eta: 0:02:57 lr: 0.000056 grad: 0.1492 (0.1594) loss: 0.7724 (0.7715) time: 0.1315 data: 0.0370 max mem: 9377 +Train: [55] [5100/6250] eta: 0:02:42 lr: 0.000056 grad: 0.1542 (0.1593) loss: 0.7761 (0.7715) time: 0.1217 data: 0.0402 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:28 lr: 0.000056 grad: 0.1511 (0.1593) loss: 0.7678 (0.7715) time: 0.1389 data: 0.0545 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:14 lr: 0.000056 grad: 0.1524 (0.1592) loss: 0.7732 (0.7714) time: 0.1202 data: 0.0354 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:00 lr: 0.000056 grad: 0.1544 (0.1592) loss: 0.7718 (0.7714) time: 0.1365 data: 0.0554 max mem: 9377 +Train: [55] [5500/6250] eta: 0:01:45 lr: 0.000056 grad: 0.1635 (0.1592) loss: 0.7645 (0.7713) time: 0.1690 data: 0.0830 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:31 lr: 0.000055 grad: 0.1557 (0.1592) loss: 0.7669 (0.7712) time: 0.1162 data: 0.0403 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:17 lr: 0.000055 grad: 0.1516 (0.1594) loss: 0.7701 (0.7711) time: 0.1407 data: 0.0549 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:03 lr: 0.000055 grad: 0.1550 (0.1594) loss: 0.7717 (0.7710) time: 0.1649 data: 0.0913 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:49 lr: 0.000055 grad: 0.1487 (0.1593) loss: 0.7752 (0.7710) time: 0.1349 data: 0.0569 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:35 lr: 0.000055 grad: 0.1498 (0.1593) loss: 0.7711 (0.7710) time: 0.1423 data: 0.0654 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:21 lr: 0.000055 grad: 0.1601 (0.1592) loss: 0.7642 (0.7709) time: 0.1286 data: 0.0468 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:07 lr: 0.000055 grad: 0.1434 (0.1591) loss: 0.7752 (0.7709) time: 0.1259 data: 0.0438 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.1550 (0.1591) loss: 0.7702 (0.7709) time: 0.1423 data: 0.0641 max mem: 9377 +Train: [55] Total time: 0:14:45 (0.1417 s / it) +Averaged stats: lr: 0.000055 grad: 0.1550 (0.1591) loss: 0.7702 (0.7709) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:05:14 loss: 0.8239 (0.8239) time: 5.0651 data: 5.0343 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8195 (0.8182) time: 0.1141 data: 0.0894 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:12 (0.2071 s / it) +Averaged stats (hcp-train-subset): loss: 0.8195 (0.8182) +Eval (hcp-val): [55] [ 0/62] eta: 0:05:00 loss: 0.8487 (0.8487) time: 4.8512 data: 4.8204 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8455 (0.8488) time: 0.1059 data: 0.0804 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:12 (0.2031 s / it) +Averaged stats (hcp-val): loss: 0.8455 (0.8488) +Eval (nsd-val): [55] [ 0/62] eta: 0:03:30 loss: 0.8189 (0.8189) time: 3.4018 data: 3.3269 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8269 (0.8275) time: 0.1027 data: 0.0776 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:12 (0.2019 s / it) +Averaged stats (nsd-val): loss: 0.8269 (0.8275) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 7:58:35 lr: 0.000055 grad: 0.3670 (0.3670) loss: 0.8071 (0.8071) time: 4.5945 data: 4.3792 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:21:04 lr: 0.000055 grad: 0.1902 (0.2479) loss: 0.7734 (0.7918) time: 0.1718 data: 0.0687 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:18:10 lr: 0.000055 grad: 0.1748 (0.2245) loss: 0.7857 (0.7875) time: 0.1495 data: 0.0577 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:16:37 lr: 0.000055 grad: 0.1654 (0.2132) loss: 0.7598 (0.7822) time: 0.1469 data: 0.0561 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:15:36 lr: 0.000055 grad: 0.1852 (0.2060) loss: 0.7738 (0.7794) time: 0.1465 data: 0.0613 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:14:49 lr: 0.000055 grad: 0.1722 (0.2000) loss: 0.7741 (0.7776) time: 0.1540 data: 0.0648 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:14:09 lr: 0.000055 grad: 0.1449 (0.1939) loss: 0.7686 (0.7765) time: 0.1299 data: 0.0367 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:13:39 lr: 0.000055 grad: 0.1580 (0.1885) loss: 0.7862 (0.7763) time: 0.1084 data: 0.0249 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:13:16 lr: 0.000055 grad: 0.1468 (0.1847) loss: 0.7802 (0.7763) time: 0.1396 data: 0.0584 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:13:06 lr: 0.000055 grad: 0.1541 (0.1821) loss: 0.7708 (0.7761) time: 0.2008 data: 0.1154 max mem: 9377 +Train: [56] [1000/6250] eta: 0:12:44 lr: 0.000055 grad: 0.1551 (0.1798) loss: 0.7796 (0.7759) time: 0.1555 data: 0.0718 max mem: 9377 +Train: [56] [1100/6250] eta: 0:12:30 lr: 0.000055 grad: 0.1589 (0.1779) loss: 0.7748 (0.7756) time: 0.1411 data: 0.0580 max mem: 9377 +Train: [56] [1200/6250] eta: 0:12:14 lr: 0.000055 grad: 0.1546 (0.1761) loss: 0.7800 (0.7754) time: 0.1544 data: 0.0747 max mem: 9377 +Train: [56] [1300/6250] eta: 0:11:59 lr: 0.000055 grad: 0.1540 (0.1743) loss: 0.7676 (0.7752) time: 0.1286 data: 0.0572 max mem: 9377 +Train: [56] [1400/6250] eta: 0:11:42 lr: 0.000055 grad: 0.1584 (0.1730) loss: 0.7727 (0.7750) time: 0.1446 data: 0.0645 max mem: 9377 +Train: [56] [1500/6250] eta: 0:11:25 lr: 0.000055 grad: 0.1551 (0.1719) loss: 0.7578 (0.7746) time: 0.1350 data: 0.0579 max mem: 9377 +Train: [56] [1600/6250] eta: 0:11:11 lr: 0.000055 grad: 0.1525 (0.1708) loss: 0.7684 (0.7744) time: 0.1497 data: 0.0688 max mem: 9377 +Train: [56] [1700/6250] eta: 0:10:59 lr: 0.000055 grad: 0.1486 (0.1698) loss: 0.7673 (0.7742) time: 0.1410 data: 0.0464 max mem: 9377 +Train: [56] [1800/6250] eta: 0:10:47 lr: 0.000055 grad: 0.1552 (0.1689) loss: 0.7685 (0.7741) time: 0.1577 data: 0.0743 max mem: 9377 +Train: [56] [1900/6250] eta: 0:10:30 lr: 0.000055 grad: 0.1501 (0.1679) loss: 0.7853 (0.7743) time: 0.1442 data: 0.0634 max mem: 9377 +Train: [56] [2000/6250] eta: 0:10:12 lr: 0.000055 grad: 0.1485 (0.1671) loss: 0.7768 (0.7744) time: 0.1388 data: 0.0582 max mem: 9377 +Train: [56] [2100/6250] eta: 0:09:55 lr: 0.000055 grad: 0.1491 (0.1664) loss: 0.7734 (0.7745) time: 0.1388 data: 0.0483 max mem: 9377 +Train: [56] [2200/6250] eta: 0:09:37 lr: 0.000055 grad: 0.1375 (0.1657) loss: 0.7845 (0.7748) time: 0.1247 data: 0.0393 max mem: 9377 +Train: [56] [2300/6250] eta: 0:09:20 lr: 0.000055 grad: 0.1488 (0.1651) loss: 0.7807 (0.7748) time: 0.1172 data: 0.0233 max mem: 9377 +Train: [56] [2400/6250] eta: 0:09:04 lr: 0.000054 grad: 0.1524 (0.1645) loss: 0.7775 (0.7750) time: 0.1269 data: 0.0431 max mem: 9377 +Train: [56] [2500/6250] eta: 0:08:48 lr: 0.000054 grad: 0.1524 (0.1641) loss: 0.7766 (0.7750) time: 0.1381 data: 0.0535 max mem: 9377 +Train: [56] [2600/6250] eta: 0:08:33 lr: 0.000054 grad: 0.1515 (0.1637) loss: 0.7763 (0.7750) time: 0.1289 data: 0.0439 max mem: 9377 +Train: [56] [2700/6250] eta: 0:08:18 lr: 0.000054 grad: 0.1518 (0.1632) loss: 0.7678 (0.7750) time: 0.1159 data: 0.0324 max mem: 9377 +Train: [56] [2800/6250] eta: 0:08:04 lr: 0.000054 grad: 0.1515 (0.1631) loss: 0.7707 (0.7748) time: 0.1285 data: 0.0459 max mem: 9377 +Train: [56] [2900/6250] eta: 0:07:49 lr: 0.000054 grad: 0.1542 (0.1628) loss: 0.7688 (0.7747) time: 0.1304 data: 0.0421 max mem: 9377 +Train: [56] [3000/6250] eta: 0:07:35 lr: 0.000054 grad: 0.1623 (0.1627) loss: 0.7662 (0.7745) time: 0.1355 data: 0.0537 max mem: 9377 +Train: [56] [3100/6250] eta: 0:07:21 lr: 0.000054 grad: 0.1542 (0.1626) loss: 0.7762 (0.7744) time: 0.1288 data: 0.0448 max mem: 9377 +Train: [56] [3200/6250] eta: 0:07:07 lr: 0.000054 grad: 0.1500 (0.1623) loss: 0.7756 (0.7743) time: 0.1345 data: 0.0469 max mem: 9377 +Train: [56] [3300/6250] eta: 0:06:52 lr: 0.000054 grad: 0.1560 (0.1621) loss: 0.7704 (0.7743) time: 0.1597 data: 0.0774 max mem: 9377 +Train: [56] [3400/6250] eta: 0:06:38 lr: 0.000054 grad: 0.1622 (0.1621) loss: 0.7747 (0.7742) time: 0.1317 data: 0.0533 max mem: 9377 +Train: [56] [3500/6250] eta: 0:06:24 lr: 0.000054 grad: 0.1625 (0.1620) loss: 0.7713 (0.7741) time: 0.1126 data: 0.0295 max mem: 9377 +Train: [56] [3600/6250] eta: 0:06:09 lr: 0.000054 grad: 0.1537 (0.1619) loss: 0.7616 (0.7739) time: 0.1179 data: 0.0301 max mem: 9377 +Train: [56] [3700/6250] eta: 0:05:55 lr: 0.000054 grad: 0.1559 (0.1617) loss: 0.7684 (0.7739) time: 0.1486 data: 0.0695 max mem: 9377 +Train: [56] [3800/6250] eta: 0:05:41 lr: 0.000054 grad: 0.1505 (0.1615) loss: 0.7813 (0.7739) time: 0.1418 data: 0.0603 max mem: 9377 +Train: [56] [3900/6250] eta: 0:05:27 lr: 0.000054 grad: 0.1493 (0.1613) loss: 0.7665 (0.7739) time: 0.1408 data: 0.0582 max mem: 9377 +Train: [56] [4000/6250] eta: 0:05:13 lr: 0.000054 grad: 0.1597 (0.1612) loss: 0.7731 (0.7739) time: 0.1248 data: 0.0429 max mem: 9377 +Train: [56] [4100/6250] eta: 0:04:59 lr: 0.000054 grad: 0.1536 (0.1611) loss: 0.7762 (0.7739) time: 0.0963 data: 0.0002 max mem: 9377 +Train: [56] [4200/6250] eta: 0:04:45 lr: 0.000054 grad: 0.1520 (0.1611) loss: 0.7806 (0.7740) time: 0.1367 data: 0.0593 max mem: 9377 +Train: [56] [4300/6250] eta: 0:04:31 lr: 0.000054 grad: 0.1527 (0.1610) loss: 0.7668 (0.7740) time: 0.1184 data: 0.0418 max mem: 9377 +Train: [56] [4400/6250] eta: 0:04:17 lr: 0.000054 grad: 0.1481 (0.1609) loss: 0.7821 (0.7740) time: 0.1492 data: 0.0694 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:04 lr: 0.000054 grad: 0.1505 (0.1607) loss: 0.7750 (0.7740) time: 0.1395 data: 0.0537 max mem: 9377 +Train: [56] [4600/6250] eta: 0:03:50 lr: 0.000054 grad: 0.1616 (0.1607) loss: 0.7803 (0.7740) time: 0.1603 data: 0.0779 max mem: 9377 +Train: [56] [4700/6250] eta: 0:03:36 lr: 0.000054 grad: 0.1551 (0.1607) loss: 0.7682 (0.7739) time: 0.1621 data: 0.0848 max mem: 9377 +Train: [56] [4800/6250] eta: 0:03:22 lr: 0.000054 grad: 0.1437 (0.1606) loss: 0.7874 (0.7740) time: 0.1276 data: 0.0486 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:08 lr: 0.000054 grad: 0.1512 (0.1605) loss: 0.7786 (0.7740) time: 0.1239 data: 0.0406 max mem: 9377 +Train: [56] [5000/6250] eta: 0:02:54 lr: 0.000054 grad: 0.1497 (0.1605) loss: 0.7712 (0.7740) time: 0.1573 data: 0.0792 max mem: 9377 +Train: [56] [5100/6250] eta: 0:02:40 lr: 0.000054 grad: 0.1535 (0.1604) loss: 0.7725 (0.7739) time: 0.1557 data: 0.0738 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:26 lr: 0.000054 grad: 0.1588 (0.1605) loss: 0.7682 (0.7738) time: 0.1380 data: 0.0544 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:13 lr: 0.000054 grad: 0.1565 (0.1604) loss: 0.7714 (0.7738) time: 0.1548 data: 0.0715 max mem: 9377 +Train: [56] [5400/6250] eta: 0:01:58 lr: 0.000054 grad: 0.1465 (0.1603) loss: 0.7789 (0.7738) time: 0.1388 data: 0.0581 max mem: 9377 +Train: [56] [5500/6250] eta: 0:01:44 lr: 0.000053 grad: 0.1607 (0.1603) loss: 0.7585 (0.7737) time: 0.1242 data: 0.0369 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:30 lr: 0.000053 grad: 0.1631 (0.1602) loss: 0.7697 (0.7737) time: 0.1224 data: 0.0407 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:16 lr: 0.000053 grad: 0.1583 (0.1602) loss: 0.7717 (0.7737) time: 0.1580 data: 0.0801 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:02 lr: 0.000053 grad: 0.1421 (0.1602) loss: 0.7749 (0.7736) time: 0.1572 data: 0.0741 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:48 lr: 0.000053 grad: 0.1505 (0.1602) loss: 0.7819 (0.7735) time: 0.1444 data: 0.0609 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:34 lr: 0.000053 grad: 0.1597 (0.1602) loss: 0.7681 (0.7734) time: 0.1380 data: 0.0564 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:20 lr: 0.000053 grad: 0.1587 (0.1602) loss: 0.7722 (0.7733) time: 0.1204 data: 0.0385 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:06 lr: 0.000053 grad: 0.1607 (0.1602) loss: 0.7706 (0.7732) time: 0.1348 data: 0.0599 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1625 (0.1602) loss: 0.7696 (0.7732) time: 0.1516 data: 0.0759 max mem: 9377 +Train: [56] Total time: 0:14:33 (0.1397 s / it) +Averaged stats: lr: 0.000053 grad: 0.1625 (0.1602) loss: 0.7696 (0.7732) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:05:22 loss: 0.8260 (0.8260) time: 5.1985 data: 5.1675 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8189 (0.8166) time: 0.1196 data: 0.0932 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:12 (0.2072 s / it) +Averaged stats (hcp-train-subset): loss: 0.8189 (0.8166) +Eval (hcp-val): [56] [ 0/62] eta: 0:03:32 loss: 0.8481 (0.8481) time: 3.4344 data: 3.3663 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8468 (0.8487) time: 0.1206 data: 0.0918 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:12 (0.2047 s / it) +Averaged stats (hcp-val): loss: 0.8468 (0.8487) +Eval (nsd-val): [56] [ 0/62] eta: 0:03:12 loss: 0.8170 (0.8170) time: 3.1128 data: 3.0390 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8243 (0.8235) time: 0.1240 data: 0.0990 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:13 (0.2176 s / it) +Averaged stats (nsd-val): loss: 0.8243 (0.8235) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 11:19:00 lr: 0.000053 grad: 0.3061 (0.3061) loss: 0.8014 (0.8014) time: 6.5186 data: 6.4132 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:21:11 lr: 0.000053 grad: 0.2003 (0.2269) loss: 0.7957 (0.8027) time: 0.1393 data: 0.0361 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:18:08 lr: 0.000053 grad: 0.1871 (0.2086) loss: 0.7976 (0.7964) time: 0.1455 data: 0.0506 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:16:16 lr: 0.000053 grad: 0.1745 (0.2007) loss: 0.7946 (0.7926) time: 0.1174 data: 0.0337 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:15:27 lr: 0.000053 grad: 0.1550 (0.1928) loss: 0.7829 (0.7902) time: 0.1333 data: 0.0494 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:14:42 lr: 0.000053 grad: 0.1634 (0.1879) loss: 0.7841 (0.7883) time: 0.1138 data: 0.0227 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:14:02 lr: 0.000053 grad: 0.1639 (0.1854) loss: 0.7654 (0.7851) time: 0.1389 data: 0.0531 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:13:33 lr: 0.000053 grad: 0.1674 (0.1828) loss: 0.7649 (0.7818) time: 0.1364 data: 0.0521 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:13:18 lr: 0.000053 grad: 0.1611 (0.1798) loss: 0.7766 (0.7799) time: 0.1593 data: 0.0695 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:13:07 lr: 0.000053 grad: 0.1560 (0.1778) loss: 0.7643 (0.7784) time: 0.1732 data: 0.0916 max mem: 9377 +Train: [57] [1000/6250] eta: 0:12:54 lr: 0.000053 grad: 0.1633 (0.1764) loss: 0.7677 (0.7776) time: 0.1409 data: 0.0589 max mem: 9377 +Train: [57] [1100/6250] eta: 0:12:47 lr: 0.000053 grad: 0.1560 (0.1745) loss: 0.7764 (0.7771) time: 0.1998 data: 0.1193 max mem: 9377 +Train: [57] [1200/6250] eta: 0:12:25 lr: 0.000053 grad: 0.1531 (0.1730) loss: 0.7771 (0.7767) time: 0.1428 data: 0.0603 max mem: 9377 +Train: [57] [1300/6250] eta: 0:12:09 lr: 0.000053 grad: 0.1429 (0.1717) loss: 0.7826 (0.7765) time: 0.1406 data: 0.0601 max mem: 9377 +Train: [57] [1400/6250] eta: 0:11:55 lr: 0.000053 grad: 0.1603 (0.1706) loss: 0.7693 (0.7763) time: 0.1428 data: 0.0657 max mem: 9377 +Train: [57] [1500/6250] eta: 0:11:37 lr: 0.000053 grad: 0.1550 (0.1694) loss: 0.7747 (0.7762) time: 0.1199 data: 0.0351 max mem: 9377 +Train: [57] [1600/6250] eta: 0:11:22 lr: 0.000053 grad: 0.1529 (0.1683) loss: 0.7794 (0.7763) time: 0.1217 data: 0.0310 max mem: 9377 +Train: [57] [1700/6250] eta: 0:11:07 lr: 0.000053 grad: 0.1540 (0.1674) loss: 0.7772 (0.7764) time: 0.1474 data: 0.0619 max mem: 9377 +Train: [57] [1800/6250] eta: 0:10:54 lr: 0.000053 grad: 0.1538 (0.1668) loss: 0.7762 (0.7764) time: 0.1518 data: 0.0628 max mem: 9377 +Train: [57] [1900/6250] eta: 0:10:38 lr: 0.000053 grad: 0.1508 (0.1663) loss: 0.7743 (0.7764) time: 0.1166 data: 0.0304 max mem: 9377 +Train: [57] [2000/6250] eta: 0:10:20 lr: 0.000053 grad: 0.1582 (0.1659) loss: 0.7736 (0.7764) time: 0.1314 data: 0.0484 max mem: 9377 +Train: [57] [2100/6250] eta: 0:10:02 lr: 0.000053 grad: 0.1520 (0.1654) loss: 0.7781 (0.7764) time: 0.1106 data: 0.0227 max mem: 9377 +Train: [57] [2200/6250] eta: 0:09:45 lr: 0.000053 grad: 0.1463 (0.1649) loss: 0.7812 (0.7765) time: 0.1245 data: 0.0442 max mem: 9377 +Train: [57] [2300/6250] eta: 0:09:27 lr: 0.000052 grad: 0.1414 (0.1643) loss: 0.7765 (0.7764) time: 0.1284 data: 0.0483 max mem: 9377 +Train: [57] [2400/6250] eta: 0:09:10 lr: 0.000052 grad: 0.1553 (0.1639) loss: 0.7732 (0.7762) time: 0.1423 data: 0.0636 max mem: 9377 +Train: [57] [2500/6250] eta: 0:08:54 lr: 0.000052 grad: 0.1519 (0.1636) loss: 0.7652 (0.7761) time: 0.1342 data: 0.0507 max mem: 9377 +Train: [57] [2600/6250] eta: 0:08:38 lr: 0.000052 grad: 0.1500 (0.1632) loss: 0.7737 (0.7759) time: 0.1555 data: 0.0734 max mem: 9377 +Train: [57] [2700/6250] eta: 0:08:23 lr: 0.000052 grad: 0.1585 (0.1630) loss: 0.7671 (0.7758) time: 0.1310 data: 0.0532 max mem: 9377 +Train: [57] [2800/6250] eta: 0:08:09 lr: 0.000052 grad: 0.1455 (0.1626) loss: 0.7765 (0.7757) time: 0.1436 data: 0.0605 max mem: 9377 +Train: [57] [2900/6250] eta: 0:07:55 lr: 0.000052 grad: 0.1438 (0.1622) loss: 0.7809 (0.7758) time: 0.1358 data: 0.0535 max mem: 9377 +Train: [57] [3000/6250] eta: 0:07:41 lr: 0.000052 grad: 0.1488 (0.1620) loss: 0.7780 (0.7759) time: 0.1400 data: 0.0541 max mem: 9377 +Train: [57] [3100/6250] eta: 0:07:26 lr: 0.000052 grad: 0.1550 (0.1617) loss: 0.7611 (0.7758) time: 0.1392 data: 0.0558 max mem: 9377 +Train: [57] [3200/6250] eta: 0:07:12 lr: 0.000052 grad: 0.1505 (0.1615) loss: 0.7732 (0.7758) time: 0.1513 data: 0.0672 max mem: 9377 +Train: [57] [3300/6250] eta: 0:06:59 lr: 0.000052 grad: 0.1513 (0.1614) loss: 0.7724 (0.7756) time: 0.1648 data: 0.0781 max mem: 9377 +Train: [57] [3400/6250] eta: 0:06:43 lr: 0.000052 grad: 0.1528 (0.1612) loss: 0.7703 (0.7754) time: 0.1320 data: 0.0487 max mem: 9377 +Train: [57] [3500/6250] eta: 0:06:29 lr: 0.000052 grad: 0.1586 (0.1610) loss: 0.7663 (0.7753) time: 0.1380 data: 0.0598 max mem: 9377 +Train: [57] [3600/6250] eta: 0:06:15 lr: 0.000052 grad: 0.1570 (0.1609) loss: 0.7694 (0.7751) time: 0.1358 data: 0.0571 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:00 lr: 0.000052 grad: 0.1474 (0.1608) loss: 0.7669 (0.7749) time: 0.1463 data: 0.0661 max mem: 9377 +Train: [57] [3800/6250] eta: 0:05:46 lr: 0.000052 grad: 0.1514 (0.1607) loss: 0.7666 (0.7746) time: 0.1608 data: 0.0818 max mem: 9377 +Train: [57] [3900/6250] eta: 0:05:32 lr: 0.000052 grad: 0.1546 (0.1606) loss: 0.7650 (0.7744) time: 0.1292 data: 0.0503 max mem: 9377 +Train: [57] [4000/6250] eta: 0:05:17 lr: 0.000052 grad: 0.1572 (0.1605) loss: 0.7690 (0.7742) time: 0.1439 data: 0.0618 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:03 lr: 0.000052 grad: 0.1603 (0.1605) loss: 0.7647 (0.7740) time: 0.1309 data: 0.0499 max mem: 9377 +Train: [57] [4200/6250] eta: 0:04:49 lr: 0.000052 grad: 0.1547 (0.1604) loss: 0.7776 (0.7739) time: 0.1122 data: 0.0268 max mem: 9377 +Train: [57] [4300/6250] eta: 0:04:34 lr: 0.000052 grad: 0.1518 (0.1603) loss: 0.7751 (0.7737) time: 0.1459 data: 0.0692 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:20 lr: 0.000052 grad: 0.1590 (0.1603) loss: 0.7742 (0.7736) time: 0.1466 data: 0.0633 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:06 lr: 0.000052 grad: 0.1523 (0.1602) loss: 0.7627 (0.7734) time: 0.1352 data: 0.0511 max mem: 9377 +Train: [57] [4600/6250] eta: 0:03:52 lr: 0.000052 grad: 0.1669 (0.1602) loss: 0.7719 (0.7733) time: 0.1409 data: 0.0565 max mem: 9377 +Train: [57] [4700/6250] eta: 0:03:38 lr: 0.000052 grad: 0.1553 (0.1602) loss: 0.7596 (0.7732) time: 0.1551 data: 0.0820 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:24 lr: 0.000052 grad: 0.1557 (0.1601) loss: 0.7687 (0.7730) time: 0.1528 data: 0.0798 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:10 lr: 0.000052 grad: 0.1587 (0.1601) loss: 0.7689 (0.7728) time: 0.1241 data: 0.0439 max mem: 9377 +Train: [57] [5000/6250] eta: 0:02:56 lr: 0.000052 grad: 0.1635 (0.1600) loss: 0.7606 (0.7726) time: 0.1367 data: 0.0512 max mem: 9377 +Train: [57] [5100/6250] eta: 0:02:42 lr: 0.000052 grad: 0.1631 (0.1600) loss: 0.7568 (0.7724) time: 0.1597 data: 0.0748 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:28 lr: 0.000052 grad: 0.1621 (0.1601) loss: 0.7610 (0.7722) time: 0.1353 data: 0.0532 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:14 lr: 0.000052 grad: 0.1583 (0.1601) loss: 0.7624 (0.7721) time: 0.1236 data: 0.0409 max mem: 9377 +Train: [57] [5400/6250] eta: 0:01:59 lr: 0.000051 grad: 0.1537 (0.1601) loss: 0.7639 (0.7720) time: 0.1342 data: 0.0514 max mem: 9377 +Train: [57] [5500/6250] eta: 0:01:45 lr: 0.000051 grad: 0.1646 (0.1602) loss: 0.7606 (0.7718) time: 0.1223 data: 0.0412 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:31 lr: 0.000051 grad: 0.1533 (0.1601) loss: 0.7702 (0.7717) time: 0.1380 data: 0.0582 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:17 lr: 0.000051 grad: 0.1575 (0.1602) loss: 0.7666 (0.7716) time: 0.1637 data: 0.0858 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:03 lr: 0.000051 grad: 0.1668 (0.1603) loss: 0.7582 (0.7715) time: 0.1348 data: 0.0583 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:49 lr: 0.000051 grad: 0.1589 (0.1603) loss: 0.7542 (0.7713) time: 0.1413 data: 0.0572 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:35 lr: 0.000051 grad: 0.1595 (0.1604) loss: 0.7687 (0.7712) time: 0.1621 data: 0.0830 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:21 lr: 0.000051 grad: 0.1516 (0.1603) loss: 0.7717 (0.7712) time: 0.1320 data: 0.0468 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1605 (0.1603) loss: 0.7658 (0.7711) time: 0.1423 data: 0.0649 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1554 (0.1603) loss: 0.7623 (0.7711) time: 0.1355 data: 0.0563 max mem: 9377 +Train: [57] Total time: 0:14:44 (0.1414 s / it) +Averaged stats: lr: 0.000051 grad: 0.1554 (0.1603) loss: 0.7623 (0.7711) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:04:32 loss: 0.8218 (0.8218) time: 4.4028 data: 4.3401 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8188 (0.8176) time: 0.1213 data: 0.0960 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:12 (0.1985 s / it) +Averaged stats (hcp-train-subset): loss: 0.8188 (0.8176) +Eval (hcp-val): [57] [ 0/62] eta: 0:04:30 loss: 0.8463 (0.8463) time: 4.3671 data: 4.2969 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8488 (0.8496) time: 0.1046 data: 0.0782 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:12 (0.2050 s / it) +Averaged stats (hcp-val): loss: 0.8488 (0.8496) +Eval (nsd-val): [57] [ 0/62] eta: 0:03:39 loss: 0.8135 (0.8135) time: 3.5424 data: 3.4469 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8239 (0.8242) time: 0.1359 data: 0.1108 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:12 (0.2055 s / it) +Averaged stats (nsd-val): loss: 0.8239 (0.8242) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 10:47:27 lr: 0.000051 grad: 0.3490 (0.3490) loss: 0.8226 (0.8226) time: 6.2156 data: 6.1043 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:20:49 lr: 0.000051 grad: 0.2455 (0.3054) loss: 0.7831 (0.7678) time: 0.1465 data: 0.0366 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:18:00 lr: 0.000051 grad: 0.1881 (0.2676) loss: 0.7904 (0.7756) time: 0.1619 data: 0.0687 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:16:33 lr: 0.000051 grad: 0.1940 (0.2432) loss: 0.7744 (0.7757) time: 0.1435 data: 0.0528 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:15:32 lr: 0.000051 grad: 0.1708 (0.2276) loss: 0.7700 (0.7759) time: 0.1290 data: 0.0327 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:14:40 lr: 0.000051 grad: 0.1612 (0.2176) loss: 0.7799 (0.7762) time: 0.1177 data: 0.0272 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:14:05 lr: 0.000051 grad: 0.1663 (0.2082) loss: 0.7732 (0.7758) time: 0.1387 data: 0.0522 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:13:33 lr: 0.000051 grad: 0.1579 (0.2019) loss: 0.7756 (0.7756) time: 0.1419 data: 0.0585 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:13:11 lr: 0.000051 grad: 0.1696 (0.1968) loss: 0.7616 (0.7749) time: 0.1242 data: 0.0358 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:12:55 lr: 0.000051 grad: 0.1553 (0.1930) loss: 0.7598 (0.7737) time: 0.1533 data: 0.0587 max mem: 9377 +Train: [58] [1000/6250] eta: 0:12:41 lr: 0.000051 grad: 0.1613 (0.1903) loss: 0.7604 (0.7727) time: 0.1489 data: 0.0650 max mem: 9377 +Train: [58] [1100/6250] eta: 0:12:31 lr: 0.000051 grad: 0.1571 (0.1874) loss: 0.7535 (0.7718) time: 0.1642 data: 0.0846 max mem: 9377 +Train: [58] [1200/6250] eta: 0:12:20 lr: 0.000051 grad: 0.1628 (0.1855) loss: 0.7624 (0.7711) time: 0.1801 data: 0.1005 max mem: 9377 +Train: [58] [1300/6250] eta: 0:12:06 lr: 0.000051 grad: 0.1514 (0.1834) loss: 0.7731 (0.7709) time: 0.1596 data: 0.0825 max mem: 9377 +Train: [58] [1400/6250] eta: 0:11:55 lr: 0.000051 grad: 0.1590 (0.1818) loss: 0.7627 (0.7706) time: 0.1528 data: 0.0707 max mem: 9377 +Train: [58] [1500/6250] eta: 0:11:41 lr: 0.000051 grad: 0.1637 (0.1804) loss: 0.7649 (0.7706) time: 0.1632 data: 0.0890 max mem: 9377 +Train: [58] [1600/6250] eta: 0:11:25 lr: 0.000051 grad: 0.1598 (0.1792) loss: 0.7673 (0.7704) time: 0.1473 data: 0.0693 max mem: 9377 +Train: [58] [1700/6250] eta: 0:11:10 lr: 0.000051 grad: 0.1445 (0.1780) loss: 0.7722 (0.7702) time: 0.1538 data: 0.0686 max mem: 9377 +Train: [58] [1800/6250] eta: 0:10:53 lr: 0.000051 grad: 0.1535 (0.1769) loss: 0.7739 (0.7703) time: 0.1501 data: 0.0615 max mem: 9377 +Train: [58] [1900/6250] eta: 0:10:39 lr: 0.000051 grad: 0.1533 (0.1758) loss: 0.7694 (0.7702) time: 0.1416 data: 0.0582 max mem: 9377 +Train: [58] [2000/6250] eta: 0:10:24 lr: 0.000051 grad: 0.1610 (0.1753) loss: 0.7646 (0.7701) time: 0.1459 data: 0.0653 max mem: 9377 +Train: [58] [2100/6250] eta: 0:10:06 lr: 0.000051 grad: 0.1576 (0.1745) loss: 0.7662 (0.7700) time: 0.1273 data: 0.0436 max mem: 9377 +Train: [58] [2200/6250] eta: 0:09:49 lr: 0.000050 grad: 0.1668 (0.1739) loss: 0.7594 (0.7698) time: 0.1338 data: 0.0441 max mem: 9377 +Train: [58] [2300/6250] eta: 0:09:30 lr: 0.000050 grad: 0.1584 (0.1734) loss: 0.7639 (0.7695) time: 0.1350 data: 0.0460 max mem: 9377 +Train: [58] [2400/6250] eta: 0:09:13 lr: 0.000050 grad: 0.1583 (0.1728) loss: 0.7684 (0.7695) time: 0.1244 data: 0.0392 max mem: 9377 +Train: [58] [2500/6250] eta: 0:08:57 lr: 0.000050 grad: 0.1570 (0.1722) loss: 0.7703 (0.7695) time: 0.1206 data: 0.0372 max mem: 9377 +Train: [58] [2600/6250] eta: 0:08:41 lr: 0.000050 grad: 0.1646 (0.1719) loss: 0.7560 (0.7693) time: 0.1122 data: 0.0338 max mem: 9377 +Train: [58] [2700/6250] eta: 0:08:26 lr: 0.000050 grad: 0.1613 (0.1717) loss: 0.7636 (0.7691) time: 0.1561 data: 0.0757 max mem: 9377 +Train: [58] [2800/6250] eta: 0:08:11 lr: 0.000050 grad: 0.1705 (0.1715) loss: 0.7522 (0.7688) time: 0.1515 data: 0.0729 max mem: 9377 +Train: [58] [2900/6250] eta: 0:07:55 lr: 0.000050 grad: 0.1529 (0.1712) loss: 0.7615 (0.7686) time: 0.1188 data: 0.0365 max mem: 9377 +Train: [58] [3000/6250] eta: 0:07:40 lr: 0.000050 grad: 0.1603 (0.1710) loss: 0.7632 (0.7684) time: 0.1386 data: 0.0571 max mem: 9377 +Train: [58] [3100/6250] eta: 0:07:25 lr: 0.000050 grad: 0.1508 (0.1706) loss: 0.7756 (0.7684) time: 0.1370 data: 0.0543 max mem: 9377 +Train: [58] [3200/6250] eta: 0:07:10 lr: 0.000050 grad: 0.1611 (0.1702) loss: 0.7652 (0.7683) time: 0.1283 data: 0.0465 max mem: 9377 +Train: [58] [3300/6250] eta: 0:06:56 lr: 0.000050 grad: 0.1603 (0.1700) loss: 0.7692 (0.7683) time: 0.1579 data: 0.0723 max mem: 9377 +Train: [58] [3400/6250] eta: 0:06:42 lr: 0.000050 grad: 0.1645 (0.1698) loss: 0.7688 (0.7683) time: 0.1229 data: 0.0385 max mem: 9377 +Train: [58] [3500/6250] eta: 0:06:27 lr: 0.000050 grad: 0.1574 (0.1696) loss: 0.7675 (0.7683) time: 0.1178 data: 0.0341 max mem: 9377 +Train: [58] [3600/6250] eta: 0:06:13 lr: 0.000050 grad: 0.1583 (0.1694) loss: 0.7692 (0.7683) time: 0.1558 data: 0.0764 max mem: 9377 +Train: [58] [3700/6250] eta: 0:05:58 lr: 0.000050 grad: 0.1616 (0.1693) loss: 0.7666 (0.7684) time: 0.1284 data: 0.0510 max mem: 9377 +Train: [58] [3800/6250] eta: 0:05:44 lr: 0.000050 grad: 0.1624 (0.1691) loss: 0.7749 (0.7684) time: 0.1404 data: 0.0618 max mem: 9377 +Train: [58] [3900/6250] eta: 0:05:30 lr: 0.000050 grad: 0.1652 (0.1691) loss: 0.7738 (0.7685) time: 0.1521 data: 0.0745 max mem: 9377 +Train: [58] [4000/6250] eta: 0:05:16 lr: 0.000050 grad: 0.1563 (0.1689) loss: 0.7685 (0.7686) time: 0.1413 data: 0.0593 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:02 lr: 0.000050 grad: 0.1520 (0.1687) loss: 0.7801 (0.7687) time: 0.1365 data: 0.0569 max mem: 9377 +Train: [58] [4200/6250] eta: 0:04:48 lr: 0.000050 grad: 0.1637 (0.1685) loss: 0.7726 (0.7689) time: 0.1271 data: 0.0448 max mem: 9377 +Train: [58] [4300/6250] eta: 0:04:34 lr: 0.000050 grad: 0.1540 (0.1683) loss: 0.7784 (0.7690) time: 0.1382 data: 0.0551 max mem: 9377 +Train: [58] [4400/6250] eta: 0:04:19 lr: 0.000050 grad: 0.1563 (0.1681) loss: 0.7759 (0.7691) time: 0.1316 data: 0.0505 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:05 lr: 0.000050 grad: 0.1533 (0.1679) loss: 0.7672 (0.7692) time: 0.1417 data: 0.0567 max mem: 9377 +Train: [58] [4600/6250] eta: 0:03:51 lr: 0.000050 grad: 0.1552 (0.1678) loss: 0.7683 (0.7692) time: 0.1636 data: 0.0853 max mem: 9377 +Train: [58] [4700/6250] eta: 0:03:37 lr: 0.000050 grad: 0.1580 (0.1676) loss: 0.7631 (0.7693) time: 0.1364 data: 0.0484 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:23 lr: 0.000050 grad: 0.1595 (0.1675) loss: 0.7755 (0.7694) time: 0.1330 data: 0.0508 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:09 lr: 0.000050 grad: 0.1571 (0.1673) loss: 0.7733 (0.7694) time: 0.1385 data: 0.0529 max mem: 9377 +Train: [58] [5000/6250] eta: 0:02:55 lr: 0.000050 grad: 0.1520 (0.1671) loss: 0.7756 (0.7696) time: 0.1330 data: 0.0522 max mem: 9377 +Train: [58] [5100/6250] eta: 0:02:41 lr: 0.000050 grad: 0.1575 (0.1670) loss: 0.7726 (0.7697) time: 0.1493 data: 0.0684 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:27 lr: 0.000050 grad: 0.1596 (0.1669) loss: 0.7728 (0.7698) time: 0.1486 data: 0.0628 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:13 lr: 0.000049 grad: 0.1517 (0.1667) loss: 0.7789 (0.7699) time: 0.1492 data: 0.0759 max mem: 9377 +Train: [58] [5400/6250] eta: 0:01:59 lr: 0.000049 grad: 0.1577 (0.1666) loss: 0.7766 (0.7700) time: 0.1354 data: 0.0557 max mem: 9377 +Train: [58] [5500/6250] eta: 0:01:45 lr: 0.000049 grad: 0.1567 (0.1664) loss: 0.7796 (0.7701) time: 0.1273 data: 0.0429 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:31 lr: 0.000049 grad: 0.1559 (0.1663) loss: 0.7735 (0.7702) time: 0.1431 data: 0.0535 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:17 lr: 0.000049 grad: 0.1582 (0.1662) loss: 0.7764 (0.7703) time: 0.1264 data: 0.0419 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:03 lr: 0.000049 grad: 0.1545 (0.1661) loss: 0.7688 (0.7703) time: 0.1657 data: 0.0820 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:49 lr: 0.000049 grad: 0.1624 (0.1660) loss: 0.7696 (0.7703) time: 0.1291 data: 0.0461 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:35 lr: 0.000049 grad: 0.1519 (0.1659) loss: 0.7725 (0.7703) time: 0.1338 data: 0.0423 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:20 lr: 0.000049 grad: 0.1521 (0.1659) loss: 0.7735 (0.7703) time: 0.1104 data: 0.0249 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:06 lr: 0.000049 grad: 0.1607 (0.1658) loss: 0.7747 (0.7703) time: 0.1499 data: 0.0659 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1617 (0.1657) loss: 0.7635 (0.7703) time: 0.1435 data: 0.0586 max mem: 9377 +Train: [58] Total time: 0:14:39 (0.1407 s / it) +Averaged stats: lr: 0.000049 grad: 0.1617 (0.1657) loss: 0.7635 (0.7703) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:05:09 loss: 0.8211 (0.8211) time: 4.9973 data: 4.9650 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8220 (0.8183) time: 0.1418 data: 0.1171 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:13 (0.2156 s / it) +Averaged stats (hcp-train-subset): loss: 0.8220 (0.8183) +Eval (hcp-val): [58] [ 0/62] eta: 0:03:21 loss: 0.8462 (0.8462) time: 3.2572 data: 3.1882 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8506 (0.8507) time: 0.1148 data: 0.0897 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:12 (0.2059 s / it) +Averaged stats (hcp-val): loss: 0.8506 (0.8507) +Eval (nsd-val): [58] [ 0/62] eta: 0:03:10 loss: 0.8130 (0.8130) time: 3.0662 data: 2.9839 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8240 (0.8261) time: 0.1198 data: 0.0951 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:12 (0.2024 s / it) +Averaged stats (nsd-val): loss: 0.8240 (0.8261) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 10:14:28 lr: 0.000049 grad: 0.1973 (0.1973) loss: 0.7845 (0.7845) time: 5.8989 data: 5.8000 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:19:51 lr: 0.000049 grad: 0.2024 (0.2329) loss: 0.8044 (0.7915) time: 0.1679 data: 0.0760 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:17:24 lr: 0.000049 grad: 0.1870 (0.2271) loss: 0.7831 (0.7851) time: 0.1469 data: 0.0510 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:16:23 lr: 0.000049 grad: 0.1956 (0.2182) loss: 0.7752 (0.7817) time: 0.1412 data: 0.0462 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:15:28 lr: 0.000049 grad: 0.1569 (0.2092) loss: 0.7779 (0.7798) time: 0.1272 data: 0.0350 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:14:42 lr: 0.000049 grad: 0.1829 (0.2032) loss: 0.7762 (0.7779) time: 0.1434 data: 0.0557 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:14:05 lr: 0.000049 grad: 0.1542 (0.1968) loss: 0.7750 (0.7774) time: 0.1325 data: 0.0445 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:13:38 lr: 0.000049 grad: 0.1485 (0.1912) loss: 0.7808 (0.7774) time: 0.1311 data: 0.0312 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:13:15 lr: 0.000049 grad: 0.1563 (0.1873) loss: 0.7804 (0.7771) time: 0.1223 data: 0.0405 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:12:59 lr: 0.000049 grad: 0.1629 (0.1851) loss: 0.7561 (0.7763) time: 0.1259 data: 0.0366 max mem: 9377 +Train: [59] [1000/6250] eta: 0:12:39 lr: 0.000049 grad: 0.1500 (0.1824) loss: 0.7694 (0.7759) time: 0.1359 data: 0.0519 max mem: 9377 +Train: [59] [1100/6250] eta: 0:12:23 lr: 0.000049 grad: 0.1460 (0.1800) loss: 0.7728 (0.7755) time: 0.1394 data: 0.0625 max mem: 9377 +Train: [59] [1200/6250] eta: 0:12:13 lr: 0.000049 grad: 0.1644 (0.1786) loss: 0.7621 (0.7751) time: 0.0966 data: 0.0002 max mem: 9377 +Train: [59] [1300/6250] eta: 0:11:55 lr: 0.000049 grad: 0.1566 (0.1777) loss: 0.7778 (0.7746) time: 0.1287 data: 0.0394 max mem: 9377 +Train: [59] [1400/6250] eta: 0:11:40 lr: 0.000049 grad: 0.1549 (0.1764) loss: 0.7688 (0.7743) time: 0.1341 data: 0.0488 max mem: 9377 +Train: [59] [1500/6250] eta: 0:11:26 lr: 0.000049 grad: 0.1496 (0.1752) loss: 0.7671 (0.7739) time: 0.1414 data: 0.0649 max mem: 9377 +Train: [59] [1600/6250] eta: 0:11:09 lr: 0.000049 grad: 0.1574 (0.1741) loss: 0.7777 (0.7736) time: 0.1466 data: 0.0666 max mem: 9377 +Train: [59] [1700/6250] eta: 0:10:56 lr: 0.000049 grad: 0.1580 (0.1733) loss: 0.7660 (0.7733) time: 0.1281 data: 0.0482 max mem: 9377 +Train: [59] [1800/6250] eta: 0:10:41 lr: 0.000049 grad: 0.1622 (0.1727) loss: 0.7634 (0.7728) time: 0.1415 data: 0.0623 max mem: 9377 +Train: [59] [1900/6250] eta: 0:10:24 lr: 0.000049 grad: 0.1583 (0.1722) loss: 0.7582 (0.7723) time: 0.1307 data: 0.0489 max mem: 9377 +Train: [59] [2000/6250] eta: 0:10:11 lr: 0.000049 grad: 0.1501 (0.1717) loss: 0.7736 (0.7719) time: 0.1696 data: 0.0785 max mem: 9377 +Train: [59] [2100/6250] eta: 0:09:57 lr: 0.000048 grad: 0.1602 (0.1714) loss: 0.7641 (0.7716) time: 0.1656 data: 0.0781 max mem: 9377 +Train: [59] [2200/6250] eta: 0:09:42 lr: 0.000048 grad: 0.1586 (0.1709) loss: 0.7592 (0.7714) time: 0.1327 data: 0.0466 max mem: 9377 +Train: [59] [2300/6250] eta: 0:09:27 lr: 0.000048 grad: 0.1639 (0.1705) loss: 0.7548 (0.7709) time: 0.1521 data: 0.0676 max mem: 9377 +Train: [59] [2400/6250] eta: 0:09:11 lr: 0.000048 grad: 0.1578 (0.1701) loss: 0.7618 (0.7707) time: 0.1388 data: 0.0546 max mem: 9377 +Train: [59] [2500/6250] eta: 0:08:53 lr: 0.000048 grad: 0.1601 (0.1698) loss: 0.7628 (0.7705) time: 0.1218 data: 0.0340 max mem: 9377 +Train: [59] [2600/6250] eta: 0:08:37 lr: 0.000048 grad: 0.1640 (0.1695) loss: 0.7574 (0.7703) time: 0.1211 data: 0.0320 max mem: 9377 +Train: [59] [2700/6250] eta: 0:08:22 lr: 0.000048 grad: 0.1484 (0.1693) loss: 0.7712 (0.7700) time: 0.1308 data: 0.0519 max mem: 9377 +Train: [59] [2800/6250] eta: 0:08:08 lr: 0.000048 grad: 0.1638 (0.1691) loss: 0.7637 (0.7699) time: 0.1278 data: 0.0465 max mem: 9377 +Train: [59] [2900/6250] eta: 0:07:54 lr: 0.000048 grad: 0.1673 (0.1690) loss: 0.7520 (0.7696) time: 0.1511 data: 0.0693 max mem: 9377 +Train: [59] [3000/6250] eta: 0:07:39 lr: 0.000048 grad: 0.1629 (0.1688) loss: 0.7665 (0.7695) time: 0.1465 data: 0.0611 max mem: 9377 +Train: [59] [3100/6250] eta: 0:07:25 lr: 0.000048 grad: 0.1617 (0.1687) loss: 0.7641 (0.7694) time: 0.1515 data: 0.0734 max mem: 9377 +Train: [59] [3200/6250] eta: 0:07:10 lr: 0.000048 grad: 0.1542 (0.1684) loss: 0.7626 (0.7694) time: 0.1483 data: 0.0636 max mem: 9377 +Train: [59] [3300/6250] eta: 0:06:56 lr: 0.000048 grad: 0.1551 (0.1682) loss: 0.7736 (0.7695) time: 0.1485 data: 0.0668 max mem: 9377 +Train: [59] [3400/6250] eta: 0:06:41 lr: 0.000048 grad: 0.1537 (0.1680) loss: 0.7785 (0.7696) time: 0.1190 data: 0.0310 max mem: 9377 +Train: [59] [3500/6250] eta: 0:06:26 lr: 0.000048 grad: 0.1621 (0.1677) loss: 0.7742 (0.7697) time: 0.1161 data: 0.0315 max mem: 9377 +Train: [59] [3600/6250] eta: 0:06:11 lr: 0.000048 grad: 0.1619 (0.1675) loss: 0.7698 (0.7698) time: 0.1243 data: 0.0346 max mem: 9377 +Train: [59] [3700/6250] eta: 0:05:57 lr: 0.000048 grad: 0.1670 (0.1675) loss: 0.7722 (0.7698) time: 0.1659 data: 0.0816 max mem: 9377 +Train: [59] [3800/6250] eta: 0:05:43 lr: 0.000048 grad: 0.1668 (0.1674) loss: 0.7695 (0.7698) time: 0.1588 data: 0.0812 max mem: 9377 +Train: [59] [3900/6250] eta: 0:05:28 lr: 0.000048 grad: 0.1638 (0.1673) loss: 0.7588 (0.7697) time: 0.1276 data: 0.0486 max mem: 9377 +Train: [59] [4000/6250] eta: 0:05:14 lr: 0.000048 grad: 0.1564 (0.1672) loss: 0.7659 (0.7696) time: 0.1323 data: 0.0509 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:00 lr: 0.000048 grad: 0.1590 (0.1670) loss: 0.7604 (0.7696) time: 0.1164 data: 0.0351 max mem: 9377 +Train: [59] [4200/6250] eta: 0:04:45 lr: 0.000048 grad: 0.1546 (0.1669) loss: 0.7771 (0.7695) time: 0.1323 data: 0.0512 max mem: 9377 +Train: [59] [4300/6250] eta: 0:04:32 lr: 0.000048 grad: 0.1598 (0.1668) loss: 0.7700 (0.7695) time: 0.1170 data: 0.0302 max mem: 9377 +Train: [59] [4400/6250] eta: 0:04:18 lr: 0.000048 grad: 0.1579 (0.1667) loss: 0.7723 (0.7695) time: 0.1448 data: 0.0700 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:03 lr: 0.000048 grad: 0.1551 (0.1666) loss: 0.7719 (0.7695) time: 0.1510 data: 0.0740 max mem: 9377 +Train: [59] [4600/6250] eta: 0:03:49 lr: 0.000048 grad: 0.1599 (0.1665) loss: 0.7640 (0.7695) time: 0.1292 data: 0.0427 max mem: 9377 +Train: [59] [4700/6250] eta: 0:03:36 lr: 0.000048 grad: 0.1589 (0.1664) loss: 0.7687 (0.7696) time: 0.1583 data: 0.0766 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:21 lr: 0.000048 grad: 0.1604 (0.1663) loss: 0.7661 (0.7696) time: 0.1218 data: 0.0450 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:07 lr: 0.000048 grad: 0.1590 (0.1663) loss: 0.7638 (0.7696) time: 0.1290 data: 0.0470 max mem: 9377 +Train: [59] [5000/6250] eta: 0:02:54 lr: 0.000048 grad: 0.1536 (0.1662) loss: 0.7635 (0.7696) time: 0.1281 data: 0.0465 max mem: 9377 +Train: [59] [5100/6250] eta: 0:02:40 lr: 0.000048 grad: 0.1731 (0.1662) loss: 0.7689 (0.7697) time: 0.1649 data: 0.0802 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:26 lr: 0.000047 grad: 0.1556 (0.1662) loss: 0.7756 (0.7697) time: 0.1282 data: 0.0403 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:12 lr: 0.000047 grad: 0.1588 (0.1662) loss: 0.7776 (0.7697) time: 0.1292 data: 0.0504 max mem: 9377 +Train: [59] [5400/6250] eta: 0:01:58 lr: 0.000047 grad: 0.1702 (0.1662) loss: 0.7682 (0.7697) time: 0.1498 data: 0.0682 max mem: 9377 +Train: [59] [5500/6250] eta: 0:01:44 lr: 0.000047 grad: 0.1738 (0.1663) loss: 0.7694 (0.7696) time: 0.1574 data: 0.0692 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:30 lr: 0.000047 grad: 0.1591 (0.1664) loss: 0.7783 (0.7696) time: 0.1202 data: 0.0309 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:16 lr: 0.000047 grad: 0.1573 (0.1663) loss: 0.7808 (0.7697) time: 0.1381 data: 0.0511 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:02 lr: 0.000047 grad: 0.1607 (0.1663) loss: 0.7734 (0.7697) time: 0.1361 data: 0.0500 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:48 lr: 0.000047 grad: 0.1610 (0.1663) loss: 0.7573 (0.7697) time: 0.1295 data: 0.0415 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:34 lr: 0.000047 grad: 0.1560 (0.1662) loss: 0.7819 (0.7698) time: 0.0981 data: 0.0050 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:20 lr: 0.000047 grad: 0.1625 (0.1661) loss: 0.7689 (0.7699) time: 0.1223 data: 0.0299 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:06 lr: 0.000047 grad: 0.1635 (0.1661) loss: 0.7669 (0.7699) time: 0.1198 data: 0.0398 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1675 (0.1661) loss: 0.7686 (0.7699) time: 0.1297 data: 0.0495 max mem: 9377 +Train: [59] Total time: 0:14:33 (0.1397 s / it) +Averaged stats: lr: 0.000047 grad: 0.1675 (0.1661) loss: 0.7686 (0.7699) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:03:34 loss: 0.8255 (0.8255) time: 3.4660 data: 3.3949 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8172 (0.8165) time: 0.1154 data: 0.0908 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:12 (0.2064 s / it) +Averaged stats (hcp-train-subset): loss: 0.8172 (0.8165) +Making plots (hcp-train-subset): example=10 +Eval (hcp-val): [59] [ 0/62] eta: 0:03:35 loss: 0.8471 (0.8471) time: 3.4828 data: 3.4067 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8507 (0.8512) time: 0.1417 data: 0.1169 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:12 (0.2090 s / it) +Averaged stats (hcp-val): loss: 0.8507 (0.8512) +Making plots (hcp-val): example=9 +Eval (nsd-val): [59] [ 0/62] eta: 0:03:24 loss: 0.8157 (0.8157) time: 3.2919 data: 3.2342 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8340 (0.8311) time: 0.1305 data: 0.1056 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:12 (0.1999 s / it) +Averaged stats (nsd-val): loss: 0.8340 (0.8311) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 9:23:54 lr: 0.000047 grad: 0.3508 (0.3508) loss: 0.6782 (0.6782) time: 5.4135 data: 5.3021 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:21:06 lr: 0.000047 grad: 0.2179 (0.2740) loss: 0.7774 (0.7783) time: 0.1703 data: 0.0643 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:17:43 lr: 0.000047 grad: 0.2088 (0.2598) loss: 0.7830 (0.7781) time: 0.1118 data: 0.0174 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:16:11 lr: 0.000047 grad: 0.1727 (0.2366) loss: 0.7817 (0.7802) time: 0.1115 data: 0.0186 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:15:09 lr: 0.000047 grad: 0.1778 (0.2258) loss: 0.7692 (0.7779) time: 0.1123 data: 0.0182 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:14:26 lr: 0.000047 grad: 0.1634 (0.2171) loss: 0.7761 (0.7758) time: 0.1344 data: 0.0463 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:13:50 lr: 0.000047 grad: 0.1611 (0.2093) loss: 0.7769 (0.7752) time: 0.0918 data: 0.0011 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:13:33 lr: 0.000047 grad: 0.1562 (0.2033) loss: 0.7705 (0.7745) time: 0.1433 data: 0.0606 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:13:16 lr: 0.000047 grad: 0.1571 (0.1989) loss: 0.7716 (0.7738) time: 0.1634 data: 0.0810 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:12:59 lr: 0.000047 grad: 0.1675 (0.1952) loss: 0.7649 (0.7735) time: 0.1348 data: 0.0499 max mem: 9377 +Train: [60] [1000/6250] eta: 0:12:44 lr: 0.000047 grad: 0.1626 (0.1924) loss: 0.7735 (0.7732) time: 0.1468 data: 0.0654 max mem: 9377 +Train: [60] [1100/6250] eta: 0:12:29 lr: 0.000047 grad: 0.1748 (0.1899) loss: 0.7681 (0.7729) time: 0.1723 data: 0.0929 max mem: 9377 +Train: [60] [1200/6250] eta: 0:12:13 lr: 0.000047 grad: 0.1545 (0.1876) loss: 0.7749 (0.7729) time: 0.1576 data: 0.0743 max mem: 9377 +Train: [60] [1300/6250] eta: 0:12:00 lr: 0.000047 grad: 0.1491 (0.1853) loss: 0.7714 (0.7729) time: 0.2051 data: 0.1245 max mem: 9377 +Train: [60] [1400/6250] eta: 0:11:39 lr: 0.000047 grad: 0.1578 (0.1836) loss: 0.7731 (0.7727) time: 0.1118 data: 0.0267 max mem: 9377 +Train: [60] [1500/6250] eta: 0:11:23 lr: 0.000047 grad: 0.1579 (0.1821) loss: 0.7686 (0.7728) time: 0.1505 data: 0.0705 max mem: 9377 +Train: [60] [1600/6250] eta: 0:11:15 lr: 0.000047 grad: 0.1718 (0.1809) loss: 0.7784 (0.7727) time: 0.0921 data: 0.0002 max mem: 9377 +Train: [60] [1700/6250] eta: 0:11:01 lr: 0.000047 grad: 0.1636 (0.1803) loss: 0.7632 (0.7725) time: 0.1518 data: 0.0752 max mem: 9377 +Train: [60] [1800/6250] eta: 0:10:43 lr: 0.000047 grad: 0.1704 (0.1794) loss: 0.7604 (0.7724) time: 0.1363 data: 0.0486 max mem: 9377 +Train: [60] [1900/6250] eta: 0:10:28 lr: 0.000047 grad: 0.1678 (0.1789) loss: 0.7713 (0.7720) time: 0.1397 data: 0.0539 max mem: 9377 +Train: [60] [2000/6250] eta: 0:10:15 lr: 0.000047 grad: 0.1602 (0.1783) loss: 0.7665 (0.7717) time: 0.1089 data: 0.0232 max mem: 9377 +Train: [60] [2100/6250] eta: 0:10:00 lr: 0.000046 grad: 0.1631 (0.1778) loss: 0.7656 (0.7715) time: 0.1456 data: 0.0610 max mem: 9377 +Train: [60] [2200/6250] eta: 0:09:46 lr: 0.000046 grad: 0.1738 (0.1773) loss: 0.7621 (0.7713) time: 0.1390 data: 0.0597 max mem: 9377 +Train: [60] [2300/6250] eta: 0:09:29 lr: 0.000046 grad: 0.1612 (0.1766) loss: 0.7728 (0.7714) time: 0.1341 data: 0.0508 max mem: 9377 +Train: [60] [2400/6250] eta: 0:09:13 lr: 0.000046 grad: 0.1509 (0.1759) loss: 0.7799 (0.7715) time: 0.1404 data: 0.0551 max mem: 9377 +Train: [60] [2500/6250] eta: 0:08:56 lr: 0.000046 grad: 0.1560 (0.1753) loss: 0.7768 (0.7716) time: 0.1282 data: 0.0440 max mem: 9377 +Train: [60] [2600/6250] eta: 0:08:39 lr: 0.000046 grad: 0.1597 (0.1748) loss: 0.7671 (0.7715) time: 0.1103 data: 0.0159 max mem: 9377 +Train: [60] [2700/6250] eta: 0:08:23 lr: 0.000046 grad: 0.1590 (0.1743) loss: 0.7662 (0.7715) time: 0.1365 data: 0.0520 max mem: 9377 +Train: [60] [2800/6250] eta: 0:08:08 lr: 0.000046 grad: 0.1668 (0.1740) loss: 0.7643 (0.7714) time: 0.1514 data: 0.0702 max mem: 9377 +Train: [60] [2900/6250] eta: 0:07:53 lr: 0.000046 grad: 0.1587 (0.1737) loss: 0.7715 (0.7714) time: 0.1408 data: 0.0576 max mem: 9377 +Train: [60] [3000/6250] eta: 0:07:38 lr: 0.000046 grad: 0.1634 (0.1733) loss: 0.7685 (0.7713) time: 0.1299 data: 0.0460 max mem: 9377 +Train: [60] [3100/6250] eta: 0:07:22 lr: 0.000046 grad: 0.1739 (0.1732) loss: 0.7586 (0.7710) time: 0.1390 data: 0.0618 max mem: 9377 +Train: [60] [3200/6250] eta: 0:07:08 lr: 0.000046 grad: 0.1629 (0.1729) loss: 0.7656 (0.7709) time: 0.1147 data: 0.0334 max mem: 9377 +Train: [60] [3300/6250] eta: 0:06:53 lr: 0.000046 grad: 0.1573 (0.1727) loss: 0.7594 (0.7707) time: 0.1188 data: 0.0376 max mem: 9377 +Train: [60] [3400/6250] eta: 0:06:38 lr: 0.000046 grad: 0.1651 (0.1725) loss: 0.7633 (0.7705) time: 0.1225 data: 0.0373 max mem: 9377 +Train: [60] [3500/6250] eta: 0:06:23 lr: 0.000046 grad: 0.1648 (0.1722) loss: 0.7697 (0.7704) time: 0.1344 data: 0.0523 max mem: 9377 +Train: [60] [3600/6250] eta: 0:06:09 lr: 0.000046 grad: 0.1568 (0.1720) loss: 0.7697 (0.7703) time: 0.1332 data: 0.0567 max mem: 9377 +Train: [60] [3700/6250] eta: 0:05:55 lr: 0.000046 grad: 0.1618 (0.1718) loss: 0.7643 (0.7702) time: 0.1446 data: 0.0626 max mem: 9377 +Train: [60] [3800/6250] eta: 0:05:41 lr: 0.000046 grad: 0.1563 (0.1715) loss: 0.7620 (0.7701) time: 0.1601 data: 0.0843 max mem: 9377 +Train: [60] [3900/6250] eta: 0:05:28 lr: 0.000046 grad: 0.1621 (0.1713) loss: 0.7649 (0.7700) time: 0.1813 data: 0.1000 max mem: 9377 +Train: [60] [4000/6250] eta: 0:05:13 lr: 0.000046 grad: 0.1578 (0.1711) loss: 0.7608 (0.7698) time: 0.1457 data: 0.0589 max mem: 9377 +Train: [60] [4100/6250] eta: 0:04:59 lr: 0.000046 grad: 0.1657 (0.1709) loss: 0.7694 (0.7697) time: 0.1342 data: 0.0523 max mem: 9377 +Train: [60] [4200/6250] eta: 0:04:45 lr: 0.000046 grad: 0.1648 (0.1707) loss: 0.7649 (0.7696) time: 0.1384 data: 0.0557 max mem: 9377 +Train: [60] [4300/6250] eta: 0:04:31 lr: 0.000046 grad: 0.1609 (0.1706) loss: 0.7729 (0.7695) time: 0.1368 data: 0.0523 max mem: 9377 +Train: [60] [4400/6250] eta: 0:04:17 lr: 0.000046 grad: 0.1588 (0.1704) loss: 0.7662 (0.7695) time: 0.1345 data: 0.0552 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:03 lr: 0.000046 grad: 0.1745 (0.1702) loss: 0.7696 (0.7696) time: 0.1491 data: 0.0694 max mem: 9377 +Train: [60] [4600/6250] eta: 0:03:49 lr: 0.000046 grad: 0.1632 (0.1702) loss: 0.7708 (0.7695) time: 0.1219 data: 0.0299 max mem: 9377 +Train: [60] [4700/6250] eta: 0:03:35 lr: 0.000046 grad: 0.1679 (0.1700) loss: 0.7695 (0.7695) time: 0.1449 data: 0.0601 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:21 lr: 0.000046 grad: 0.1625 (0.1699) loss: 0.7774 (0.7695) time: 0.1373 data: 0.0614 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:07 lr: 0.000046 grad: 0.1650 (0.1698) loss: 0.7753 (0.7695) time: 0.1281 data: 0.0443 max mem: 9377 +Train: [60] [5000/6250] eta: 0:02:53 lr: 0.000046 grad: 0.1590 (0.1697) loss: 0.7621 (0.7694) time: 0.1790 data: 0.0972 max mem: 9377 +Train: [60] [5100/6250] eta: 0:02:39 lr: 0.000046 grad: 0.1712 (0.1696) loss: 0.7619 (0.7694) time: 0.1520 data: 0.0711 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:26 lr: 0.000045 grad: 0.1660 (0.1695) loss: 0.7709 (0.7694) time: 0.1489 data: 0.0695 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:12 lr: 0.000045 grad: 0.1689 (0.1694) loss: 0.7709 (0.7694) time: 0.1598 data: 0.0740 max mem: 9377 +Train: [60] [5400/6250] eta: 0:01:58 lr: 0.000045 grad: 0.1618 (0.1694) loss: 0.7695 (0.7694) time: 0.1424 data: 0.0626 max mem: 9377 +Train: [60] [5500/6250] eta: 0:01:44 lr: 0.000045 grad: 0.1703 (0.1693) loss: 0.7622 (0.7693) time: 0.1700 data: 0.0895 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:30 lr: 0.000045 grad: 0.1661 (0.1693) loss: 0.7606 (0.7693) time: 0.1222 data: 0.0274 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:16 lr: 0.000045 grad: 0.1573 (0.1692) loss: 0.7761 (0.7692) time: 0.1303 data: 0.0453 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:02 lr: 0.000045 grad: 0.1710 (0.1693) loss: 0.7631 (0.7691) time: 0.1239 data: 0.0398 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:48 lr: 0.000045 grad: 0.1735 (0.1693) loss: 0.7661 (0.7690) time: 0.1312 data: 0.0489 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:34 lr: 0.000045 grad: 0.1627 (0.1693) loss: 0.7741 (0.7690) time: 0.1350 data: 0.0516 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:20 lr: 0.000045 grad: 0.1662 (0.1693) loss: 0.7652 (0.7690) time: 0.1343 data: 0.0454 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:06 lr: 0.000045 grad: 0.1611 (0.1693) loss: 0.7714 (0.7690) time: 0.1606 data: 0.0812 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1628 (0.1693) loss: 0.7690 (0.7690) time: 0.1377 data: 0.0540 max mem: 9377 +Train: [60] Total time: 0:14:31 (0.1394 s / it) +Averaged stats: lr: 0.000045 grad: 0.1628 (0.1693) loss: 0.7690 (0.7690) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:04:14 loss: 0.8220 (0.8220) time: 4.1082 data: 4.0170 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8123 (0.8138) time: 0.1171 data: 0.0921 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:12 (0.2092 s / it) +Averaged stats (hcp-train-subset): loss: 0.8123 (0.8138) +Eval (hcp-val): [60] [ 0/62] eta: 0:05:01 loss: 0.8434 (0.8434) time: 4.8626 data: 4.8321 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8457 (0.8474) time: 0.0879 data: 0.0632 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:13 (0.2097 s / it) +Averaged stats (hcp-val): loss: 0.8457 (0.8474) +Eval (nsd-val): [60] [ 0/62] eta: 0:03:20 loss: 0.8193 (0.8193) time: 3.2377 data: 3.1636 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8256 (0.8273) time: 0.1153 data: 0.0884 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:13 (0.2181 s / it) +Averaged stats (nsd-val): loss: 0.8256 (0.8273) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 9:24:20 lr: 0.000045 grad: 0.8360 (0.8360) loss: 0.7625 (0.7625) time: 5.4177 data: 5.1995 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:20:55 lr: 0.000045 grad: 0.2202 (0.2808) loss: 0.7878 (0.7860) time: 0.1567 data: 0.0592 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:17:33 lr: 0.000045 grad: 0.2274 (0.2593) loss: 0.7704 (0.7779) time: 0.1406 data: 0.0390 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:16:05 lr: 0.000045 grad: 0.1955 (0.2419) loss: 0.7644 (0.7735) time: 0.1295 data: 0.0402 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:15:07 lr: 0.000045 grad: 0.1693 (0.2271) loss: 0.7604 (0.7724) time: 0.1201 data: 0.0309 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:14:22 lr: 0.000045 grad: 0.1715 (0.2165) loss: 0.7746 (0.7726) time: 0.1350 data: 0.0452 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:13:55 lr: 0.000045 grad: 0.1571 (0.2076) loss: 0.7868 (0.7741) time: 0.1345 data: 0.0440 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:13:30 lr: 0.000045 grad: 0.1571 (0.2008) loss: 0.7833 (0.7749) time: 0.1473 data: 0.0608 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:13:11 lr: 0.000045 grad: 0.1636 (0.1959) loss: 0.7776 (0.7756) time: 0.1521 data: 0.0662 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:12:55 lr: 0.000045 grad: 0.1700 (0.1925) loss: 0.7674 (0.7754) time: 0.1511 data: 0.0674 max mem: 9377 +Train: [61] [1000/6250] eta: 0:12:35 lr: 0.000045 grad: 0.1505 (0.1898) loss: 0.7793 (0.7753) time: 0.1394 data: 0.0566 max mem: 9377 +Train: [61] [1100/6250] eta: 0:12:19 lr: 0.000045 grad: 0.1495 (0.1869) loss: 0.7690 (0.7751) time: 0.1390 data: 0.0549 max mem: 9377 +Train: [61] [1200/6250] eta: 0:12:02 lr: 0.000045 grad: 0.1667 (0.1851) loss: 0.7698 (0.7748) time: 0.1351 data: 0.0567 max mem: 9377 +Train: [61] [1300/6250] eta: 0:11:45 lr: 0.000045 grad: 0.1598 (0.1833) loss: 0.7717 (0.7745) time: 0.1357 data: 0.0538 max mem: 9377 +Train: [61] [1400/6250] eta: 0:11:30 lr: 0.000045 grad: 0.1620 (0.1821) loss: 0.7702 (0.7742) time: 0.1387 data: 0.0587 max mem: 9377 +Train: [61] [1500/6250] eta: 0:11:13 lr: 0.000045 grad: 0.1597 (0.1809) loss: 0.7765 (0.7739) time: 0.1398 data: 0.0598 max mem: 9377 +Train: [61] [1600/6250] eta: 0:11:00 lr: 0.000045 grad: 0.1550 (0.1799) loss: 0.7665 (0.7736) time: 0.1570 data: 0.0757 max mem: 9377 +Train: [61] [1700/6250] eta: 0:10:44 lr: 0.000045 grad: 0.1541 (0.1790) loss: 0.7625 (0.7731) time: 0.1363 data: 0.0600 max mem: 9377 +Train: [61] [1800/6250] eta: 0:10:35 lr: 0.000045 grad: 0.1598 (0.1781) loss: 0.7706 (0.7728) time: 0.1675 data: 0.0911 max mem: 9377 +Train: [61] [1900/6250] eta: 0:10:19 lr: 0.000045 grad: 0.1613 (0.1774) loss: 0.7655 (0.7725) time: 0.1329 data: 0.0552 max mem: 9377 +Train: [61] [2000/6250] eta: 0:10:05 lr: 0.000045 grad: 0.1651 (0.1766) loss: 0.7603 (0.7722) time: 0.1428 data: 0.0645 max mem: 9377 +Train: [61] [2100/6250] eta: 0:09:51 lr: 0.000044 grad: 0.1627 (0.1760) loss: 0.7676 (0.7722) time: 0.1597 data: 0.0729 max mem: 9377 +Train: [61] [2200/6250] eta: 0:09:39 lr: 0.000044 grad: 0.1605 (0.1755) loss: 0.7721 (0.7721) time: 0.1542 data: 0.0764 max mem: 9377 +Train: [61] [2300/6250] eta: 0:09:23 lr: 0.000044 grad: 0.1562 (0.1750) loss: 0.7628 (0.7718) time: 0.1213 data: 0.0399 max mem: 9377 +Train: [61] [2400/6250] eta: 0:09:07 lr: 0.000044 grad: 0.1649 (0.1746) loss: 0.7635 (0.7717) time: 0.1129 data: 0.0166 max mem: 9377 +Train: [61] [2500/6250] eta: 0:08:52 lr: 0.000044 grad: 0.1620 (0.1741) loss: 0.7666 (0.7714) time: 0.1361 data: 0.0582 max mem: 9377 +Train: [61] [2600/6250] eta: 0:08:35 lr: 0.000044 grad: 0.1564 (0.1738) loss: 0.7658 (0.7712) time: 0.1360 data: 0.0507 max mem: 9377 +Train: [61] [2700/6250] eta: 0:08:19 lr: 0.000044 grad: 0.1657 (0.1735) loss: 0.7597 (0.7710) time: 0.1267 data: 0.0404 max mem: 9377 +Train: [61] [2800/6250] eta: 0:08:04 lr: 0.000044 grad: 0.1573 (0.1732) loss: 0.7643 (0.7707) time: 0.1255 data: 0.0470 max mem: 9377 +Train: [61] [2900/6250] eta: 0:07:50 lr: 0.000044 grad: 0.1643 (0.1730) loss: 0.7582 (0.7705) time: 0.1527 data: 0.0768 max mem: 9377 +Train: [61] [3000/6250] eta: 0:07:36 lr: 0.000044 grad: 0.1589 (0.1726) loss: 0.7672 (0.7703) time: 0.1505 data: 0.0689 max mem: 9377 +Train: [61] [3100/6250] eta: 0:07:23 lr: 0.000044 grad: 0.1697 (0.1725) loss: 0.7653 (0.7702) time: 0.1051 data: 0.0231 max mem: 9377 +Train: [61] [3200/6250] eta: 0:07:08 lr: 0.000044 grad: 0.1627 (0.1724) loss: 0.7675 (0.7700) time: 0.1484 data: 0.0736 max mem: 9377 +Train: [61] [3300/6250] eta: 0:06:53 lr: 0.000044 grad: 0.1587 (0.1722) loss: 0.7669 (0.7700) time: 0.1383 data: 0.0543 max mem: 9377 +Train: [61] [3400/6250] eta: 0:06:38 lr: 0.000044 grad: 0.1644 (0.1721) loss: 0.7575 (0.7697) time: 0.1225 data: 0.0444 max mem: 9377 +Train: [61] [3500/6250] eta: 0:06:24 lr: 0.000044 grad: 0.1633 (0.1718) loss: 0.7659 (0.7697) time: 0.1208 data: 0.0393 max mem: 9377 +Train: [61] [3600/6250] eta: 0:06:10 lr: 0.000044 grad: 0.1641 (0.1715) loss: 0.7575 (0.7697) time: 0.1390 data: 0.0566 max mem: 9377 +Train: [61] [3700/6250] eta: 0:05:56 lr: 0.000044 grad: 0.1626 (0.1713) loss: 0.7687 (0.7697) time: 0.1175 data: 0.0350 max mem: 9377 +Train: [61] [3800/6250] eta: 0:05:41 lr: 0.000044 grad: 0.1591 (0.1711) loss: 0.7743 (0.7696) time: 0.1361 data: 0.0571 max mem: 9377 +Train: [61] [3900/6250] eta: 0:05:27 lr: 0.000044 grad: 0.1605 (0.1709) loss: 0.7665 (0.7696) time: 0.1505 data: 0.0687 max mem: 9377 +Train: [61] [4000/6250] eta: 0:05:14 lr: 0.000044 grad: 0.1581 (0.1706) loss: 0.7597 (0.7696) time: 0.0979 data: 0.0002 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:00 lr: 0.000044 grad: 0.1606 (0.1704) loss: 0.7622 (0.7696) time: 0.1611 data: 0.0847 max mem: 9377 +Train: [61] [4200/6250] eta: 0:04:46 lr: 0.000044 grad: 0.1583 (0.1701) loss: 0.7751 (0.7697) time: 0.1869 data: 0.1059 max mem: 9377 +Train: [61] [4300/6250] eta: 0:04:32 lr: 0.000044 grad: 0.1563 (0.1700) loss: 0.7731 (0.7697) time: 0.1207 data: 0.0387 max mem: 9377 +Train: [61] [4400/6250] eta: 0:04:18 lr: 0.000044 grad: 0.1678 (0.1698) loss: 0.7647 (0.7697) time: 0.1359 data: 0.0537 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:04 lr: 0.000044 grad: 0.1569 (0.1696) loss: 0.7801 (0.7699) time: 0.1167 data: 0.0276 max mem: 9377 +Train: [61] [4600/6250] eta: 0:03:50 lr: 0.000044 grad: 0.1499 (0.1694) loss: 0.7768 (0.7700) time: 0.1044 data: 0.0274 max mem: 9377 +Train: [61] [4700/6250] eta: 0:03:36 lr: 0.000044 grad: 0.1649 (0.1692) loss: 0.7726 (0.7701) time: 0.1381 data: 0.0468 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:22 lr: 0.000044 grad: 0.1583 (0.1690) loss: 0.7728 (0.7701) time: 0.1521 data: 0.0746 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:08 lr: 0.000044 grad: 0.1581 (0.1689) loss: 0.7733 (0.7703) time: 0.1486 data: 0.0684 max mem: 9377 +Train: [61] [5000/6250] eta: 0:02:54 lr: 0.000044 grad: 0.1599 (0.1688) loss: 0.7703 (0.7703) time: 0.1183 data: 0.0314 max mem: 9377 +Train: [61] [5100/6250] eta: 0:02:40 lr: 0.000044 grad: 0.1536 (0.1686) loss: 0.7792 (0.7704) time: 0.1305 data: 0.0543 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:27 lr: 0.000044 grad: 0.1684 (0.1686) loss: 0.7602 (0.7703) time: 0.1941 data: 0.1153 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:13 lr: 0.000043 grad: 0.1596 (0.1685) loss: 0.7778 (0.7704) time: 0.1352 data: 0.0603 max mem: 9377 +Train: [61] [5400/6250] eta: 0:01:59 lr: 0.000043 grad: 0.1593 (0.1684) loss: 0.7698 (0.7703) time: 0.1353 data: 0.0594 max mem: 9377 +Train: [61] [5500/6250] eta: 0:01:45 lr: 0.000043 grad: 0.1618 (0.1684) loss: 0.7681 (0.7703) time: 0.1759 data: 0.0947 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:31 lr: 0.000043 grad: 0.1588 (0.1683) loss: 0.7681 (0.7702) time: 0.1529 data: 0.0705 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:17 lr: 0.000043 grad: 0.1631 (0.1683) loss: 0.7679 (0.7702) time: 0.1529 data: 0.0671 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:03 lr: 0.000043 grad: 0.1698 (0.1683) loss: 0.7643 (0.7701) time: 0.1343 data: 0.0522 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:49 lr: 0.000043 grad: 0.1581 (0.1682) loss: 0.7804 (0.7701) time: 0.1431 data: 0.0623 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:35 lr: 0.000043 grad: 0.1670 (0.1682) loss: 0.7727 (0.7701) time: 0.1293 data: 0.0478 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:21 lr: 0.000043 grad: 0.1682 (0.1682) loss: 0.7731 (0.7701) time: 0.1453 data: 0.0638 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:06 lr: 0.000043 grad: 0.1584 (0.1681) loss: 0.7712 (0.7701) time: 0.1298 data: 0.0343 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1636 (0.1681) loss: 0.7700 (0.7701) time: 0.1166 data: 0.0254 max mem: 9377 +Train: [61] Total time: 0:14:39 (0.1407 s / it) +Averaged stats: lr: 0.000043 grad: 0.1636 (0.1681) loss: 0.7700 (0.7701) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:04:29 loss: 0.8216 (0.8216) time: 4.3428 data: 4.2881 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8172 (0.8159) time: 0.1197 data: 0.0950 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:12 (0.2045 s / it) +Averaged stats (hcp-train-subset): loss: 0.8172 (0.8159) +Eval (hcp-val): [61] [ 0/62] eta: 0:04:54 loss: 0.8504 (0.8504) time: 4.7532 data: 4.7222 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8511 (0.8512) time: 0.1252 data: 0.1001 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:12 (0.2063 s / it) +Averaged stats (hcp-val): loss: 0.8511 (0.8512) +Eval (nsd-val): [61] [ 0/62] eta: 0:04:25 loss: 0.8190 (0.8190) time: 4.2823 data: 4.2040 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8292 (0.8287) time: 0.1278 data: 0.1025 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (nsd-val): loss: 0.8292 (0.8287) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 9:37:43 lr: 0.000043 grad: 0.2892 (0.2892) loss: 0.7762 (0.7762) time: 5.5461 data: 5.4063 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:20:17 lr: 0.000043 grad: 0.1684 (0.2181) loss: 0.8110 (0.8123) time: 0.1447 data: 0.0493 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:17:07 lr: 0.000043 grad: 0.1839 (0.2081) loss: 0.7920 (0.8021) time: 0.1460 data: 0.0544 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:15:41 lr: 0.000043 grad: 0.1658 (0.1983) loss: 0.7754 (0.7965) time: 0.1367 data: 0.0421 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:14:56 lr: 0.000043 grad: 0.1643 (0.1934) loss: 0.7821 (0.7914) time: 0.1540 data: 0.0639 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:14:11 lr: 0.000043 grad: 0.1918 (0.1921) loss: 0.7692 (0.7876) time: 0.1231 data: 0.0342 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:13:45 lr: 0.000043 grad: 0.1670 (0.1896) loss: 0.7710 (0.7846) time: 0.1359 data: 0.0499 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:13:23 lr: 0.000043 grad: 0.1714 (0.1864) loss: 0.7728 (0.7830) time: 0.1413 data: 0.0563 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:13:01 lr: 0.000043 grad: 0.1804 (0.1846) loss: 0.7645 (0.7818) time: 0.1297 data: 0.0420 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:12:46 lr: 0.000043 grad: 0.1658 (0.1828) loss: 0.7744 (0.7810) time: 0.1228 data: 0.0326 max mem: 9377 +Train: [62] [1000/6250] eta: 0:12:36 lr: 0.000043 grad: 0.1687 (0.1812) loss: 0.7698 (0.7804) time: 0.1283 data: 0.0315 max mem: 9377 +Train: [62] [1100/6250] eta: 0:12:17 lr: 0.000043 grad: 0.1677 (0.1804) loss: 0.7643 (0.7796) time: 0.1352 data: 0.0585 max mem: 9377 +Train: [62] [1200/6250] eta: 0:12:00 lr: 0.000043 grad: 0.1600 (0.1794) loss: 0.7705 (0.7786) time: 0.1293 data: 0.0540 max mem: 9377 +Train: [62] [1300/6250] eta: 0:11:45 lr: 0.000043 grad: 0.1582 (0.1784) loss: 0.7658 (0.7777) time: 0.1380 data: 0.0578 max mem: 9377 +Train: [62] [1400/6250] eta: 0:11:37 lr: 0.000043 grad: 0.1650 (0.1775) loss: 0.7661 (0.7769) time: 0.1033 data: 0.0002 max mem: 9377 +Train: [62] [1500/6250] eta: 0:11:19 lr: 0.000043 grad: 0.1773 (0.1774) loss: 0.7544 (0.7761) time: 0.1323 data: 0.0525 max mem: 9377 +Train: [62] [1600/6250] eta: 0:11:03 lr: 0.000043 grad: 0.1624 (0.1773) loss: 0.7699 (0.7753) time: 0.1313 data: 0.0501 max mem: 9377 +Train: [62] [1700/6250] eta: 0:10:48 lr: 0.000043 grad: 0.1670 (0.1767) loss: 0.7652 (0.7747) time: 0.1261 data: 0.0379 max mem: 9377 +Train: [62] [1800/6250] eta: 0:10:31 lr: 0.000043 grad: 0.1649 (0.1761) loss: 0.7671 (0.7742) time: 0.1348 data: 0.0592 max mem: 9377 +Train: [62] [1900/6250] eta: 0:10:20 lr: 0.000043 grad: 0.1715 (0.1757) loss: 0.7576 (0.7737) time: 0.1761 data: 0.0988 max mem: 9377 +Train: [62] [2000/6250] eta: 0:10:08 lr: 0.000043 grad: 0.1658 (0.1754) loss: 0.7578 (0.7732) time: 0.1872 data: 0.0578 max mem: 9377 +Train: [62] [2100/6250] eta: 0:09:54 lr: 0.000043 grad: 0.1680 (0.1752) loss: 0.7655 (0.7728) time: 0.1434 data: 0.0612 max mem: 9377 +Train: [62] [2200/6250] eta: 0:09:40 lr: 0.000042 grad: 0.1696 (0.1749) loss: 0.7622 (0.7723) time: 0.1502 data: 0.0620 max mem: 9377 +Train: [62] [2300/6250] eta: 0:09:27 lr: 0.000042 grad: 0.1758 (0.1747) loss: 0.7583 (0.7719) time: 0.1556 data: 0.0750 max mem: 9377 +Train: [62] [2400/6250] eta: 0:09:13 lr: 0.000042 grad: 0.1568 (0.1745) loss: 0.7546 (0.7715) time: 0.1271 data: 0.0371 max mem: 9377 +Train: [62] [2500/6250] eta: 0:08:57 lr: 0.000042 grad: 0.1655 (0.1742) loss: 0.7739 (0.7712) time: 0.1085 data: 0.0218 max mem: 9377 +Train: [62] [2600/6250] eta: 0:08:41 lr: 0.000042 grad: 0.1685 (0.1740) loss: 0.7613 (0.7708) time: 0.1509 data: 0.0701 max mem: 9377 +Train: [62] [2700/6250] eta: 0:08:26 lr: 0.000042 grad: 0.1629 (0.1739) loss: 0.7703 (0.7704) time: 0.1458 data: 0.0591 max mem: 9377 +Train: [62] [2800/6250] eta: 0:08:10 lr: 0.000042 grad: 0.1655 (0.1738) loss: 0.7644 (0.7700) time: 0.1237 data: 0.0331 max mem: 9377 +Train: [62] [2900/6250] eta: 0:07:53 lr: 0.000042 grad: 0.1700 (0.1737) loss: 0.7598 (0.7697) time: 0.1208 data: 0.0381 max mem: 9377 +Train: [62] [3000/6250] eta: 0:07:38 lr: 0.000042 grad: 0.1668 (0.1736) loss: 0.7666 (0.7695) time: 0.1272 data: 0.0361 max mem: 9377 +Train: [62] [3100/6250] eta: 0:07:24 lr: 0.000042 grad: 0.1668 (0.1736) loss: 0.7624 (0.7691) time: 0.1342 data: 0.0511 max mem: 9377 +Train: [62] [3200/6250] eta: 0:07:11 lr: 0.000042 grad: 0.1715 (0.1736) loss: 0.7575 (0.7688) time: 0.1490 data: 0.0670 max mem: 9377 +Train: [62] [3300/6250] eta: 0:06:57 lr: 0.000042 grad: 0.1774 (0.1736) loss: 0.7533 (0.7685) time: 0.1435 data: 0.0641 max mem: 9377 +Train: [62] [3400/6250] eta: 0:06:43 lr: 0.000042 grad: 0.1707 (0.1738) loss: 0.7597 (0.7682) time: 0.1537 data: 0.0738 max mem: 9377 +Train: [62] [3500/6250] eta: 0:06:29 lr: 0.000042 grad: 0.1702 (0.1738) loss: 0.7689 (0.7680) time: 0.1289 data: 0.0498 max mem: 9377 +Train: [62] [3600/6250] eta: 0:06:14 lr: 0.000042 grad: 0.1700 (0.1737) loss: 0.7579 (0.7679) time: 0.1318 data: 0.0516 max mem: 9377 +Train: [62] [3700/6250] eta: 0:06:00 lr: 0.000042 grad: 0.1708 (0.1737) loss: 0.7613 (0.7678) time: 0.1333 data: 0.0576 max mem: 9377 +Train: [62] [3800/6250] eta: 0:05:45 lr: 0.000042 grad: 0.1725 (0.1737) loss: 0.7559 (0.7677) time: 0.1199 data: 0.0369 max mem: 9377 +Train: [62] [3900/6250] eta: 0:05:30 lr: 0.000042 grad: 0.1698 (0.1736) loss: 0.7645 (0.7677) time: 0.1310 data: 0.0518 max mem: 9377 +Train: [62] [4000/6250] eta: 0:05:16 lr: 0.000042 grad: 0.1683 (0.1737) loss: 0.7667 (0.7676) time: 0.1394 data: 0.0586 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:02 lr: 0.000042 grad: 0.1686 (0.1735) loss: 0.7819 (0.7676) time: 0.1253 data: 0.0437 max mem: 9377 +Train: [62] [4200/6250] eta: 0:04:48 lr: 0.000042 grad: 0.1666 (0.1734) loss: 0.7753 (0.7677) time: 0.1594 data: 0.0741 max mem: 9377 +Train: [62] [4300/6250] eta: 0:04:33 lr: 0.000042 grad: 0.1625 (0.1733) loss: 0.7751 (0.7677) time: 0.1246 data: 0.0434 max mem: 9377 +Train: [62] [4400/6250] eta: 0:04:19 lr: 0.000042 grad: 0.1663 (0.1733) loss: 0.7714 (0.7678) time: 0.1331 data: 0.0478 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:05 lr: 0.000042 grad: 0.1630 (0.1732) loss: 0.7642 (0.7678) time: 0.1379 data: 0.0560 max mem: 9377 +Train: [62] [4600/6250] eta: 0:03:51 lr: 0.000042 grad: 0.1660 (0.1731) loss: 0.7750 (0.7680) time: 0.1495 data: 0.0673 max mem: 9377 +Train: [62] [4700/6250] eta: 0:03:37 lr: 0.000042 grad: 0.1656 (0.1730) loss: 0.7681 (0.7681) time: 0.1203 data: 0.0338 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:23 lr: 0.000042 grad: 0.1577 (0.1728) loss: 0.7816 (0.7682) time: 0.1341 data: 0.0492 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:09 lr: 0.000042 grad: 0.1646 (0.1727) loss: 0.7680 (0.7683) time: 0.1226 data: 0.0323 max mem: 9377 +Train: [62] [5000/6250] eta: 0:02:55 lr: 0.000042 grad: 0.1694 (0.1726) loss: 0.7741 (0.7684) time: 0.1270 data: 0.0501 max mem: 9377 +Train: [62] [5100/6250] eta: 0:02:40 lr: 0.000042 grad: 0.1632 (0.1725) loss: 0.7686 (0.7684) time: 0.1443 data: 0.0647 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:26 lr: 0.000042 grad: 0.1538 (0.1723) loss: 0.7831 (0.7685) time: 0.1410 data: 0.0656 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:12 lr: 0.000042 grad: 0.1703 (0.1722) loss: 0.7619 (0.7685) time: 0.1458 data: 0.0594 max mem: 9377 +Train: [62] [5400/6250] eta: 0:01:58 lr: 0.000041 grad: 0.1617 (0.1721) loss: 0.7705 (0.7686) time: 0.1357 data: 0.0513 max mem: 9377 +Train: [62] [5500/6250] eta: 0:01:44 lr: 0.000041 grad: 0.1655 (0.1720) loss: 0.7659 (0.7687) time: 0.1702 data: 0.0940 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:30 lr: 0.000041 grad: 0.1603 (0.1719) loss: 0.7745 (0.7687) time: 0.1413 data: 0.0582 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:16 lr: 0.000041 grad: 0.1808 (0.1719) loss: 0.7655 (0.7687) time: 0.1667 data: 0.0903 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:02 lr: 0.000041 grad: 0.1716 (0.1718) loss: 0.7626 (0.7687) time: 0.1270 data: 0.0527 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:48 lr: 0.000041 grad: 0.1810 (0.1719) loss: 0.7740 (0.7687) time: 0.1483 data: 0.0572 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:34 lr: 0.000041 grad: 0.1738 (0.1719) loss: 0.7587 (0.7687) time: 0.1223 data: 0.0383 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:20 lr: 0.000041 grad: 0.1689 (0.1720) loss: 0.7624 (0.7686) time: 0.1404 data: 0.0568 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:06 lr: 0.000041 grad: 0.1615 (0.1720) loss: 0.7722 (0.7685) time: 0.1427 data: 0.0564 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1682 (0.1720) loss: 0.7647 (0.7684) time: 0.1379 data: 0.0552 max mem: 9377 +Train: [62] Total time: 0:14:38 (0.1406 s / it) +Averaged stats: lr: 0.000041 grad: 0.1682 (0.1720) loss: 0.7647 (0.7684) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:05:17 loss: 0.8200 (0.8200) time: 5.1246 data: 5.0938 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8127 (0.8141) time: 0.1059 data: 0.0811 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:12 (0.2065 s / it) +Averaged stats (hcp-train-subset): loss: 0.8127 (0.8141) +Eval (hcp-val): [62] [ 0/62] eta: 0:04:19 loss: 0.8534 (0.8534) time: 4.1837 data: 4.1179 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8489 (0.8506) time: 0.1214 data: 0.0930 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:12 (0.2053 s / it) +Averaged stats (hcp-val): loss: 0.8489 (0.8506) +Eval (nsd-val): [62] [ 0/62] eta: 0:03:41 loss: 0.8123 (0.8123) time: 3.5747 data: 3.5207 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8224 (0.8252) time: 0.1123 data: 0.0857 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:13 (0.2156 s / it) +Averaged stats (nsd-val): loss: 0.8224 (0.8252) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 8:25:34 lr: 0.000041 grad: 0.1447 (0.1447) loss: 0.8386 (0.8386) time: 4.8536 data: 4.5365 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:19:48 lr: 0.000041 grad: 0.2046 (0.2497) loss: 0.7651 (0.7817) time: 0.1391 data: 0.0424 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:16:49 lr: 0.000041 grad: 0.2153 (0.2364) loss: 0.7782 (0.7815) time: 0.1458 data: 0.0568 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:15:33 lr: 0.000041 grad: 0.2000 (0.2248) loss: 0.7756 (0.7797) time: 0.1282 data: 0.0356 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:14:43 lr: 0.000041 grad: 0.1729 (0.2166) loss: 0.7769 (0.7776) time: 0.1462 data: 0.0548 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:14:02 lr: 0.000041 grad: 0.1786 (0.2096) loss: 0.7558 (0.7752) time: 0.1285 data: 0.0406 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:13:44 lr: 0.000041 grad: 0.1853 (0.2046) loss: 0.7630 (0.7737) time: 0.1433 data: 0.0601 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:13:25 lr: 0.000041 grad: 0.1715 (0.2022) loss: 0.7687 (0.7726) time: 0.1324 data: 0.0439 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:13:07 lr: 0.000041 grad: 0.1726 (0.1989) loss: 0.7732 (0.7723) time: 0.1505 data: 0.0651 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:12:55 lr: 0.000041 grad: 0.1615 (0.1959) loss: 0.7712 (0.7718) time: 0.1534 data: 0.0659 max mem: 9377 +Train: [63] [1000/6250] eta: 0:12:45 lr: 0.000041 grad: 0.1679 (0.1935) loss: 0.7717 (0.7716) time: 0.1612 data: 0.0800 max mem: 9377 +Train: [63] [1100/6250] eta: 0:12:27 lr: 0.000041 grad: 0.1596 (0.1916) loss: 0.7693 (0.7711) time: 0.1413 data: 0.0548 max mem: 9377 +Train: [63] [1200/6250] eta: 0:12:08 lr: 0.000041 grad: 0.1724 (0.1902) loss: 0.7598 (0.7703) time: 0.1401 data: 0.0588 max mem: 9377 +Train: [63] [1300/6250] eta: 0:11:53 lr: 0.000041 grad: 0.1714 (0.1888) loss: 0.7636 (0.7695) time: 0.1501 data: 0.0673 max mem: 9377 +Train: [63] [1400/6250] eta: 0:11:38 lr: 0.000041 grad: 0.1726 (0.1877) loss: 0.7625 (0.7692) time: 0.1461 data: 0.0619 max mem: 9377 +Train: [63] [1500/6250] eta: 0:11:21 lr: 0.000041 grad: 0.1604 (0.1863) loss: 0.7730 (0.7689) time: 0.1281 data: 0.0460 max mem: 9377 +Train: [63] [1600/6250] eta: 0:11:06 lr: 0.000041 grad: 0.1698 (0.1851) loss: 0.7779 (0.7688) time: 0.1053 data: 0.0158 max mem: 9377 +Train: [63] [1700/6250] eta: 0:10:49 lr: 0.000041 grad: 0.1659 (0.1843) loss: 0.7628 (0.7687) time: 0.1129 data: 0.0225 max mem: 9377 +Train: [63] [1800/6250] eta: 0:10:35 lr: 0.000041 grad: 0.1700 (0.1836) loss: 0.7735 (0.7687) time: 0.1379 data: 0.0520 max mem: 9377 +Train: [63] [1900/6250] eta: 0:10:21 lr: 0.000041 grad: 0.1791 (0.1830) loss: 0.7578 (0.7687) time: 0.1482 data: 0.0671 max mem: 9377 +Train: [63] [2000/6250] eta: 0:10:08 lr: 0.000041 grad: 0.1718 (0.1826) loss: 0.7707 (0.7685) time: 0.1255 data: 0.0419 max mem: 9377 +Train: [63] [2100/6250] eta: 0:09:54 lr: 0.000041 grad: 0.1719 (0.1822) loss: 0.7639 (0.7683) time: 0.1194 data: 0.0386 max mem: 9377 +Train: [63] [2200/6250] eta: 0:09:39 lr: 0.000041 grad: 0.1801 (0.1820) loss: 0.7605 (0.7682) time: 0.1377 data: 0.0590 max mem: 9377 +Train: [63] [2300/6250] eta: 0:09:24 lr: 0.000041 grad: 0.1704 (0.1814) loss: 0.7545 (0.7681) time: 0.1533 data: 0.0669 max mem: 9377 +Train: [63] [2400/6250] eta: 0:09:11 lr: 0.000040 grad: 0.1710 (0.1810) loss: 0.7607 (0.7680) time: 0.1481 data: 0.0645 max mem: 9377 +Train: [63] [2500/6250] eta: 0:08:57 lr: 0.000040 grad: 0.1654 (0.1805) loss: 0.7617 (0.7680) time: 0.1534 data: 0.0577 max mem: 9377 +Train: [63] [2600/6250] eta: 0:08:43 lr: 0.000040 grad: 0.1687 (0.1801) loss: 0.7709 (0.7681) time: 0.1528 data: 0.0731 max mem: 9377 +Train: [63] [2700/6250] eta: 0:08:27 lr: 0.000040 grad: 0.1715 (0.1797) loss: 0.7648 (0.7682) time: 0.1549 data: 0.0692 max mem: 9377 +Train: [63] [2800/6250] eta: 0:08:11 lr: 0.000040 grad: 0.1745 (0.1795) loss: 0.7635 (0.7682) time: 0.1356 data: 0.0486 max mem: 9377 +Train: [63] [2900/6250] eta: 0:07:55 lr: 0.000040 grad: 0.1694 (0.1792) loss: 0.7707 (0.7682) time: 0.1261 data: 0.0421 max mem: 9377 +Train: [63] [3000/6250] eta: 0:07:40 lr: 0.000040 grad: 0.1659 (0.1789) loss: 0.7760 (0.7683) time: 0.1521 data: 0.0712 max mem: 9377 +Train: [63] [3100/6250] eta: 0:07:25 lr: 0.000040 grad: 0.1636 (0.1787) loss: 0.7732 (0.7684) time: 0.1384 data: 0.0544 max mem: 9377 +Train: [63] [3200/6250] eta: 0:07:12 lr: 0.000040 grad: 0.1613 (0.1784) loss: 0.7691 (0.7685) time: 0.1209 data: 0.0435 max mem: 9377 +Train: [63] [3300/6250] eta: 0:06:59 lr: 0.000040 grad: 0.1671 (0.1782) loss: 0.7808 (0.7686) time: 0.1655 data: 0.0872 max mem: 9377 +Train: [63] [3400/6250] eta: 0:06:45 lr: 0.000040 grad: 0.1732 (0.1779) loss: 0.7726 (0.7687) time: 0.1428 data: 0.0621 max mem: 9377 +Train: [63] [3500/6250] eta: 0:06:31 lr: 0.000040 grad: 0.1708 (0.1779) loss: 0.7619 (0.7685) time: 0.1381 data: 0.0569 max mem: 9377 +Train: [63] [3600/6250] eta: 0:06:17 lr: 0.000040 grad: 0.1775 (0.1778) loss: 0.7682 (0.7685) time: 0.1330 data: 0.0513 max mem: 9377 +Train: [63] [3700/6250] eta: 0:06:03 lr: 0.000040 grad: 0.1681 (0.1775) loss: 0.7739 (0.7685) time: 0.1243 data: 0.0424 max mem: 9377 +Train: [63] [3800/6250] eta: 0:05:48 lr: 0.000040 grad: 0.1592 (0.1773) loss: 0.7711 (0.7685) time: 0.1343 data: 0.0500 max mem: 9377 +Train: [63] [3900/6250] eta: 0:05:33 lr: 0.000040 grad: 0.1597 (0.1771) loss: 0.7865 (0.7686) time: 0.1290 data: 0.0423 max mem: 9377 +Train: [63] [4000/6250] eta: 0:05:19 lr: 0.000040 grad: 0.1598 (0.1769) loss: 0.7692 (0.7687) time: 0.1273 data: 0.0368 max mem: 9377 +Train: [63] [4100/6250] eta: 0:05:04 lr: 0.000040 grad: 0.1568 (0.1767) loss: 0.7719 (0.7687) time: 0.1201 data: 0.0368 max mem: 9377 +Train: [63] [4200/6250] eta: 0:04:49 lr: 0.000040 grad: 0.1631 (0.1765) loss: 0.7727 (0.7687) time: 0.1320 data: 0.0517 max mem: 9377 +Train: [63] [4300/6250] eta: 0:04:35 lr: 0.000040 grad: 0.1642 (0.1762) loss: 0.7698 (0.7688) time: 0.1375 data: 0.0561 max mem: 9377 +Train: [63] [4400/6250] eta: 0:04:20 lr: 0.000040 grad: 0.1623 (0.1760) loss: 0.7765 (0.7688) time: 0.1404 data: 0.0566 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:06 lr: 0.000040 grad: 0.1648 (0.1759) loss: 0.7749 (0.7688) time: 0.1432 data: 0.0510 max mem: 9377 +Train: [63] [4600/6250] eta: 0:03:52 lr: 0.000040 grad: 0.1707 (0.1758) loss: 0.7591 (0.7688) time: 0.1374 data: 0.0587 max mem: 9377 +Train: [63] [4700/6250] eta: 0:03:38 lr: 0.000040 grad: 0.1668 (0.1757) loss: 0.7640 (0.7687) time: 0.1618 data: 0.0814 max mem: 9377 +Train: [63] [4800/6250] eta: 0:03:24 lr: 0.000040 grad: 0.1673 (0.1757) loss: 0.7667 (0.7686) time: 0.1257 data: 0.0426 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:10 lr: 0.000040 grad: 0.1694 (0.1755) loss: 0.7674 (0.7686) time: 0.1385 data: 0.0588 max mem: 9377 +Train: [63] [5000/6250] eta: 0:02:56 lr: 0.000040 grad: 0.1660 (0.1754) loss: 0.7635 (0.7685) time: 0.1350 data: 0.0520 max mem: 9377 +Train: [63] [5100/6250] eta: 0:02:41 lr: 0.000040 grad: 0.1728 (0.1754) loss: 0.7593 (0.7684) time: 0.1239 data: 0.0355 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:27 lr: 0.000040 grad: 0.1712 (0.1754) loss: 0.7514 (0.7684) time: 0.1606 data: 0.0853 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:13 lr: 0.000040 grad: 0.1738 (0.1754) loss: 0.7654 (0.7682) time: 0.1257 data: 0.0404 max mem: 9377 +Train: [63] [5400/6250] eta: 0:01:59 lr: 0.000040 grad: 0.1797 (0.1754) loss: 0.7623 (0.7681) time: 0.1426 data: 0.0640 max mem: 9377 +Train: [63] [5500/6250] eta: 0:01:45 lr: 0.000040 grad: 0.1736 (0.1754) loss: 0.7601 (0.7680) time: 0.1266 data: 0.0439 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:31 lr: 0.000039 grad: 0.1660 (0.1754) loss: 0.7592 (0.7679) time: 0.1428 data: 0.0591 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:17 lr: 0.000039 grad: 0.1664 (0.1754) loss: 0.7690 (0.7677) time: 0.1485 data: 0.0683 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:03 lr: 0.000039 grad: 0.1618 (0.1753) loss: 0.7650 (0.7676) time: 0.1386 data: 0.0578 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:49 lr: 0.000039 grad: 0.1673 (0.1753) loss: 0.7613 (0.7675) time: 0.1192 data: 0.0320 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:35 lr: 0.000039 grad: 0.1646 (0.1752) loss: 0.7623 (0.7673) time: 0.1297 data: 0.0509 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:21 lr: 0.000039 grad: 0.1741 (0.1753) loss: 0.7565 (0.7671) time: 0.1633 data: 0.0804 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:07 lr: 0.000039 grad: 0.1790 (0.1754) loss: 0.7536 (0.7669) time: 0.1513 data: 0.0716 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1684 (0.1754) loss: 0.7518 (0.7668) time: 0.1222 data: 0.0381 max mem: 9377 +Train: [63] Total time: 0:14:46 (0.1418 s / it) +Averaged stats: lr: 0.000039 grad: 0.1684 (0.1754) loss: 0.7518 (0.7668) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:05:09 loss: 0.8211 (0.8211) time: 4.9951 data: 4.9647 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8124 (0.8128) time: 0.1112 data: 0.0848 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:12 (0.2077 s / it) +Averaged stats (hcp-train-subset): loss: 0.8124 (0.8128) +Eval (hcp-val): [63] [ 0/62] eta: 0:04:37 loss: 0.8434 (0.8434) time: 4.4808 data: 4.4482 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8491 (0.8500) time: 0.1451 data: 0.1196 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-val): loss: 0.8491 (0.8500) +Eval (nsd-val): [63] [ 0/62] eta: 0:06:06 loss: 0.8203 (0.8203) time: 5.9181 data: 5.8866 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8284 (0.8300) time: 0.1251 data: 0.0977 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.8284 (0.8300) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 10:14:10 lr: 0.000039 grad: 0.1495 (0.1495) loss: 0.8530 (0.8530) time: 5.8961 data: 5.7964 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:20:32 lr: 0.000039 grad: 0.2374 (0.2470) loss: 0.7794 (0.7900) time: 0.1520 data: 0.0566 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:17:12 lr: 0.000039 grad: 0.2252 (0.2487) loss: 0.7661 (0.7803) time: 0.1386 data: 0.0360 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:15:51 lr: 0.000039 grad: 0.2012 (0.2397) loss: 0.7691 (0.7757) time: 0.1223 data: 0.0402 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:15:01 lr: 0.000039 grad: 0.2029 (0.2338) loss: 0.7682 (0.7740) time: 0.1267 data: 0.0309 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:14:16 lr: 0.000039 grad: 0.1973 (0.2250) loss: 0.7661 (0.7734) time: 0.1245 data: 0.0197 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:13:39 lr: 0.000039 grad: 0.1736 (0.2194) loss: 0.7711 (0.7721) time: 0.0936 data: 0.0029 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:13:17 lr: 0.000039 grad: 0.1733 (0.2139) loss: 0.7687 (0.7716) time: 0.1435 data: 0.0540 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:12:56 lr: 0.000039 grad: 0.1753 (0.2090) loss: 0.7658 (0.7713) time: 0.1443 data: 0.0580 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:12:36 lr: 0.000039 grad: 0.1665 (0.2050) loss: 0.7773 (0.7713) time: 0.1263 data: 0.0389 max mem: 9377 +Train: [64] [1000/6250] eta: 0:12:20 lr: 0.000039 grad: 0.1670 (0.2022) loss: 0.7706 (0.7709) time: 0.1308 data: 0.0474 max mem: 9377 +Train: [64] [1100/6250] eta: 0:12:01 lr: 0.000039 grad: 0.1692 (0.1993) loss: 0.7657 (0.7708) time: 0.1353 data: 0.0434 max mem: 9377 +Train: [64] [1200/6250] eta: 0:11:45 lr: 0.000039 grad: 0.1689 (0.1970) loss: 0.7714 (0.7707) time: 0.1210 data: 0.0420 max mem: 9377 +Train: [64] [1300/6250] eta: 0:11:29 lr: 0.000039 grad: 0.1727 (0.1951) loss: 0.7638 (0.7702) time: 0.1336 data: 0.0505 max mem: 9377 +Train: [64] [1400/6250] eta: 0:11:14 lr: 0.000039 grad: 0.1669 (0.1932) loss: 0.7790 (0.7701) time: 0.1380 data: 0.0552 max mem: 9377 +Train: [64] [1500/6250] eta: 0:10:58 lr: 0.000039 grad: 0.1639 (0.1916) loss: 0.7757 (0.7700) time: 0.1151 data: 0.0258 max mem: 9377 +Train: [64] [1600/6250] eta: 0:10:44 lr: 0.000039 grad: 0.1566 (0.1898) loss: 0.7666 (0.7700) time: 0.1364 data: 0.0572 max mem: 9377 +Train: [64] [1700/6250] eta: 0:10:30 lr: 0.000039 grad: 0.1627 (0.1884) loss: 0.7636 (0.7699) time: 0.1405 data: 0.0518 max mem: 9377 +Train: [64] [1800/6250] eta: 0:10:16 lr: 0.000039 grad: 0.1676 (0.1875) loss: 0.7694 (0.7696) time: 0.1410 data: 0.0555 max mem: 9377 +Train: [64] [1900/6250] eta: 0:10:02 lr: 0.000039 grad: 0.1749 (0.1865) loss: 0.7606 (0.7694) time: 0.1497 data: 0.0711 max mem: 9377 +Train: [64] [2000/6250] eta: 0:09:51 lr: 0.000039 grad: 0.1686 (0.1856) loss: 0.7529 (0.7691) time: 0.1289 data: 0.0448 max mem: 9377 +Train: [64] [2100/6250] eta: 0:09:40 lr: 0.000039 grad: 0.1680 (0.1848) loss: 0.7618 (0.7688) time: 0.1302 data: 0.0489 max mem: 9377 +Train: [64] [2200/6250] eta: 0:09:26 lr: 0.000039 grad: 0.1698 (0.1840) loss: 0.7620 (0.7686) time: 0.1354 data: 0.0581 max mem: 9377 +Train: [64] [2300/6250] eta: 0:09:13 lr: 0.000039 grad: 0.1661 (0.1834) loss: 0.7564 (0.7684) time: 0.1340 data: 0.0521 max mem: 9377 +Train: [64] [2400/6250] eta: 0:09:00 lr: 0.000039 grad: 0.1661 (0.1829) loss: 0.7544 (0.7682) time: 0.1489 data: 0.0616 max mem: 9377 +Train: [64] [2500/6250] eta: 0:08:48 lr: 0.000039 grad: 0.1654 (0.1824) loss: 0.7640 (0.7681) time: 0.1437 data: 0.0594 max mem: 9377 +Train: [64] [2600/6250] eta: 0:08:33 lr: 0.000039 grad: 0.1645 (0.1818) loss: 0.7733 (0.7680) time: 0.1095 data: 0.0294 max mem: 9377 +Train: [64] [2700/6250] eta: 0:08:19 lr: 0.000038 grad: 0.1717 (0.1815) loss: 0.7550 (0.7678) time: 0.1416 data: 0.0625 max mem: 9377 +Train: [64] [2800/6250] eta: 0:08:04 lr: 0.000038 grad: 0.1670 (0.1811) loss: 0.7623 (0.7676) time: 0.1257 data: 0.0433 max mem: 9377 +Train: [64] [2900/6250] eta: 0:07:48 lr: 0.000038 grad: 0.1642 (0.1807) loss: 0.7562 (0.7674) time: 0.1259 data: 0.0361 max mem: 9377 +Train: [64] [3000/6250] eta: 0:07:32 lr: 0.000038 grad: 0.1684 (0.1803) loss: 0.7614 (0.7672) time: 0.1129 data: 0.0188 max mem: 9377 +Train: [64] [3100/6250] eta: 0:07:17 lr: 0.000038 grad: 0.1639 (0.1799) loss: 0.7605 (0.7671) time: 0.1223 data: 0.0364 max mem: 9377 +Train: [64] [3200/6250] eta: 0:07:02 lr: 0.000038 grad: 0.1680 (0.1796) loss: 0.7707 (0.7669) time: 0.1271 data: 0.0455 max mem: 9377 +Train: [64] [3300/6250] eta: 0:06:49 lr: 0.000038 grad: 0.1605 (0.1793) loss: 0.7659 (0.7669) time: 0.1413 data: 0.0625 max mem: 9377 +Train: [64] [3400/6250] eta: 0:06:36 lr: 0.000038 grad: 0.1761 (0.1791) loss: 0.7550 (0.7667) time: 0.1413 data: 0.0608 max mem: 9377 +Train: [64] [3500/6250] eta: 0:06:22 lr: 0.000038 grad: 0.1687 (0.1790) loss: 0.7558 (0.7665) time: 0.1408 data: 0.0591 max mem: 9377 +Train: [64] [3600/6250] eta: 0:06:09 lr: 0.000038 grad: 0.1666 (0.1787) loss: 0.7571 (0.7665) time: 0.1695 data: 0.0936 max mem: 9377 +Train: [64] [3700/6250] eta: 0:05:55 lr: 0.000038 grad: 0.1725 (0.1786) loss: 0.7525 (0.7664) time: 0.1406 data: 0.0649 max mem: 9377 +Train: [64] [3800/6250] eta: 0:05:41 lr: 0.000038 grad: 0.1656 (0.1785) loss: 0.7705 (0.7663) time: 0.1277 data: 0.0459 max mem: 9377 +Train: [64] [3900/6250] eta: 0:05:28 lr: 0.000038 grad: 0.1592 (0.1784) loss: 0.7625 (0.7663) time: 0.1364 data: 0.0546 max mem: 9377 +Train: [64] [4000/6250] eta: 0:05:13 lr: 0.000038 grad: 0.1733 (0.1783) loss: 0.7556 (0.7661) time: 0.1360 data: 0.0570 max mem: 9377 +Train: [64] [4100/6250] eta: 0:04:59 lr: 0.000038 grad: 0.1744 (0.1782) loss: 0.7554 (0.7660) time: 0.1400 data: 0.0531 max mem: 9377 +Train: [64] [4200/6250] eta: 0:04:45 lr: 0.000038 grad: 0.1710 (0.1782) loss: 0.7623 (0.7659) time: 0.1432 data: 0.0608 max mem: 9377 +Train: [64] [4300/6250] eta: 0:04:31 lr: 0.000038 grad: 0.1711 (0.1780) loss: 0.7641 (0.7659) time: 0.1394 data: 0.0558 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:16 lr: 0.000038 grad: 0.1593 (0.1778) loss: 0.7659 (0.7659) time: 0.1210 data: 0.0348 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:02 lr: 0.000038 grad: 0.1679 (0.1777) loss: 0.7743 (0.7659) time: 0.1243 data: 0.0364 max mem: 9377 +Train: [64] [4600/6250] eta: 0:03:48 lr: 0.000038 grad: 0.1709 (0.1776) loss: 0.7707 (0.7660) time: 0.1447 data: 0.0684 max mem: 9377 +Train: [64] [4700/6250] eta: 0:03:34 lr: 0.000038 grad: 0.1729 (0.1775) loss: 0.7669 (0.7659) time: 0.1161 data: 0.0341 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:20 lr: 0.000038 grad: 0.1643 (0.1773) loss: 0.7750 (0.7660) time: 0.1326 data: 0.0464 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:07 lr: 0.000038 grad: 0.1649 (0.1771) loss: 0.7710 (0.7661) time: 0.1492 data: 0.0697 max mem: 9377 +Train: [64] [5000/6250] eta: 0:02:53 lr: 0.000038 grad: 0.1660 (0.1769) loss: 0.7732 (0.7662) time: 0.1084 data: 0.0202 max mem: 9377 +Train: [64] [5100/6250] eta: 0:02:39 lr: 0.000038 grad: 0.1603 (0.1767) loss: 0.7681 (0.7664) time: 0.1392 data: 0.0553 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:25 lr: 0.000038 grad: 0.1608 (0.1766) loss: 0.7758 (0.7665) time: 0.1238 data: 0.0442 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:11 lr: 0.000038 grad: 0.1718 (0.1765) loss: 0.7682 (0.7665) time: 0.1439 data: 0.0631 max mem: 9377 +Train: [64] [5400/6250] eta: 0:01:57 lr: 0.000038 grad: 0.1769 (0.1765) loss: 0.7644 (0.7665) time: 0.1263 data: 0.0442 max mem: 9377 +Train: [64] [5500/6250] eta: 0:01:43 lr: 0.000038 grad: 0.1756 (0.1765) loss: 0.7704 (0.7666) time: 0.1228 data: 0.0347 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:30 lr: 0.000038 grad: 0.1672 (0.1766) loss: 0.7560 (0.7665) time: 0.1493 data: 0.0662 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:16 lr: 0.000038 grad: 0.1704 (0.1766) loss: 0.7606 (0.7665) time: 0.1352 data: 0.0581 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:02 lr: 0.000038 grad: 0.1730 (0.1766) loss: 0.7600 (0.7664) time: 0.1535 data: 0.0755 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:48 lr: 0.000037 grad: 0.1680 (0.1766) loss: 0.7730 (0.7664) time: 0.1481 data: 0.0641 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:34 lr: 0.000037 grad: 0.1780 (0.1766) loss: 0.7637 (0.7663) time: 0.1363 data: 0.0543 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:20 lr: 0.000037 grad: 0.1728 (0.1767) loss: 0.7566 (0.7663) time: 0.1481 data: 0.0648 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:06 lr: 0.000037 grad: 0.1695 (0.1767) loss: 0.7641 (0.7662) time: 0.1411 data: 0.0440 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1778 (0.1767) loss: 0.7583 (0.7662) time: 0.1383 data: 0.0534 max mem: 9377 +Train: [64] Total time: 0:14:34 (0.1399 s / it) +Averaged stats: lr: 0.000037 grad: 0.1778 (0.1767) loss: 0.7583 (0.7662) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:03:14 loss: 0.8180 (0.8180) time: 3.1370 data: 3.0867 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8133 (0.8122) time: 0.1305 data: 0.1057 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:12 (0.2048 s / it) +Averaged stats (hcp-train-subset): loss: 0.8133 (0.8122) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [64] [ 0/62] eta: 0:03:32 loss: 0.8515 (0.8515) time: 3.4339 data: 3.3674 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8479 (0.8507) time: 0.1114 data: 0.0866 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:12 (0.1993 s / it) +Averaged stats (hcp-val): loss: 0.8479 (0.8507) +Making plots (hcp-val): example=42 +Eval (nsd-val): [64] [ 0/62] eta: 0:03:23 loss: 0.8224 (0.8224) time: 3.2794 data: 3.2080 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8288 (0.8321) time: 0.1026 data: 0.0730 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:12 (0.2023 s / it) +Averaged stats (nsd-val): loss: 0.8288 (0.8321) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 9:16:01 lr: 0.000037 grad: 0.3217 (0.3217) loss: 0.7308 (0.7308) time: 5.3379 data: 5.1926 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:19:35 lr: 0.000037 grad: 0.2636 (0.2902) loss: 0.7660 (0.7830) time: 0.1420 data: 0.0455 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:16:42 lr: 0.000037 grad: 0.1751 (0.2514) loss: 0.7850 (0.7840) time: 0.1414 data: 0.0532 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:15:23 lr: 0.000037 grad: 0.1992 (0.2305) loss: 0.7810 (0.7833) time: 0.1281 data: 0.0318 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:14:31 lr: 0.000037 grad: 0.1797 (0.2199) loss: 0.7816 (0.7831) time: 0.1321 data: 0.0347 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:13:49 lr: 0.000037 grad: 0.1776 (0.2111) loss: 0.7865 (0.7826) time: 0.1056 data: 0.0003 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:13:23 lr: 0.000037 grad: 0.1679 (0.2057) loss: 0.7831 (0.7817) time: 0.1403 data: 0.0573 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:13:02 lr: 0.000037 grad: 0.1728 (0.2023) loss: 0.7763 (0.7811) time: 0.1450 data: 0.0612 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:12:47 lr: 0.000037 grad: 0.1793 (0.1994) loss: 0.7689 (0.7800) time: 0.1453 data: 0.0591 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:12:34 lr: 0.000037 grad: 0.1793 (0.1971) loss: 0.7625 (0.7790) time: 0.1618 data: 0.0739 max mem: 9377 +Train: [65] [1000/6250] eta: 0:12:18 lr: 0.000037 grad: 0.1748 (0.1954) loss: 0.7715 (0.7781) time: 0.1347 data: 0.0558 max mem: 9377 +Train: [65] [1100/6250] eta: 0:12:06 lr: 0.000037 grad: 0.1761 (0.1934) loss: 0.7607 (0.7774) time: 0.1477 data: 0.0647 max mem: 9377 +Train: [65] [1200/6250] eta: 0:11:50 lr: 0.000037 grad: 0.1711 (0.1922) loss: 0.7588 (0.7763) time: 0.1530 data: 0.0680 max mem: 9377 +Train: [65] [1300/6250] eta: 0:11:36 lr: 0.000037 grad: 0.1741 (0.1910) loss: 0.7601 (0.7754) time: 0.1071 data: 0.0150 max mem: 9377 +Train: [65] [1400/6250] eta: 0:11:21 lr: 0.000037 grad: 0.1772 (0.1903) loss: 0.7563 (0.7743) time: 0.1327 data: 0.0499 max mem: 9377 +Train: [65] [1500/6250] eta: 0:11:05 lr: 0.000037 grad: 0.1640 (0.1895) loss: 0.7559 (0.7734) time: 0.1339 data: 0.0557 max mem: 9377 +Train: [65] [1600/6250] eta: 0:10:51 lr: 0.000037 grad: 0.1768 (0.1888) loss: 0.7563 (0.7724) time: 0.1424 data: 0.0665 max mem: 9377 +Train: [65] [1700/6250] eta: 0:10:36 lr: 0.000037 grad: 0.1733 (0.1883) loss: 0.7696 (0.7718) time: 0.1416 data: 0.0633 max mem: 9377 +Train: [65] [1800/6250] eta: 0:10:22 lr: 0.000037 grad: 0.1751 (0.1879) loss: 0.7555 (0.7709) time: 0.1353 data: 0.0529 max mem: 9377 +Train: [65] [1900/6250] eta: 0:10:10 lr: 0.000037 grad: 0.1838 (0.1875) loss: 0.7453 (0.7701) time: 0.0876 data: 0.0002 max mem: 9377 +Train: [65] [2000/6250] eta: 0:09:54 lr: 0.000037 grad: 0.1845 (0.1873) loss: 0.7616 (0.7695) time: 0.1369 data: 0.0615 max mem: 9377 +Train: [65] [2100/6250] eta: 0:09:43 lr: 0.000037 grad: 0.1785 (0.1872) loss: 0.7646 (0.7689) time: 0.1425 data: 0.0684 max mem: 9377 +Train: [65] [2200/6250] eta: 0:09:28 lr: 0.000037 grad: 0.1692 (0.1871) loss: 0.7558 (0.7684) time: 0.1417 data: 0.0662 max mem: 9377 +Train: [65] [2300/6250] eta: 0:09:14 lr: 0.000037 grad: 0.1802 (0.1870) loss: 0.7471 (0.7678) time: 0.1299 data: 0.0409 max mem: 9377 +Train: [65] [2400/6250] eta: 0:09:00 lr: 0.000037 grad: 0.1794 (0.1870) loss: 0.7617 (0.7673) time: 0.1335 data: 0.0531 max mem: 9377 +Train: [65] [2500/6250] eta: 0:08:48 lr: 0.000037 grad: 0.1821 (0.1870) loss: 0.7601 (0.7669) time: 0.1411 data: 0.0528 max mem: 9377 +Train: [65] [2600/6250] eta: 0:08:34 lr: 0.000037 grad: 0.1774 (0.1869) loss: 0.7566 (0.7665) time: 0.1347 data: 0.0574 max mem: 9377 +Train: [65] [2700/6250] eta: 0:08:19 lr: 0.000037 grad: 0.1821 (0.1867) loss: 0.7665 (0.7662) time: 0.1308 data: 0.0332 max mem: 9377 +Train: [65] [2800/6250] eta: 0:08:04 lr: 0.000037 grad: 0.1769 (0.1865) loss: 0.7481 (0.7658) time: 0.1241 data: 0.0438 max mem: 9377 +Train: [65] [2900/6250] eta: 0:07:49 lr: 0.000037 grad: 0.1822 (0.1865) loss: 0.7557 (0.7655) time: 0.1352 data: 0.0423 max mem: 9377 +Train: [65] [3000/6250] eta: 0:07:34 lr: 0.000036 grad: 0.1697 (0.1861) loss: 0.7617 (0.7652) time: 0.1342 data: 0.0493 max mem: 9377 +Train: [65] [3100/6250] eta: 0:07:18 lr: 0.000036 grad: 0.1802 (0.1860) loss: 0.7489 (0.7649) time: 0.1192 data: 0.0355 max mem: 9377 +Train: [65] [3200/6250] eta: 0:07:04 lr: 0.000036 grad: 0.1800 (0.1858) loss: 0.7403 (0.7647) time: 0.1504 data: 0.0689 max mem: 9377 +Train: [65] [3300/6250] eta: 0:06:50 lr: 0.000036 grad: 0.1708 (0.1856) loss: 0.7616 (0.7645) time: 0.1423 data: 0.0673 max mem: 9377 +Train: [65] [3400/6250] eta: 0:06:37 lr: 0.000036 grad: 0.1765 (0.1855) loss: 0.7543 (0.7642) time: 0.1500 data: 0.0735 max mem: 9377 +Train: [65] [3500/6250] eta: 0:06:23 lr: 0.000036 grad: 0.1814 (0.1853) loss: 0.7572 (0.7640) time: 0.1304 data: 0.0458 max mem: 9377 +Train: [65] [3600/6250] eta: 0:06:12 lr: 0.000036 grad: 0.1730 (0.1853) loss: 0.7447 (0.7638) time: 0.1655 data: 0.0856 max mem: 9377 +Train: [65] [3700/6250] eta: 0:05:58 lr: 0.000036 grad: 0.1767 (0.1851) loss: 0.7560 (0.7637) time: 0.1800 data: 0.1048 max mem: 9377 +Train: [65] [3800/6250] eta: 0:05:44 lr: 0.000036 grad: 0.1834 (0.1849) loss: 0.7576 (0.7635) time: 0.1387 data: 0.0555 max mem: 9377 +Train: [65] [3900/6250] eta: 0:05:30 lr: 0.000036 grad: 0.1823 (0.1849) loss: 0.7618 (0.7634) time: 0.1525 data: 0.0728 max mem: 9377 +Train: [65] [4000/6250] eta: 0:05:15 lr: 0.000036 grad: 0.1701 (0.1847) loss: 0.7649 (0.7632) time: 0.1470 data: 0.0623 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:01 lr: 0.000036 grad: 0.1729 (0.1844) loss: 0.7565 (0.7633) time: 0.1240 data: 0.0466 max mem: 9377 +Train: [65] [4200/6250] eta: 0:04:46 lr: 0.000036 grad: 0.1661 (0.1842) loss: 0.7668 (0.7632) time: 0.1226 data: 0.0459 max mem: 9377 +Train: [65] [4300/6250] eta: 0:04:32 lr: 0.000036 grad: 0.1704 (0.1840) loss: 0.7588 (0.7632) time: 0.1406 data: 0.0613 max mem: 9377 +Train: [65] [4400/6250] eta: 0:04:18 lr: 0.000036 grad: 0.1796 (0.1839) loss: 0.7588 (0.7632) time: 0.1290 data: 0.0480 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:03 lr: 0.000036 grad: 0.1741 (0.1837) loss: 0.7595 (0.7633) time: 0.1330 data: 0.0480 max mem: 9377 +Train: [65] [4600/6250] eta: 0:03:49 lr: 0.000036 grad: 0.1788 (0.1835) loss: 0.7698 (0.7634) time: 0.1515 data: 0.0748 max mem: 9377 +Train: [65] [4700/6250] eta: 0:03:36 lr: 0.000036 grad: 0.1768 (0.1833) loss: 0.7610 (0.7634) time: 0.1813 data: 0.0957 max mem: 9377 +Train: [65] [4800/6250] eta: 0:03:22 lr: 0.000036 grad: 0.1661 (0.1831) loss: 0.7703 (0.7635) time: 0.1401 data: 0.0528 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:08 lr: 0.000036 grad: 0.1677 (0.1829) loss: 0.7757 (0.7635) time: 0.1405 data: 0.0619 max mem: 9377 +Train: [65] [5000/6250] eta: 0:02:54 lr: 0.000036 grad: 0.1689 (0.1829) loss: 0.7715 (0.7636) time: 0.1271 data: 0.0435 max mem: 9377 +Train: [65] [5100/6250] eta: 0:02:40 lr: 0.000036 grad: 0.1784 (0.1827) loss: 0.7633 (0.7637) time: 0.1288 data: 0.0413 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:26 lr: 0.000036 grad: 0.1709 (0.1825) loss: 0.7718 (0.7638) time: 0.1384 data: 0.0545 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:12 lr: 0.000036 grad: 0.1615 (0.1823) loss: 0.7814 (0.7639) time: 0.1384 data: 0.0558 max mem: 9377 +Train: [65] [5400/6250] eta: 0:01:58 lr: 0.000036 grad: 0.1701 (0.1821) loss: 0.7680 (0.7640) time: 0.1330 data: 0.0499 max mem: 9377 +Train: [65] [5500/6250] eta: 0:01:44 lr: 0.000036 grad: 0.1658 (0.1819) loss: 0.7676 (0.7641) time: 0.1128 data: 0.0309 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:30 lr: 0.000036 grad: 0.1645 (0.1817) loss: 0.7747 (0.7644) time: 0.1565 data: 0.0841 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:16 lr: 0.000036 grad: 0.1690 (0.1814) loss: 0.7745 (0.7645) time: 0.1438 data: 0.0560 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:02 lr: 0.000036 grad: 0.1689 (0.1812) loss: 0.7713 (0.7647) time: 0.1215 data: 0.0362 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:49 lr: 0.000036 grad: 0.1655 (0.1811) loss: 0.7692 (0.7648) time: 0.1334 data: 0.0533 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:34 lr: 0.000036 grad: 0.1663 (0.1809) loss: 0.7716 (0.7649) time: 0.1491 data: 0.0639 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:20 lr: 0.000036 grad: 0.1725 (0.1809) loss: 0.7676 (0.7650) time: 0.1379 data: 0.0606 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:07 lr: 0.000036 grad: 0.1638 (0.1807) loss: 0.7700 (0.7650) time: 0.1484 data: 0.0585 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1724 (0.1807) loss: 0.7699 (0.7650) time: 0.1524 data: 0.0675 max mem: 9377 +Train: [65] Total time: 0:14:41 (0.1410 s / it) +Averaged stats: lr: 0.000036 grad: 0.1724 (0.1807) loss: 0.7699 (0.7650) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:05:46 loss: 0.8170 (0.8170) time: 5.5901 data: 5.5592 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8154 (0.8115) time: 0.0855 data: 0.0606 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (hcp-train-subset): loss: 0.8154 (0.8115) +Eval (hcp-val): [65] [ 0/62] eta: 0:05:40 loss: 0.8523 (0.8523) time: 5.4936 data: 5.4640 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8487 (0.8495) time: 0.1357 data: 0.1104 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (hcp-val): loss: 0.8487 (0.8495) +Eval (nsd-val): [65] [ 0/62] eta: 0:04:54 loss: 0.8164 (0.8164) time: 4.7426 data: 4.7135 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8285 (0.8297) time: 0.1215 data: 0.0925 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:13 (0.2113 s / it) +Averaged stats (nsd-val): loss: 0.8285 (0.8297) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 9:41:09 lr: 0.000036 grad: 0.2467 (0.2467) loss: 0.8169 (0.8169) time: 5.5791 data: 5.4223 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:19:34 lr: 0.000035 grad: 0.2065 (0.2824) loss: 0.7626 (0.7731) time: 0.1395 data: 0.0417 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:16:59 lr: 0.000035 grad: 0.2249 (0.2690) loss: 0.7766 (0.7671) time: 0.1400 data: 0.0413 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:15:42 lr: 0.000035 grad: 0.2039 (0.2556) loss: 0.7764 (0.7648) time: 0.1221 data: 0.0269 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:14:56 lr: 0.000035 grad: 0.1901 (0.2407) loss: 0.7732 (0.7643) time: 0.1302 data: 0.0317 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:14:12 lr: 0.000035 grad: 0.1807 (0.2316) loss: 0.7734 (0.7650) time: 0.1097 data: 0.0183 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:13:42 lr: 0.000035 grad: 0.1725 (0.2228) loss: 0.7820 (0.7661) time: 0.1276 data: 0.0419 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:13:18 lr: 0.000035 grad: 0.1658 (0.2163) loss: 0.7828 (0.7672) time: 0.1308 data: 0.0451 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:12:58 lr: 0.000035 grad: 0.1699 (0.2107) loss: 0.7739 (0.7683) time: 0.1231 data: 0.0316 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:12:41 lr: 0.000035 grad: 0.1729 (0.2064) loss: 0.7734 (0.7690) time: 0.1624 data: 0.0734 max mem: 9377 +Train: [66] [1000/6250] eta: 0:12:23 lr: 0.000035 grad: 0.1796 (0.2027) loss: 0.7726 (0.7697) time: 0.1187 data: 0.0377 max mem: 9377 +Train: [66] [1100/6250] eta: 0:12:07 lr: 0.000035 grad: 0.1656 (0.2001) loss: 0.7751 (0.7696) time: 0.1184 data: 0.0253 max mem: 9377 +Train: [66] [1200/6250] eta: 0:11:51 lr: 0.000035 grad: 0.1821 (0.1982) loss: 0.7624 (0.7691) time: 0.1359 data: 0.0547 max mem: 9377 +Train: [66] [1300/6250] eta: 0:11:35 lr: 0.000035 grad: 0.1758 (0.1966) loss: 0.7563 (0.7687) time: 0.1329 data: 0.0512 max mem: 9377 +Train: [66] [1400/6250] eta: 0:11:18 lr: 0.000035 grad: 0.1713 (0.1953) loss: 0.7603 (0.7685) time: 0.1235 data: 0.0370 max mem: 9377 +Train: [66] [1500/6250] eta: 0:11:05 lr: 0.000035 grad: 0.1772 (0.1941) loss: 0.7625 (0.7685) time: 0.1898 data: 0.1093 max mem: 9377 +Train: [66] [1600/6250] eta: 0:10:48 lr: 0.000035 grad: 0.1738 (0.1930) loss: 0.7686 (0.7685) time: 0.1433 data: 0.0649 max mem: 9377 +Train: [66] [1700/6250] eta: 0:10:33 lr: 0.000035 grad: 0.1740 (0.1919) loss: 0.7730 (0.7685) time: 0.1264 data: 0.0437 max mem: 9377 +Train: [66] [1800/6250] eta: 0:10:21 lr: 0.000035 grad: 0.1732 (0.1909) loss: 0.7711 (0.7684) time: 0.1469 data: 0.0679 max mem: 9377 +Train: [66] [1900/6250] eta: 0:10:05 lr: 0.000035 grad: 0.1680 (0.1899) loss: 0.7696 (0.7685) time: 0.1451 data: 0.0611 max mem: 9377 +Train: [66] [2000/6250] eta: 0:09:50 lr: 0.000035 grad: 0.1737 (0.1891) loss: 0.7643 (0.7684) time: 0.1347 data: 0.0560 max mem: 9377 +Train: [66] [2100/6250] eta: 0:09:36 lr: 0.000035 grad: 0.1740 (0.1885) loss: 0.7582 (0.7683) time: 0.1329 data: 0.0434 max mem: 9377 +Train: [66] [2200/6250] eta: 0:09:25 lr: 0.000035 grad: 0.1758 (0.1878) loss: 0.7672 (0.7681) time: 0.1508 data: 0.0650 max mem: 9377 +Train: [66] [2300/6250] eta: 0:09:10 lr: 0.000035 grad: 0.1692 (0.1874) loss: 0.7632 (0.7680) time: 0.1346 data: 0.0555 max mem: 9377 +Train: [66] [2400/6250] eta: 0:09:00 lr: 0.000035 grad: 0.1711 (0.1869) loss: 0.7706 (0.7680) time: 0.2172 data: 0.1400 max mem: 9377 +Train: [66] [2500/6250] eta: 0:08:44 lr: 0.000035 grad: 0.1749 (0.1865) loss: 0.7662 (0.7679) time: 0.1429 data: 0.0550 max mem: 9377 +Train: [66] [2600/6250] eta: 0:08:31 lr: 0.000035 grad: 0.1772 (0.1862) loss: 0.7585 (0.7679) time: 0.1651 data: 0.0823 max mem: 9377 +Train: [66] [2700/6250] eta: 0:08:18 lr: 0.000035 grad: 0.1874 (0.1861) loss: 0.7682 (0.7678) time: 0.1543 data: 0.0721 max mem: 9377 +Train: [66] [2800/6250] eta: 0:08:03 lr: 0.000035 grad: 0.1740 (0.1859) loss: 0.7580 (0.7677) time: 0.1389 data: 0.0578 max mem: 9377 +Train: [66] [2900/6250] eta: 0:07:48 lr: 0.000035 grad: 0.1816 (0.1856) loss: 0.7576 (0.7677) time: 0.1062 data: 0.0219 max mem: 9377 +Train: [66] [3000/6250] eta: 0:07:33 lr: 0.000035 grad: 0.1775 (0.1854) loss: 0.7672 (0.7677) time: 0.1294 data: 0.0467 max mem: 9377 +Train: [66] [3100/6250] eta: 0:07:18 lr: 0.000035 grad: 0.1676 (0.1852) loss: 0.7708 (0.7677) time: 0.1359 data: 0.0517 max mem: 9377 +Train: [66] [3200/6250] eta: 0:07:03 lr: 0.000035 grad: 0.1678 (0.1850) loss: 0.7748 (0.7676) time: 0.1321 data: 0.0478 max mem: 9377 +Train: [66] [3300/6250] eta: 0:06:49 lr: 0.000035 grad: 0.1699 (0.1847) loss: 0.7671 (0.7676) time: 0.1597 data: 0.0770 max mem: 9377 +Train: [66] [3400/6250] eta: 0:06:36 lr: 0.000035 grad: 0.1737 (0.1845) loss: 0.7716 (0.7676) time: 0.1401 data: 0.0589 max mem: 9377 +Train: [66] [3500/6250] eta: 0:06:23 lr: 0.000034 grad: 0.1774 (0.1842) loss: 0.7720 (0.7677) time: 0.1720 data: 0.0913 max mem: 9377 +Train: [66] [3600/6250] eta: 0:06:09 lr: 0.000034 grad: 0.1695 (0.1840) loss: 0.7630 (0.7677) time: 0.1041 data: 0.0167 max mem: 9377 +Train: [66] [3700/6250] eta: 0:05:56 lr: 0.000034 grad: 0.1638 (0.1838) loss: 0.7727 (0.7678) time: 0.1580 data: 0.0759 max mem: 9377 +Train: [66] [3800/6250] eta: 0:05:43 lr: 0.000034 grad: 0.1652 (0.1837) loss: 0.7657 (0.7679) time: 0.2066 data: 0.1290 max mem: 9377 +Train: [66] [3900/6250] eta: 0:05:30 lr: 0.000034 grad: 0.1764 (0.1835) loss: 0.7778 (0.7678) time: 0.1471 data: 0.0620 max mem: 9377 +Train: [66] [4000/6250] eta: 0:05:16 lr: 0.000034 grad: 0.1699 (0.1833) loss: 0.7643 (0.7678) time: 0.1569 data: 0.0790 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:02 lr: 0.000034 grad: 0.1782 (0.1831) loss: 0.7527 (0.7678) time: 0.1394 data: 0.0609 max mem: 9377 +Train: [66] [4200/6250] eta: 0:04:48 lr: 0.000034 grad: 0.1698 (0.1829) loss: 0.7632 (0.7677) time: 0.1309 data: 0.0516 max mem: 9377 +Train: [66] [4300/6250] eta: 0:04:34 lr: 0.000034 grad: 0.1737 (0.1828) loss: 0.7691 (0.7676) time: 0.1578 data: 0.0826 max mem: 9377 +Train: [66] [4400/6250] eta: 0:04:19 lr: 0.000034 grad: 0.1747 (0.1827) loss: 0.7612 (0.7675) time: 0.1221 data: 0.0403 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:05 lr: 0.000034 grad: 0.1720 (0.1826) loss: 0.7713 (0.7674) time: 0.1277 data: 0.0499 max mem: 9377 +Train: [66] [4600/6250] eta: 0:03:51 lr: 0.000034 grad: 0.1669 (0.1825) loss: 0.7732 (0.7674) time: 0.1377 data: 0.0555 max mem: 9377 +Train: [66] [4700/6250] eta: 0:03:37 lr: 0.000034 grad: 0.1772 (0.1823) loss: 0.7664 (0.7673) time: 0.1319 data: 0.0558 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:23 lr: 0.000034 grad: 0.1752 (0.1822) loss: 0.7647 (0.7674) time: 0.1649 data: 0.0877 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:09 lr: 0.000034 grad: 0.1763 (0.1821) loss: 0.7681 (0.7673) time: 0.0910 data: 0.0109 max mem: 9377 +Train: [66] [5000/6250] eta: 0:02:55 lr: 0.000034 grad: 0.1758 (0.1821) loss: 0.7546 (0.7672) time: 0.1015 data: 0.0209 max mem: 9377 +Train: [66] [5100/6250] eta: 0:02:41 lr: 0.000034 grad: 0.1719 (0.1821) loss: 0.7627 (0.7671) time: 0.1176 data: 0.0286 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:27 lr: 0.000034 grad: 0.1835 (0.1821) loss: 0.7535 (0.7670) time: 0.1492 data: 0.0700 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:13 lr: 0.000034 grad: 0.1822 (0.1821) loss: 0.7613 (0.7669) time: 0.1308 data: 0.0446 max mem: 9377 +Train: [66] [5400/6250] eta: 0:01:59 lr: 0.000034 grad: 0.1720 (0.1821) loss: 0.7596 (0.7668) time: 0.1597 data: 0.0746 max mem: 9377 +Train: [66] [5500/6250] eta: 0:01:45 lr: 0.000034 grad: 0.1781 (0.1821) loss: 0.7698 (0.7666) time: 0.1318 data: 0.0487 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:31 lr: 0.000034 grad: 0.1787 (0.1820) loss: 0.7552 (0.7665) time: 0.1597 data: 0.0795 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:17 lr: 0.000034 grad: 0.1758 (0.1820) loss: 0.7658 (0.7664) time: 0.1437 data: 0.0621 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:03 lr: 0.000034 grad: 0.1837 (0.1821) loss: 0.7600 (0.7662) time: 0.1369 data: 0.0528 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:49 lr: 0.000034 grad: 0.1856 (0.1821) loss: 0.7623 (0.7661) time: 0.1442 data: 0.0686 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:35 lr: 0.000034 grad: 0.1792 (0.1821) loss: 0.7536 (0.7659) time: 0.1374 data: 0.0524 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:21 lr: 0.000034 grad: 0.1765 (0.1821) loss: 0.7609 (0.7658) time: 0.1568 data: 0.0750 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:07 lr: 0.000034 grad: 0.1823 (0.1821) loss: 0.7509 (0.7657) time: 0.1086 data: 0.0216 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1790 (0.1821) loss: 0.7578 (0.7656) time: 0.1472 data: 0.0613 max mem: 9377 +Train: [66] Total time: 0:14:42 (0.1412 s / it) +Averaged stats: lr: 0.000034 grad: 0.1790 (0.1821) loss: 0.7578 (0.7656) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:03:33 loss: 0.8207 (0.8207) time: 3.4384 data: 3.3863 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8138 (0.8119) time: 0.1382 data: 0.1116 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:13 (0.2185 s / it) +Averaged stats (hcp-train-subset): loss: 0.8138 (0.8119) +Eval (hcp-val): [66] [ 0/62] eta: 0:05:25 loss: 0.8576 (0.8576) time: 5.2571 data: 5.2260 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8493 (0.8516) time: 0.1390 data: 0.1118 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:13 (0.2257 s / it) +Averaged stats (hcp-val): loss: 0.8493 (0.8516) +Eval (nsd-val): [66] [ 0/62] eta: 0:04:07 loss: 0.8226 (0.8226) time: 3.9907 data: 3.9268 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8300 (0.8312) time: 0.1316 data: 0.1045 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:13 (0.2177 s / it) +Averaged stats (nsd-val): loss: 0.8300 (0.8312) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 7:44:31 lr: 0.000034 grad: 0.5127 (0.5127) loss: 0.6836 (0.6836) time: 4.4594 data: 4.2322 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:19:59 lr: 0.000034 grad: 0.2133 (0.2614) loss: 0.7957 (0.7917) time: 0.1549 data: 0.0570 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:16:51 lr: 0.000034 grad: 0.2194 (0.2514) loss: 0.7654 (0.7802) time: 0.1398 data: 0.0449 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:15:38 lr: 0.000034 grad: 0.2136 (0.2360) loss: 0.7529 (0.7748) time: 0.1313 data: 0.0469 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:14:42 lr: 0.000034 grad: 0.1733 (0.2249) loss: 0.7809 (0.7738) time: 0.1263 data: 0.0329 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:14:05 lr: 0.000034 grad: 0.1828 (0.2190) loss: 0.7632 (0.7722) time: 0.1311 data: 0.0311 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:13:34 lr: 0.000033 grad: 0.2032 (0.2169) loss: 0.7549 (0.7698) time: 0.1291 data: 0.0411 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:13:10 lr: 0.000033 grad: 0.1901 (0.2141) loss: 0.7418 (0.7678) time: 0.1389 data: 0.0538 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:12:51 lr: 0.000033 grad: 0.1809 (0.2103) loss: 0.7559 (0.7668) time: 0.1443 data: 0.0586 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:12:37 lr: 0.000033 grad: 0.1675 (0.2065) loss: 0.7596 (0.7668) time: 0.1636 data: 0.0855 max mem: 9377 +Train: [67] [1000/6250] eta: 0:12:21 lr: 0.000033 grad: 0.1665 (0.2037) loss: 0.7805 (0.7669) time: 0.1223 data: 0.0412 max mem: 9377 +Train: [67] [1100/6250] eta: 0:12:09 lr: 0.000033 grad: 0.1778 (0.2013) loss: 0.7703 (0.7668) time: 0.1849 data: 0.1025 max mem: 9377 +Train: [67] [1200/6250] eta: 0:11:48 lr: 0.000033 grad: 0.1761 (0.1994) loss: 0.7641 (0.7668) time: 0.1406 data: 0.0615 max mem: 9377 +Train: [67] [1300/6250] eta: 0:11:34 lr: 0.000033 grad: 0.1785 (0.1976) loss: 0.7703 (0.7670) time: 0.1386 data: 0.0596 max mem: 9377 +Train: [67] [1400/6250] eta: 0:11:18 lr: 0.000033 grad: 0.1663 (0.1961) loss: 0.7738 (0.7670) time: 0.1344 data: 0.0523 max mem: 9377 +Train: [67] [1500/6250] eta: 0:11:03 lr: 0.000033 grad: 0.1741 (0.1948) loss: 0.7636 (0.7669) time: 0.1596 data: 0.0799 max mem: 9377 +Train: [67] [1600/6250] eta: 0:10:47 lr: 0.000033 grad: 0.1693 (0.1935) loss: 0.7682 (0.7669) time: 0.1430 data: 0.0613 max mem: 9377 +Train: [67] [1700/6250] eta: 0:10:33 lr: 0.000033 grad: 0.1730 (0.1925) loss: 0.7650 (0.7668) time: 0.1393 data: 0.0606 max mem: 9377 +Train: [67] [1800/6250] eta: 0:10:18 lr: 0.000033 grad: 0.1756 (0.1918) loss: 0.7621 (0.7666) time: 0.1355 data: 0.0519 max mem: 9377 +Train: [67] [1900/6250] eta: 0:10:04 lr: 0.000033 grad: 0.1774 (0.1909) loss: 0.7646 (0.7665) time: 0.1412 data: 0.0605 max mem: 9377 +Train: [67] [2000/6250] eta: 0:09:52 lr: 0.000033 grad: 0.1801 (0.1904) loss: 0.7531 (0.7663) time: 0.1436 data: 0.0642 max mem: 9377 +Train: [67] [2100/6250] eta: 0:09:40 lr: 0.000033 grad: 0.1653 (0.1896) loss: 0.7694 (0.7663) time: 0.1220 data: 0.0349 max mem: 9377 +Train: [67] [2200/6250] eta: 0:09:25 lr: 0.000033 grad: 0.1762 (0.1889) loss: 0.7646 (0.7663) time: 0.1425 data: 0.0589 max mem: 9377 +Train: [67] [2300/6250] eta: 0:09:13 lr: 0.000033 grad: 0.1774 (0.1885) loss: 0.7605 (0.7661) time: 0.2182 data: 0.1447 max mem: 9377 +Train: [67] [2400/6250] eta: 0:08:59 lr: 0.000033 grad: 0.1763 (0.1879) loss: 0.7644 (0.7659) time: 0.1449 data: 0.0666 max mem: 9377 +Train: [67] [2500/6250] eta: 0:08:46 lr: 0.000033 grad: 0.1709 (0.1874) loss: 0.7707 (0.7658) time: 0.1452 data: 0.0580 max mem: 9377 +Train: [67] [2600/6250] eta: 0:08:32 lr: 0.000033 grad: 0.1836 (0.1871) loss: 0.7547 (0.7656) time: 0.1516 data: 0.0650 max mem: 9377 +Train: [67] [2700/6250] eta: 0:08:19 lr: 0.000033 grad: 0.1820 (0.1868) loss: 0.7633 (0.7654) time: 0.1377 data: 0.0546 max mem: 9377 +Train: [67] [2800/6250] eta: 0:08:05 lr: 0.000033 grad: 0.1651 (0.1866) loss: 0.7730 (0.7653) time: 0.1284 data: 0.0451 max mem: 9377 +Train: [67] [2900/6250] eta: 0:07:51 lr: 0.000033 grad: 0.1767 (0.1863) loss: 0.7545 (0.7652) time: 0.1295 data: 0.0450 max mem: 9377 +Train: [67] [3000/6250] eta: 0:07:36 lr: 0.000033 grad: 0.1790 (0.1861) loss: 0.7587 (0.7650) time: 0.1129 data: 0.0223 max mem: 9377 +Train: [67] [3100/6250] eta: 0:07:21 lr: 0.000033 grad: 0.1790 (0.1860) loss: 0.7578 (0.7648) time: 0.1243 data: 0.0307 max mem: 9377 +Train: [67] [3200/6250] eta: 0:07:06 lr: 0.000033 grad: 0.1763 (0.1859) loss: 0.7588 (0.7647) time: 0.1366 data: 0.0459 max mem: 9377 +Train: [67] [3300/6250] eta: 0:06:50 lr: 0.000033 grad: 0.1858 (0.1858) loss: 0.7586 (0.7646) time: 0.1280 data: 0.0458 max mem: 9377 +Train: [67] [3400/6250] eta: 0:06:36 lr: 0.000033 grad: 0.1849 (0.1858) loss: 0.7568 (0.7644) time: 0.1352 data: 0.0575 max mem: 9377 +Train: [67] [3500/6250] eta: 0:06:23 lr: 0.000033 grad: 0.1873 (0.1857) loss: 0.7557 (0.7643) time: 0.1600 data: 0.0727 max mem: 9377 +Train: [67] [3600/6250] eta: 0:06:09 lr: 0.000033 grad: 0.1808 (0.1857) loss: 0.7438 (0.7641) time: 0.1529 data: 0.0737 max mem: 9377 +Train: [67] [3700/6250] eta: 0:05:57 lr: 0.000033 grad: 0.1856 (0.1855) loss: 0.7551 (0.7640) time: 0.1624 data: 0.0804 max mem: 9377 +Train: [67] [3800/6250] eta: 0:05:42 lr: 0.000033 grad: 0.1822 (0.1855) loss: 0.7689 (0.7638) time: 0.1478 data: 0.0652 max mem: 9377 +Train: [67] [3900/6250] eta: 0:05:30 lr: 0.000033 grad: 0.1746 (0.1854) loss: 0.7544 (0.7636) time: 0.1521 data: 0.0719 max mem: 9377 +Train: [67] [4000/6250] eta: 0:05:16 lr: 0.000032 grad: 0.1815 (0.1854) loss: 0.7596 (0.7634) time: 0.1630 data: 0.0852 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:03 lr: 0.000032 grad: 0.1738 (0.1854) loss: 0.7587 (0.7633) time: 0.1724 data: 0.0910 max mem: 9377 +Train: [67] [4200/6250] eta: 0:04:49 lr: 0.000032 grad: 0.1916 (0.1854) loss: 0.7504 (0.7631) time: 0.1775 data: 0.0975 max mem: 9377 +Train: [67] [4300/6250] eta: 0:04:36 lr: 0.000032 grad: 0.1833 (0.1854) loss: 0.7512 (0.7628) time: 0.2002 data: 0.1259 max mem: 9377 +Train: [67] [4400/6250] eta: 0:04:21 lr: 0.000032 grad: 0.1837 (0.1853) loss: 0.7568 (0.7627) time: 0.1422 data: 0.0528 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:06 lr: 0.000032 grad: 0.1728 (0.1853) loss: 0.7496 (0.7625) time: 0.1368 data: 0.0515 max mem: 9377 +Train: [67] [4600/6250] eta: 0:03:52 lr: 0.000032 grad: 0.1781 (0.1852) loss: 0.7477 (0.7623) time: 0.1372 data: 0.0566 max mem: 9377 +Train: [67] [4700/6250] eta: 0:03:38 lr: 0.000032 grad: 0.1838 (0.1852) loss: 0.7621 (0.7622) time: 0.1427 data: 0.0589 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:23 lr: 0.000032 grad: 0.1830 (0.1851) loss: 0.7634 (0.7622) time: 0.1214 data: 0.0368 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:09 lr: 0.000032 grad: 0.1866 (0.1850) loss: 0.7541 (0.7622) time: 0.1240 data: 0.0356 max mem: 9377 +Train: [67] [5000/6250] eta: 0:02:55 lr: 0.000032 grad: 0.1817 (0.1850) loss: 0.7566 (0.7621) time: 0.1287 data: 0.0375 max mem: 9377 +Train: [67] [5100/6250] eta: 0:02:41 lr: 0.000032 grad: 0.1868 (0.1850) loss: 0.7527 (0.7620) time: 0.1361 data: 0.0580 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:27 lr: 0.000032 grad: 0.1812 (0.1850) loss: 0.7484 (0.7618) time: 0.1413 data: 0.0608 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:12 lr: 0.000032 grad: 0.1774 (0.1850) loss: 0.7564 (0.7618) time: 0.1562 data: 0.0784 max mem: 9377 +Train: [67] [5400/6250] eta: 0:01:58 lr: 0.000032 grad: 0.1914 (0.1850) loss: 0.7571 (0.7616) time: 0.1312 data: 0.0512 max mem: 9377 +Train: [67] [5500/6250] eta: 0:01:44 lr: 0.000032 grad: 0.1796 (0.1850) loss: 0.7583 (0.7615) time: 0.1007 data: 0.0002 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:30 lr: 0.000032 grad: 0.1808 (0.1850) loss: 0.7539 (0.7614) time: 0.1282 data: 0.0439 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:16 lr: 0.000032 grad: 0.1759 (0.1850) loss: 0.7621 (0.7614) time: 0.1477 data: 0.0696 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:02 lr: 0.000032 grad: 0.1835 (0.1851) loss: 0.7654 (0.7614) time: 0.1212 data: 0.0408 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:48 lr: 0.000032 grad: 0.1886 (0.1850) loss: 0.7561 (0.7614) time: 0.1610 data: 0.0803 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:34 lr: 0.000032 grad: 0.1798 (0.1850) loss: 0.7598 (0.7613) time: 0.1384 data: 0.0605 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:20 lr: 0.000032 grad: 0.1882 (0.1849) loss: 0.7610 (0.7613) time: 0.1413 data: 0.0669 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:06 lr: 0.000032 grad: 0.1925 (0.1849) loss: 0.7439 (0.7613) time: 0.1397 data: 0.0579 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1853 (0.1849) loss: 0.7651 (0.7613) time: 0.1369 data: 0.0573 max mem: 9377 +Train: [67] Total time: 0:14:37 (0.1405 s / it) +Averaged stats: lr: 0.000032 grad: 0.1853 (0.1849) loss: 0.7651 (0.7613) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:03:41 loss: 0.8144 (0.8144) time: 3.5686 data: 3.5039 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8084 (0.8098) time: 0.0845 data: 0.0599 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:13 (0.2144 s / it) +Averaged stats (hcp-train-subset): loss: 0.8084 (0.8098) +Eval (hcp-val): [67] [ 0/62] eta: 0:03:19 loss: 0.8532 (0.8532) time: 3.2190 data: 3.1320 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8492 (0.8509) time: 0.1307 data: 0.1058 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:13 (0.2134 s / it) +Averaged stats (hcp-val): loss: 0.8492 (0.8509) +Eval (nsd-val): [67] [ 0/62] eta: 0:03:28 loss: 0.8214 (0.8214) time: 3.3654 data: 3.2635 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8294 (0.8317) time: 0.1287 data: 0.1015 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:13 (0.2179 s / it) +Averaged stats (nsd-val): loss: 0.8294 (0.8317) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 10:43:52 lr: 0.000032 grad: 0.3980 (0.3980) loss: 0.7857 (0.7857) time: 6.1812 data: 6.0817 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:19:46 lr: 0.000032 grad: 0.2287 (0.2654) loss: 0.7705 (0.7845) time: 0.1232 data: 0.0308 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:16:56 lr: 0.000032 grad: 0.2126 (0.2543) loss: 0.7776 (0.7815) time: 0.1440 data: 0.0476 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:15:42 lr: 0.000032 grad: 0.1847 (0.2420) loss: 0.7733 (0.7789) time: 0.1303 data: 0.0407 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:14:51 lr: 0.000032 grad: 0.2016 (0.2314) loss: 0.7615 (0.7754) time: 0.1207 data: 0.0310 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:14:09 lr: 0.000032 grad: 0.1997 (0.2244) loss: 0.7671 (0.7736) time: 0.1268 data: 0.0288 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:13:36 lr: 0.000032 grad: 0.1780 (0.2186) loss: 0.7716 (0.7724) time: 0.1237 data: 0.0269 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:13:09 lr: 0.000032 grad: 0.1739 (0.2129) loss: 0.7689 (0.7719) time: 0.1285 data: 0.0475 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:12:53 lr: 0.000032 grad: 0.1852 (0.2093) loss: 0.7639 (0.7711) time: 0.1446 data: 0.0599 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:12:36 lr: 0.000032 grad: 0.1832 (0.2064) loss: 0.7648 (0.7702) time: 0.1400 data: 0.0538 max mem: 9377 +Train: [68] [1000/6250] eta: 0:12:23 lr: 0.000032 grad: 0.1833 (0.2047) loss: 0.7602 (0.7689) time: 0.1519 data: 0.0685 max mem: 9377 +Train: [68] [1100/6250] eta: 0:12:04 lr: 0.000032 grad: 0.1779 (0.2025) loss: 0.7592 (0.7683) time: 0.1289 data: 0.0453 max mem: 9377 +Train: [68] [1200/6250] eta: 0:11:48 lr: 0.000032 grad: 0.1789 (0.2008) loss: 0.7618 (0.7675) time: 0.1388 data: 0.0567 max mem: 9377 +Train: [68] [1300/6250] eta: 0:11:33 lr: 0.000031 grad: 0.1786 (0.1995) loss: 0.7651 (0.7669) time: 0.1392 data: 0.0498 max mem: 9377 +Train: [68] [1400/6250] eta: 0:11:17 lr: 0.000031 grad: 0.1725 (0.1980) loss: 0.7583 (0.7666) time: 0.1359 data: 0.0515 max mem: 9377 +Train: [68] [1500/6250] eta: 0:11:03 lr: 0.000031 grad: 0.1797 (0.1968) loss: 0.7669 (0.7661) time: 0.1327 data: 0.0513 max mem: 9377 +Train: [68] [1600/6250] eta: 0:10:50 lr: 0.000031 grad: 0.1738 (0.1955) loss: 0.7572 (0.7658) time: 0.1431 data: 0.0625 max mem: 9377 +Train: [68] [1700/6250] eta: 0:10:34 lr: 0.000031 grad: 0.1759 (0.1945) loss: 0.7499 (0.7655) time: 0.1193 data: 0.0311 max mem: 9377 +Train: [68] [1800/6250] eta: 0:10:20 lr: 0.000031 grad: 0.1784 (0.1938) loss: 0.7630 (0.7652) time: 0.1429 data: 0.0625 max mem: 9377 +Train: [68] [1900/6250] eta: 0:10:08 lr: 0.000031 grad: 0.1718 (0.1931) loss: 0.7631 (0.7651) time: 0.1390 data: 0.0560 max mem: 9377 +Train: [68] [2000/6250] eta: 0:09:53 lr: 0.000031 grad: 0.1746 (0.1925) loss: 0.7663 (0.7649) time: 0.1355 data: 0.0550 max mem: 9377 +Train: [68] [2100/6250] eta: 0:09:41 lr: 0.000031 grad: 0.1805 (0.1920) loss: 0.7619 (0.7648) time: 0.1359 data: 0.0483 max mem: 9377 +Train: [68] [2200/6250] eta: 0:09:27 lr: 0.000031 grad: 0.1743 (0.1915) loss: 0.7675 (0.7645) time: 0.1362 data: 0.0519 max mem: 9377 +Train: [68] [2300/6250] eta: 0:09:12 lr: 0.000031 grad: 0.1762 (0.1910) loss: 0.7646 (0.7644) time: 0.1296 data: 0.0397 max mem: 9377 +Train: [68] [2400/6250] eta: 0:08:58 lr: 0.000031 grad: 0.1729 (0.1905) loss: 0.7645 (0.7642) time: 0.1385 data: 0.0556 max mem: 9377 +Train: [68] [2500/6250] eta: 0:08:46 lr: 0.000031 grad: 0.1833 (0.1902) loss: 0.7581 (0.7641) time: 0.1556 data: 0.0745 max mem: 9377 +Train: [68] [2600/6250] eta: 0:08:31 lr: 0.000031 grad: 0.1691 (0.1898) loss: 0.7679 (0.7641) time: 0.1649 data: 0.0839 max mem: 9377 +Train: [68] [2700/6250] eta: 0:08:18 lr: 0.000031 grad: 0.1711 (0.1894) loss: 0.7656 (0.7640) time: 0.1381 data: 0.0516 max mem: 9377 +Train: [68] [2800/6250] eta: 0:08:06 lr: 0.000031 grad: 0.1818 (0.1890) loss: 0.7542 (0.7639) time: 0.1486 data: 0.0646 max mem: 9377 +Train: [68] [2900/6250] eta: 0:07:53 lr: 0.000031 grad: 0.1799 (0.1887) loss: 0.7583 (0.7638) time: 0.1582 data: 0.0817 max mem: 9377 +Train: [68] [3000/6250] eta: 0:07:38 lr: 0.000031 grad: 0.1740 (0.1883) loss: 0.7686 (0.7638) time: 0.1439 data: 0.0649 max mem: 9377 +Train: [68] [3100/6250] eta: 0:07:23 lr: 0.000031 grad: 0.1709 (0.1878) loss: 0.7640 (0.7640) time: 0.1218 data: 0.0313 max mem: 9377 +Train: [68] [3200/6250] eta: 0:07:07 lr: 0.000031 grad: 0.1674 (0.1873) loss: 0.7779 (0.7641) time: 0.1397 data: 0.0566 max mem: 9377 +Train: [68] [3300/6250] eta: 0:06:52 lr: 0.000031 grad: 0.1665 (0.1869) loss: 0.7682 (0.7641) time: 0.1285 data: 0.0445 max mem: 9377 +Train: [68] [3400/6250] eta: 0:06:38 lr: 0.000031 grad: 0.1702 (0.1866) loss: 0.7765 (0.7643) time: 0.1456 data: 0.0574 max mem: 9377 +Train: [68] [3500/6250] eta: 0:06:24 lr: 0.000031 grad: 0.1690 (0.1863) loss: 0.7663 (0.7644) time: 0.1512 data: 0.0626 max mem: 9377 +Train: [68] [3600/6250] eta: 0:06:12 lr: 0.000031 grad: 0.1732 (0.1860) loss: 0.7667 (0.7645) time: 0.2247 data: 0.1465 max mem: 9377 +Train: [68] [3700/6250] eta: 0:05:57 lr: 0.000031 grad: 0.1717 (0.1858) loss: 0.7703 (0.7647) time: 0.1464 data: 0.0655 max mem: 9377 +Train: [68] [3800/6250] eta: 0:05:45 lr: 0.000031 grad: 0.1708 (0.1856) loss: 0.7689 (0.7647) time: 0.1797 data: 0.0984 max mem: 9377 +Train: [68] [3900/6250] eta: 0:05:31 lr: 0.000031 grad: 0.1701 (0.1855) loss: 0.7770 (0.7647) time: 0.1607 data: 0.0808 max mem: 9377 +Train: [68] [4000/6250] eta: 0:05:17 lr: 0.000031 grad: 0.1751 (0.1852) loss: 0.7681 (0.7648) time: 0.1392 data: 0.0592 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:04 lr: 0.000031 grad: 0.1770 (0.1851) loss: 0.7629 (0.7648) time: 0.1524 data: 0.0776 max mem: 9377 +Train: [68] [4200/6250] eta: 0:04:51 lr: 0.000031 grad: 0.1756 (0.1850) loss: 0.7666 (0.7648) time: 0.1674 data: 0.0893 max mem: 9377 +Train: [68] [4300/6250] eta: 0:04:36 lr: 0.000031 grad: 0.1805 (0.1849) loss: 0.7648 (0.7648) time: 0.1230 data: 0.0424 max mem: 9377 +Train: [68] [4400/6250] eta: 0:04:22 lr: 0.000031 grad: 0.1828 (0.1849) loss: 0.7648 (0.7648) time: 0.1474 data: 0.0729 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:08 lr: 0.000031 grad: 0.1776 (0.1848) loss: 0.7576 (0.7648) time: 0.1440 data: 0.0622 max mem: 9377 +Train: [68] [4600/6250] eta: 0:03:54 lr: 0.000031 grad: 0.1814 (0.1848) loss: 0.7584 (0.7646) time: 0.1431 data: 0.0620 max mem: 9377 +Train: [68] [4700/6250] eta: 0:03:39 lr: 0.000031 grad: 0.1786 (0.1848) loss: 0.7586 (0.7646) time: 0.1516 data: 0.0781 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:25 lr: 0.000030 grad: 0.1762 (0.1847) loss: 0.7656 (0.7646) time: 0.1313 data: 0.0461 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:10 lr: 0.000030 grad: 0.1806 (0.1846) loss: 0.7650 (0.7646) time: 0.1296 data: 0.0341 max mem: 9377 +Train: [68] [5000/6250] eta: 0:02:56 lr: 0.000030 grad: 0.1833 (0.1846) loss: 0.7605 (0.7646) time: 0.1346 data: 0.0502 max mem: 9377 +Train: [68] [5100/6250] eta: 0:02:42 lr: 0.000030 grad: 0.1847 (0.1846) loss: 0.7708 (0.7646) time: 0.1296 data: 0.0376 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:27 lr: 0.000030 grad: 0.1718 (0.1845) loss: 0.7624 (0.7645) time: 0.1296 data: 0.0390 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:13 lr: 0.000030 grad: 0.1843 (0.1845) loss: 0.7625 (0.7645) time: 0.1235 data: 0.0387 max mem: 9377 +Train: [68] [5400/6250] eta: 0:01:59 lr: 0.000030 grad: 0.1714 (0.1845) loss: 0.7749 (0.7644) time: 0.1273 data: 0.0385 max mem: 9377 +Train: [68] [5500/6250] eta: 0:01:45 lr: 0.000030 grad: 0.1839 (0.1845) loss: 0.7646 (0.7643) time: 0.1114 data: 0.0331 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:31 lr: 0.000030 grad: 0.1797 (0.1844) loss: 0.7681 (0.7644) time: 0.1410 data: 0.0588 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:17 lr: 0.000030 grad: 0.1824 (0.1845) loss: 0.7623 (0.7643) time: 0.1177 data: 0.0214 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:03 lr: 0.000030 grad: 0.1866 (0.1845) loss: 0.7613 (0.7642) time: 0.1476 data: 0.0630 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:49 lr: 0.000030 grad: 0.1824 (0.1845) loss: 0.7599 (0.7641) time: 0.1188 data: 0.0330 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:35 lr: 0.000030 grad: 0.1889 (0.1846) loss: 0.7598 (0.7641) time: 0.1480 data: 0.0714 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:21 lr: 0.000030 grad: 0.1875 (0.1847) loss: 0.7475 (0.7640) time: 0.1383 data: 0.0592 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:07 lr: 0.000030 grad: 0.1817 (0.1847) loss: 0.7582 (0.7639) time: 0.1372 data: 0.0554 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1800 (0.1847) loss: 0.7586 (0.7639) time: 0.1389 data: 0.0611 max mem: 9377 +Train: [68] Total time: 0:14:42 (0.1412 s / it) +Averaged stats: lr: 0.000030 grad: 0.1800 (0.1847) loss: 0.7586 (0.7639) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:03:57 loss: 0.8165 (0.8165) time: 3.8339 data: 3.7646 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8108 (0.8106) time: 0.1210 data: 0.0947 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:12 (0.2070 s / it) +Averaged stats (hcp-train-subset): loss: 0.8108 (0.8106) +Eval (hcp-val): [68] [ 0/62] eta: 0:05:15 loss: 0.8522 (0.8522) time: 5.0838 data: 5.0546 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8486 (0.8498) time: 0.1225 data: 0.0976 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:12 (0.2019 s / it) +Averaged stats (hcp-val): loss: 0.8486 (0.8498) +Eval (nsd-val): [68] [ 0/62] eta: 0:03:27 loss: 0.8217 (0.8217) time: 3.3501 data: 3.2840 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8290 (0.8296) time: 0.1176 data: 0.0928 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (nsd-val): loss: 0.8290 (0.8296) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 11:44:13 lr: 0.000030 grad: 0.3465 (0.3465) loss: 0.7359 (0.7359) time: 6.7605 data: 6.6577 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:21:11 lr: 0.000030 grad: 0.2856 (0.2670) loss: 0.7737 (0.7904) time: 0.1462 data: 0.0343 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:17:55 lr: 0.000030 grad: 0.2145 (0.2537) loss: 0.7684 (0.7849) time: 0.1402 data: 0.0445 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:16:24 lr: 0.000030 grad: 0.1949 (0.2418) loss: 0.7784 (0.7822) time: 0.1426 data: 0.0531 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:15:22 lr: 0.000030 grad: 0.2158 (0.2347) loss: 0.7697 (0.7787) time: 0.1400 data: 0.0384 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:14:41 lr: 0.000030 grad: 0.1997 (0.2296) loss: 0.7667 (0.7764) time: 0.1144 data: 0.0357 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:14:10 lr: 0.000030 grad: 0.1862 (0.2233) loss: 0.7625 (0.7740) time: 0.1326 data: 0.0485 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:13:31 lr: 0.000030 grad: 0.1888 (0.2179) loss: 0.7612 (0.7729) time: 0.1222 data: 0.0373 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:13:06 lr: 0.000030 grad: 0.1834 (0.2140) loss: 0.7594 (0.7721) time: 0.1318 data: 0.0428 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:12:45 lr: 0.000030 grad: 0.1789 (0.2104) loss: 0.7647 (0.7719) time: 0.1194 data: 0.0181 max mem: 9377 +Train: [69] [1000/6250] eta: 0:12:23 lr: 0.000030 grad: 0.1828 (0.2076) loss: 0.7556 (0.7713) time: 0.1352 data: 0.0547 max mem: 9377 +Train: [69] [1100/6250] eta: 0:12:02 lr: 0.000030 grad: 0.1778 (0.2048) loss: 0.7720 (0.7709) time: 0.1247 data: 0.0427 max mem: 9377 +Train: [69] [1200/6250] eta: 0:11:43 lr: 0.000030 grad: 0.1742 (0.2026) loss: 0.7676 (0.7706) time: 0.1320 data: 0.0532 max mem: 9377 +Train: [69] [1300/6250] eta: 0:11:27 lr: 0.000030 grad: 0.1791 (0.2010) loss: 0.7730 (0.7699) time: 0.1317 data: 0.0476 max mem: 9377 +Train: [69] [1400/6250] eta: 0:11:11 lr: 0.000030 grad: 0.1892 (0.2000) loss: 0.7587 (0.7693) time: 0.1222 data: 0.0366 max mem: 9377 +Train: [69] [1500/6250] eta: 0:10:56 lr: 0.000030 grad: 0.1775 (0.1987) loss: 0.7571 (0.7690) time: 0.1234 data: 0.0363 max mem: 9377 +Train: [69] [1600/6250] eta: 0:10:41 lr: 0.000030 grad: 0.1859 (0.1975) loss: 0.7575 (0.7687) time: 0.1271 data: 0.0439 max mem: 9377 +Train: [69] [1700/6250] eta: 0:10:26 lr: 0.000030 grad: 0.1839 (0.1967) loss: 0.7632 (0.7683) time: 0.1379 data: 0.0624 max mem: 9377 +Train: [69] [1800/6250] eta: 0:10:13 lr: 0.000030 grad: 0.1851 (0.1960) loss: 0.7605 (0.7680) time: 0.1523 data: 0.0696 max mem: 9377 +Train: [69] [1900/6250] eta: 0:09:57 lr: 0.000030 grad: 0.1706 (0.1950) loss: 0.7735 (0.7678) time: 0.1344 data: 0.0526 max mem: 9377 +Train: [69] [2000/6250] eta: 0:09:45 lr: 0.000030 grad: 0.1807 (0.1944) loss: 0.7570 (0.7677) time: 0.1489 data: 0.0710 max mem: 9377 +Train: [69] [2100/6250] eta: 0:09:31 lr: 0.000029 grad: 0.1742 (0.1936) loss: 0.7668 (0.7676) time: 0.1494 data: 0.0700 max mem: 9377 +Train: [69] [2200/6250] eta: 0:09:17 lr: 0.000029 grad: 0.1724 (0.1929) loss: 0.7630 (0.7676) time: 0.1004 data: 0.0002 max mem: 9377 +Train: [69] [2300/6250] eta: 0:09:04 lr: 0.000029 grad: 0.1779 (0.1923) loss: 0.7491 (0.7672) time: 0.0953 data: 0.0084 max mem: 9377 +Train: [69] [2400/6250] eta: 0:08:50 lr: 0.000029 grad: 0.1804 (0.1920) loss: 0.7657 (0.7669) time: 0.1463 data: 0.0576 max mem: 9377 +Train: [69] [2500/6250] eta: 0:08:37 lr: 0.000029 grad: 0.1755 (0.1917) loss: 0.7486 (0.7666) time: 0.1506 data: 0.0666 max mem: 9377 +Train: [69] [2600/6250] eta: 0:08:24 lr: 0.000029 grad: 0.1780 (0.1914) loss: 0.7471 (0.7663) time: 0.1343 data: 0.0572 max mem: 9377 +Train: [69] [2700/6250] eta: 0:08:11 lr: 0.000029 grad: 0.1798 (0.1912) loss: 0.7601 (0.7661) time: 0.1547 data: 0.0755 max mem: 9377 +Train: [69] [2800/6250] eta: 0:07:57 lr: 0.000029 grad: 0.1889 (0.1909) loss: 0.7534 (0.7657) time: 0.1308 data: 0.0488 max mem: 9377 +Train: [69] [2900/6250] eta: 0:07:44 lr: 0.000029 grad: 0.1810 (0.1906) loss: 0.7592 (0.7654) time: 0.1550 data: 0.0623 max mem: 9377 +Train: [69] [3000/6250] eta: 0:07:32 lr: 0.000029 grad: 0.1795 (0.1904) loss: 0.7657 (0.7653) time: 0.1578 data: 0.0662 max mem: 9377 +Train: [69] [3100/6250] eta: 0:07:19 lr: 0.000029 grad: 0.1702 (0.1900) loss: 0.7656 (0.7653) time: 0.1415 data: 0.0531 max mem: 9377 +Train: [69] [3200/6250] eta: 0:07:04 lr: 0.000029 grad: 0.1838 (0.1897) loss: 0.7594 (0.7653) time: 0.1258 data: 0.0339 max mem: 9377 +Train: [69] [3300/6250] eta: 0:06:50 lr: 0.000029 grad: 0.1709 (0.1895) loss: 0.7690 (0.7651) time: 0.1327 data: 0.0489 max mem: 9377 +Train: [69] [3400/6250] eta: 0:06:35 lr: 0.000029 grad: 0.1744 (0.1893) loss: 0.7641 (0.7650) time: 0.1403 data: 0.0592 max mem: 9377 +Train: [69] [3500/6250] eta: 0:06:20 lr: 0.000029 grad: 0.1803 (0.1890) loss: 0.7652 (0.7650) time: 0.1147 data: 0.0296 max mem: 9377 +Train: [69] [3600/6250] eta: 0:06:05 lr: 0.000029 grad: 0.1714 (0.1888) loss: 0.7655 (0.7650) time: 0.1360 data: 0.0484 max mem: 9377 +Train: [69] [3700/6250] eta: 0:05:51 lr: 0.000029 grad: 0.1769 (0.1886) loss: 0.7590 (0.7650) time: 0.1410 data: 0.0578 max mem: 9377 +Train: [69] [3800/6250] eta: 0:05:38 lr: 0.000029 grad: 0.1791 (0.1884) loss: 0.7650 (0.7650) time: 0.1461 data: 0.0660 max mem: 9377 +Train: [69] [3900/6250] eta: 0:05:24 lr: 0.000029 grad: 0.1783 (0.1882) loss: 0.7678 (0.7650) time: 0.1485 data: 0.0728 max mem: 9377 +Train: [69] [4000/6250] eta: 0:05:12 lr: 0.000029 grad: 0.1775 (0.1880) loss: 0.7691 (0.7650) time: 0.1757 data: 0.0999 max mem: 9377 +Train: [69] [4100/6250] eta: 0:04:58 lr: 0.000029 grad: 0.1806 (0.1878) loss: 0.7616 (0.7651) time: 0.1359 data: 0.0540 max mem: 9377 +Train: [69] [4200/6250] eta: 0:04:45 lr: 0.000029 grad: 0.1737 (0.1876) loss: 0.7728 (0.7652) time: 0.1536 data: 0.0762 max mem: 9377 +Train: [69] [4300/6250] eta: 0:04:31 lr: 0.000029 grad: 0.1902 (0.1875) loss: 0.7530 (0.7651) time: 0.1520 data: 0.0770 max mem: 9377 +Train: [69] [4400/6250] eta: 0:04:17 lr: 0.000029 grad: 0.1804 (0.1874) loss: 0.7633 (0.7651) time: 0.1121 data: 0.0252 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:04 lr: 0.000029 grad: 0.1804 (0.1873) loss: 0.7587 (0.7649) time: 0.1402 data: 0.0606 max mem: 9377 +Train: [69] [4600/6250] eta: 0:03:50 lr: 0.000029 grad: 0.1814 (0.1872) loss: 0.7557 (0.7649) time: 0.1386 data: 0.0582 max mem: 9377 +Train: [69] [4700/6250] eta: 0:03:36 lr: 0.000029 grad: 0.1827 (0.1871) loss: 0.7620 (0.7649) time: 0.1434 data: 0.0629 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:22 lr: 0.000029 grad: 0.1770 (0.1869) loss: 0.7651 (0.7649) time: 0.1324 data: 0.0468 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:08 lr: 0.000029 grad: 0.1755 (0.1867) loss: 0.7718 (0.7649) time: 0.1470 data: 0.0617 max mem: 9377 +Train: [69] [5000/6250] eta: 0:02:55 lr: 0.000029 grad: 0.1799 (0.1866) loss: 0.7668 (0.7649) time: 0.1396 data: 0.0575 max mem: 9377 +Train: [69] [5100/6250] eta: 0:02:41 lr: 0.000029 grad: 0.1854 (0.1865) loss: 0.7639 (0.7649) time: 0.1496 data: 0.0658 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:27 lr: 0.000029 grad: 0.1879 (0.1865) loss: 0.7619 (0.7649) time: 0.1170 data: 0.0370 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:12 lr: 0.000029 grad: 0.1811 (0.1865) loss: 0.7688 (0.7648) time: 0.1309 data: 0.0472 max mem: 9377 +Train: [69] [5400/6250] eta: 0:01:58 lr: 0.000029 grad: 0.1819 (0.1864) loss: 0.7617 (0.7647) time: 0.1454 data: 0.0667 max mem: 9377 +Train: [69] [5500/6250] eta: 0:01:44 lr: 0.000029 grad: 0.1787 (0.1863) loss: 0.7660 (0.7647) time: 0.1568 data: 0.0786 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:30 lr: 0.000028 grad: 0.1782 (0.1862) loss: 0.7553 (0.7646) time: 0.1348 data: 0.0471 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:16 lr: 0.000028 grad: 0.1852 (0.1862) loss: 0.7479 (0.7645) time: 0.1548 data: 0.0744 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:02 lr: 0.000028 grad: 0.1740 (0.1862) loss: 0.7594 (0.7644) time: 0.1746 data: 0.0933 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:48 lr: 0.000028 grad: 0.1819 (0.1861) loss: 0.7549 (0.7643) time: 0.2369 data: 0.1568 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:34 lr: 0.000028 grad: 0.1883 (0.1862) loss: 0.7479 (0.7642) time: 0.1367 data: 0.0554 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:20 lr: 0.000028 grad: 0.1887 (0.1862) loss: 0.7541 (0.7640) time: 0.0973 data: 0.0003 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:06 lr: 0.000028 grad: 0.1915 (0.1862) loss: 0.7452 (0.7639) time: 0.1356 data: 0.0535 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1815 (0.1862) loss: 0.7544 (0.7638) time: 0.1043 data: 0.0002 max mem: 9377 +Train: [69] Total time: 0:14:42 (0.1412 s / it) +Averaged stats: lr: 0.000028 grad: 0.1815 (0.1862) loss: 0.7544 (0.7638) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:03:26 loss: 0.8137 (0.8137) time: 3.3273 data: 3.2571 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8115 (0.8109) time: 0.1262 data: 0.0997 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-train-subset): loss: 0.8115 (0.8109) +Making plots (hcp-train-subset): example=48 +Eval (hcp-val): [69] [ 0/62] eta: 0:04:43 loss: 0.8455 (0.8455) time: 4.5794 data: 4.5347 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8491 (0.8510) time: 0.1157 data: 0.0893 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:13 (0.2133 s / it) +Averaged stats (hcp-val): loss: 0.8491 (0.8510) +Making plots (hcp-val): example=51 +Eval (nsd-val): [69] [ 0/62] eta: 0:05:09 loss: 0.8233 (0.8233) time: 4.9886 data: 4.9262 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8288 (0.8314) time: 0.1289 data: 0.1040 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:12 (0.2027 s / it) +Averaged stats (nsd-val): loss: 0.8288 (0.8314) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 7:35:01 lr: 0.000028 grad: 0.1113 (0.1113) loss: 0.8817 (0.8817) time: 4.3682 data: 4.0707 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:21:12 lr: 0.000028 grad: 0.2307 (0.2329) loss: 0.7786 (0.8011) time: 0.1764 data: 0.0929 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:18:00 lr: 0.000028 grad: 0.2396 (0.2359) loss: 0.7697 (0.7861) time: 0.1708 data: 0.0779 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:16:22 lr: 0.000028 grad: 0.2362 (0.2314) loss: 0.7519 (0.7817) time: 0.1440 data: 0.0558 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:15:34 lr: 0.000028 grad: 0.1764 (0.2253) loss: 0.7774 (0.7783) time: 0.1517 data: 0.0619 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:14:47 lr: 0.000028 grad: 0.1972 (0.2200) loss: 0.7657 (0.7758) time: 0.1470 data: 0.0524 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:14:16 lr: 0.000028 grad: 0.1927 (0.2167) loss: 0.7662 (0.7742) time: 0.1293 data: 0.0401 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:13:42 lr: 0.000028 grad: 0.1798 (0.2130) loss: 0.7655 (0.7732) time: 0.1317 data: 0.0326 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:13:19 lr: 0.000028 grad: 0.1886 (0.2101) loss: 0.7632 (0.7721) time: 0.1213 data: 0.0285 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:12:57 lr: 0.000028 grad: 0.1959 (0.2080) loss: 0.7543 (0.7708) time: 0.1486 data: 0.0590 max mem: 9377 +Train: [70] [1000/6250] eta: 0:12:41 lr: 0.000028 grad: 0.1744 (0.2059) loss: 0.7667 (0.7702) time: 0.1572 data: 0.0727 max mem: 9377 +Train: [70] [1100/6250] eta: 0:12:19 lr: 0.000028 grad: 0.1861 (0.2042) loss: 0.7559 (0.7691) time: 0.1419 data: 0.0613 max mem: 9377 +Train: [70] [1200/6250] eta: 0:12:02 lr: 0.000028 grad: 0.1833 (0.2025) loss: 0.7564 (0.7685) time: 0.1517 data: 0.0689 max mem: 9377 +Train: [70] [1300/6250] eta: 0:11:45 lr: 0.000028 grad: 0.1868 (0.2010) loss: 0.7639 (0.7679) time: 0.1398 data: 0.0585 max mem: 9377 +Train: [70] [1400/6250] eta: 0:11:29 lr: 0.000028 grad: 0.1862 (0.2002) loss: 0.7621 (0.7674) time: 0.1249 data: 0.0406 max mem: 9377 +Train: [70] [1500/6250] eta: 0:11:12 lr: 0.000028 grad: 0.1883 (0.1995) loss: 0.7627 (0.7669) time: 0.1427 data: 0.0566 max mem: 9377 +Train: [70] [1600/6250] eta: 0:10:59 lr: 0.000028 grad: 0.1803 (0.1988) loss: 0.7572 (0.7663) time: 0.1466 data: 0.0503 max mem: 9377 +Train: [70] [1700/6250] eta: 0:10:45 lr: 0.000028 grad: 0.1843 (0.1980) loss: 0.7575 (0.7657) time: 0.1649 data: 0.0775 max mem: 9377 +Train: [70] [1800/6250] eta: 0:10:30 lr: 0.000028 grad: 0.1741 (0.1974) loss: 0.7674 (0.7654) time: 0.1215 data: 0.0332 max mem: 9377 +Train: [70] [1900/6250] eta: 0:10:15 lr: 0.000028 grad: 0.1925 (0.1970) loss: 0.7505 (0.7649) time: 0.1371 data: 0.0547 max mem: 9377 +Train: [70] [2000/6250] eta: 0:10:00 lr: 0.000028 grad: 0.1867 (0.1964) loss: 0.7498 (0.7647) time: 0.1349 data: 0.0525 max mem: 9377 +Train: [70] [2100/6250] eta: 0:09:46 lr: 0.000028 grad: 0.1855 (0.1959) loss: 0.7559 (0.7645) time: 0.1244 data: 0.0395 max mem: 9377 +Train: [70] [2200/6250] eta: 0:09:32 lr: 0.000028 grad: 0.1762 (0.1953) loss: 0.7709 (0.7643) time: 0.1413 data: 0.0586 max mem: 9377 +Train: [70] [2300/6250] eta: 0:09:18 lr: 0.000028 grad: 0.1913 (0.1951) loss: 0.7560 (0.7642) time: 0.1214 data: 0.0367 max mem: 9377 +Train: [70] [2400/6250] eta: 0:09:03 lr: 0.000028 grad: 0.1907 (0.1947) loss: 0.7577 (0.7640) time: 0.1312 data: 0.0487 max mem: 9377 +Train: [70] [2500/6250] eta: 0:08:49 lr: 0.000028 grad: 0.1864 (0.1944) loss: 0.7609 (0.7640) time: 0.1024 data: 0.0213 max mem: 9377 +Train: [70] [2600/6250] eta: 0:08:36 lr: 0.000028 grad: 0.1844 (0.1940) loss: 0.7629 (0.7639) time: 0.1388 data: 0.0497 max mem: 9377 +Train: [70] [2700/6250] eta: 0:08:20 lr: 0.000028 grad: 0.1842 (0.1938) loss: 0.7545 (0.7638) time: 0.1393 data: 0.0511 max mem: 9377 +Train: [70] [2800/6250] eta: 0:08:06 lr: 0.000028 grad: 0.1756 (0.1935) loss: 0.7698 (0.7638) time: 0.1397 data: 0.0614 max mem: 9377 +Train: [70] [2900/6250] eta: 0:07:53 lr: 0.000028 grad: 0.1827 (0.1933) loss: 0.7727 (0.7637) time: 0.1448 data: 0.0604 max mem: 9377 +Train: [70] [3000/6250] eta: 0:07:40 lr: 0.000027 grad: 0.1855 (0.1931) loss: 0.7706 (0.7637) time: 0.1477 data: 0.0647 max mem: 9377 +Train: [70] [3100/6250] eta: 0:07:27 lr: 0.000027 grad: 0.1878 (0.1929) loss: 0.7626 (0.7636) time: 0.1561 data: 0.0718 max mem: 9377 +Train: [70] [3200/6250] eta: 0:07:12 lr: 0.000027 grad: 0.1771 (0.1928) loss: 0.7551 (0.7635) time: 0.1482 data: 0.0631 max mem: 9377 +Train: [70] [3300/6250] eta: 0:06:57 lr: 0.000027 grad: 0.1868 (0.1926) loss: 0.7570 (0.7633) time: 0.1258 data: 0.0406 max mem: 9377 +Train: [70] [3400/6250] eta: 0:06:42 lr: 0.000027 grad: 0.1775 (0.1924) loss: 0.7564 (0.7632) time: 0.1162 data: 0.0291 max mem: 9377 +Train: [70] [3500/6250] eta: 0:06:27 lr: 0.000027 grad: 0.1806 (0.1921) loss: 0.7647 (0.7631) time: 0.1365 data: 0.0568 max mem: 9377 +Train: [70] [3600/6250] eta: 0:06:12 lr: 0.000027 grad: 0.1840 (0.1919) loss: 0.7678 (0.7631) time: 0.1306 data: 0.0398 max mem: 9377 +Train: [70] [3700/6250] eta: 0:05:57 lr: 0.000027 grad: 0.1770 (0.1916) loss: 0.7702 (0.7630) time: 0.1267 data: 0.0354 max mem: 9377 +Train: [70] [3800/6250] eta: 0:05:44 lr: 0.000027 grad: 0.1846 (0.1916) loss: 0.7509 (0.7629) time: 0.1459 data: 0.0643 max mem: 9377 +Train: [70] [3900/6250] eta: 0:05:30 lr: 0.000027 grad: 0.1764 (0.1914) loss: 0.7651 (0.7628) time: 0.1478 data: 0.0723 max mem: 9377 +Train: [70] [4000/6250] eta: 0:05:17 lr: 0.000027 grad: 0.1850 (0.1912) loss: 0.7528 (0.7627) time: 0.1380 data: 0.0578 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:03 lr: 0.000027 grad: 0.1805 (0.1911) loss: 0.7616 (0.7626) time: 0.1478 data: 0.0658 max mem: 9377 +Train: [70] [4200/6250] eta: 0:04:49 lr: 0.000027 grad: 0.1846 (0.1909) loss: 0.7670 (0.7626) time: 0.1544 data: 0.0745 max mem: 9377 +Train: [70] [4300/6250] eta: 0:04:36 lr: 0.000027 grad: 0.1775 (0.1908) loss: 0.7623 (0.7626) time: 0.1623 data: 0.0810 max mem: 9377 +Train: [70] [4400/6250] eta: 0:04:22 lr: 0.000027 grad: 0.1823 (0.1907) loss: 0.7578 (0.7626) time: 0.1420 data: 0.0636 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:08 lr: 0.000027 grad: 0.1905 (0.1906) loss: 0.7618 (0.7625) time: 0.1223 data: 0.0389 max mem: 9377 +Train: [70] [4600/6250] eta: 0:03:53 lr: 0.000027 grad: 0.1836 (0.1906) loss: 0.7709 (0.7625) time: 0.1307 data: 0.0475 max mem: 9377 +Train: [70] [4700/6250] eta: 0:03:40 lr: 0.000027 grad: 0.1897 (0.1905) loss: 0.7623 (0.7625) time: 0.1636 data: 0.0811 max mem: 9377 +Train: [70] [4800/6250] eta: 0:03:25 lr: 0.000027 grad: 0.1833 (0.1904) loss: 0.7603 (0.7625) time: 0.1150 data: 0.0314 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:12 lr: 0.000027 grad: 0.1730 (0.1903) loss: 0.7745 (0.7625) time: 0.1139 data: 0.0286 max mem: 9377 +Train: [70] [5000/6250] eta: 0:02:57 lr: 0.000027 grad: 0.1759 (0.1901) loss: 0.7608 (0.7625) time: 0.1434 data: 0.0581 max mem: 9377 +Train: [70] [5100/6250] eta: 0:02:43 lr: 0.000027 grad: 0.1788 (0.1900) loss: 0.7673 (0.7625) time: 0.0880 data: 0.0002 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:29 lr: 0.000027 grad: 0.1854 (0.1900) loss: 0.7480 (0.7623) time: 0.1281 data: 0.0453 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:14 lr: 0.000027 grad: 0.1911 (0.1900) loss: 0.7626 (0.7623) time: 0.1348 data: 0.0536 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:00 lr: 0.000027 grad: 0.1849 (0.1900) loss: 0.7647 (0.7622) time: 0.1768 data: 0.1054 max mem: 9377 +Train: [70] [5500/6250] eta: 0:01:46 lr: 0.000027 grad: 0.1849 (0.1899) loss: 0.7646 (0.7622) time: 0.1456 data: 0.0690 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:32 lr: 0.000027 grad: 0.1770 (0.1898) loss: 0.7671 (0.7622) time: 0.1332 data: 0.0509 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:17 lr: 0.000027 grad: 0.1759 (0.1898) loss: 0.7728 (0.7622) time: 0.1473 data: 0.0655 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:03 lr: 0.000027 grad: 0.1865 (0.1897) loss: 0.7615 (0.7623) time: 0.1967 data: 0.1152 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:49 lr: 0.000027 grad: 0.1808 (0.1895) loss: 0.7735 (0.7624) time: 0.1430 data: 0.0493 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:35 lr: 0.000027 grad: 0.1776 (0.1894) loss: 0.7661 (0.7625) time: 0.1624 data: 0.0776 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:21 lr: 0.000027 grad: 0.1743 (0.1892) loss: 0.7663 (0.7626) time: 0.1777 data: 0.0959 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:07 lr: 0.000027 grad: 0.1804 (0.1891) loss: 0.7641 (0.7626) time: 0.1528 data: 0.0722 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1845 (0.1890) loss: 0.7613 (0.7626) time: 0.1471 data: 0.0689 max mem: 9377 +Train: [70] Total time: 0:14:52 (0.1429 s / it) +Averaged stats: lr: 0.000027 grad: 0.1845 (0.1890) loss: 0.7613 (0.7626) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:04:48 loss: 0.8140 (0.8140) time: 4.6541 data: 4.6234 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8120 (0.8114) time: 0.1039 data: 0.0791 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:12 (0.2094 s / it) +Averaged stats (hcp-train-subset): loss: 0.8120 (0.8114) +Eval (hcp-val): [70] [ 0/62] eta: 0:05:30 loss: 0.8518 (0.8518) time: 5.3236 data: 5.2939 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8493 (0.8519) time: 0.1437 data: 0.1163 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-val): loss: 0.8493 (0.8519) +Eval (nsd-val): [70] [ 0/62] eta: 0:05:10 loss: 0.8328 (0.8328) time: 5.0139 data: 4.9838 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8364 (0.8364) time: 0.1252 data: 0.1003 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8364 (0.8364) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 9:05:04 lr: 0.000027 grad: 0.3667 (0.3667) loss: 0.7067 (0.7067) time: 5.2328 data: 5.0519 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:20:57 lr: 0.000027 grad: 0.2721 (0.3291) loss: 0.7738 (0.7645) time: 0.1829 data: 0.0741 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:18:24 lr: 0.000027 grad: 0.2215 (0.2899) loss: 0.7717 (0.7657) time: 0.1748 data: 0.0761 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:16:40 lr: 0.000027 grad: 0.2030 (0.2634) loss: 0.7786 (0.7687) time: 0.1277 data: 0.0364 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:15:52 lr: 0.000026 grad: 0.1897 (0.2498) loss: 0.7584 (0.7683) time: 0.1501 data: 0.0700 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:15:08 lr: 0.000026 grad: 0.2032 (0.2403) loss: 0.7666 (0.7674) time: 0.1492 data: 0.0692 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:14:34 lr: 0.000026 grad: 0.1932 (0.2332) loss: 0.7622 (0.7669) time: 0.1354 data: 0.0431 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:14:03 lr: 0.000026 grad: 0.1934 (0.2267) loss: 0.7588 (0.7664) time: 0.1327 data: 0.0419 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:13:40 lr: 0.000026 grad: 0.1843 (0.2219) loss: 0.7690 (0.7662) time: 0.1427 data: 0.0524 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:13:12 lr: 0.000026 grad: 0.1830 (0.2180) loss: 0.7732 (0.7662) time: 0.1230 data: 0.0243 max mem: 9377 +Train: [71] [1000/6250] eta: 0:12:47 lr: 0.000026 grad: 0.1833 (0.2149) loss: 0.7575 (0.7656) time: 0.1046 data: 0.0100 max mem: 9377 +Train: [71] [1100/6250] eta: 0:12:27 lr: 0.000026 grad: 0.1921 (0.2126) loss: 0.7542 (0.7647) time: 0.1300 data: 0.0456 max mem: 9377 +Train: [71] [1200/6250] eta: 0:12:07 lr: 0.000026 grad: 0.1836 (0.2104) loss: 0.7592 (0.7641) time: 0.1464 data: 0.0658 max mem: 9377 +Train: [71] [1300/6250] eta: 0:11:46 lr: 0.000026 grad: 0.1785 (0.2088) loss: 0.7569 (0.7632) time: 0.1483 data: 0.0669 max mem: 9377 +Train: [71] [1400/6250] eta: 0:11:33 lr: 0.000026 grad: 0.1821 (0.2072) loss: 0.7539 (0.7626) time: 0.1063 data: 0.0189 max mem: 9377 +Train: [71] [1500/6250] eta: 0:11:13 lr: 0.000026 grad: 0.1927 (0.2061) loss: 0.7493 (0.7620) time: 0.1296 data: 0.0510 max mem: 9377 +Train: [71] [1600/6250] eta: 0:10:56 lr: 0.000026 grad: 0.1836 (0.2049) loss: 0.7486 (0.7616) time: 0.1252 data: 0.0337 max mem: 9377 +Train: [71] [1700/6250] eta: 0:10:40 lr: 0.000026 grad: 0.1905 (0.2040) loss: 0.7533 (0.7614) time: 0.1372 data: 0.0562 max mem: 9377 +Train: [71] [1800/6250] eta: 0:10:25 lr: 0.000026 grad: 0.1793 (0.2030) loss: 0.7637 (0.7614) time: 0.1553 data: 0.0733 max mem: 9377 +Train: [71] [1900/6250] eta: 0:10:10 lr: 0.000026 grad: 0.1832 (0.2023) loss: 0.7623 (0.7613) time: 0.1231 data: 0.0320 max mem: 9377 +Train: [71] [2000/6250] eta: 0:09:54 lr: 0.000026 grad: 0.1804 (0.2015) loss: 0.7639 (0.7613) time: 0.1204 data: 0.0318 max mem: 9377 +Train: [71] [2100/6250] eta: 0:09:39 lr: 0.000026 grad: 0.1778 (0.2009) loss: 0.7675 (0.7613) time: 0.1563 data: 0.0768 max mem: 9377 +Train: [71] [2200/6250] eta: 0:09:24 lr: 0.000026 grad: 0.1958 (0.2004) loss: 0.7581 (0.7612) time: 0.1206 data: 0.0368 max mem: 9377 +Train: [71] [2300/6250] eta: 0:09:09 lr: 0.000026 grad: 0.1841 (0.1998) loss: 0.7587 (0.7610) time: 0.1195 data: 0.0372 max mem: 9377 +Train: [71] [2400/6250] eta: 0:08:55 lr: 0.000026 grad: 0.1757 (0.1994) loss: 0.7640 (0.7610) time: 0.1223 data: 0.0437 max mem: 9377 +Train: [71] [2500/6250] eta: 0:08:41 lr: 0.000026 grad: 0.1843 (0.1990) loss: 0.7633 (0.7609) time: 0.1336 data: 0.0426 max mem: 9377 +Train: [71] [2600/6250] eta: 0:08:26 lr: 0.000026 grad: 0.1855 (0.1985) loss: 0.7580 (0.7608) time: 0.1158 data: 0.0372 max mem: 9377 +Train: [71] [2700/6250] eta: 0:08:15 lr: 0.000026 grad: 0.1786 (0.1980) loss: 0.7674 (0.7608) time: 0.1543 data: 0.0772 max mem: 9377 +Train: [71] [2800/6250] eta: 0:08:00 lr: 0.000026 grad: 0.1852 (0.1975) loss: 0.7618 (0.7610) time: 0.1509 data: 0.0717 max mem: 9377 +Train: [71] [2900/6250] eta: 0:07:47 lr: 0.000026 grad: 0.1835 (0.1970) loss: 0.7537 (0.7610) time: 0.1332 data: 0.0526 max mem: 9377 +Train: [71] [3000/6250] eta: 0:07:34 lr: 0.000026 grad: 0.1789 (0.1965) loss: 0.7703 (0.7611) time: 0.1509 data: 0.0618 max mem: 9377 +Train: [71] [3100/6250] eta: 0:07:22 lr: 0.000026 grad: 0.1837 (0.1962) loss: 0.7665 (0.7611) time: 0.1555 data: 0.0681 max mem: 9377 +Train: [71] [3200/6250] eta: 0:07:07 lr: 0.000026 grad: 0.1848 (0.1961) loss: 0.7636 (0.7611) time: 0.1399 data: 0.0612 max mem: 9377 +Train: [71] [3300/6250] eta: 0:06:53 lr: 0.000026 grad: 0.1831 (0.1959) loss: 0.7616 (0.7612) time: 0.1284 data: 0.0467 max mem: 9377 +Train: [71] [3400/6250] eta: 0:06:38 lr: 0.000026 grad: 0.1845 (0.1958) loss: 0.7636 (0.7611) time: 0.1351 data: 0.0454 max mem: 9377 +Train: [71] [3500/6250] eta: 0:06:23 lr: 0.000026 grad: 0.1873 (0.1956) loss: 0.7615 (0.7611) time: 0.1355 data: 0.0515 max mem: 9377 +Train: [71] [3600/6250] eta: 0:06:08 lr: 0.000026 grad: 0.1860 (0.1954) loss: 0.7586 (0.7611) time: 0.1300 data: 0.0390 max mem: 9377 +Train: [71] [3700/6250] eta: 0:05:55 lr: 0.000026 grad: 0.1933 (0.1953) loss: 0.7517 (0.7610) time: 0.1515 data: 0.0709 max mem: 9377 +Train: [71] [3800/6250] eta: 0:05:41 lr: 0.000026 grad: 0.1851 (0.1951) loss: 0.7593 (0.7610) time: 0.1527 data: 0.0610 max mem: 9377 +Train: [71] [3900/6250] eta: 0:05:27 lr: 0.000026 grad: 0.1807 (0.1949) loss: 0.7620 (0.7609) time: 0.1189 data: 0.0387 max mem: 9377 +Train: [71] [4000/6250] eta: 0:05:14 lr: 0.000026 grad: 0.1825 (0.1949) loss: 0.7671 (0.7608) time: 0.1479 data: 0.0691 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:01 lr: 0.000026 grad: 0.1816 (0.1947) loss: 0.7611 (0.7609) time: 0.1440 data: 0.0683 max mem: 9377 +Train: [71] [4200/6250] eta: 0:04:47 lr: 0.000025 grad: 0.2005 (0.1947) loss: 0.7518 (0.7607) time: 0.1103 data: 0.0315 max mem: 9377 +Train: [71] [4300/6250] eta: 0:04:33 lr: 0.000025 grad: 0.1892 (0.1946) loss: 0.7627 (0.7607) time: 0.1451 data: 0.0613 max mem: 9377 +Train: [71] [4400/6250] eta: 0:04:19 lr: 0.000025 grad: 0.1927 (0.1947) loss: 0.7517 (0.7605) time: 0.1439 data: 0.0606 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:06 lr: 0.000025 grad: 0.1908 (0.1946) loss: 0.7510 (0.7604) time: 0.1577 data: 0.0762 max mem: 9377 +Train: [71] [4600/6250] eta: 0:03:52 lr: 0.000025 grad: 0.1880 (0.1946) loss: 0.7595 (0.7603) time: 0.1282 data: 0.0530 max mem: 9377 +Train: [71] [4700/6250] eta: 0:03:38 lr: 0.000025 grad: 0.1858 (0.1945) loss: 0.7523 (0.7601) time: 0.1522 data: 0.0695 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:24 lr: 0.000025 grad: 0.1835 (0.1945) loss: 0.7540 (0.7600) time: 0.1418 data: 0.0708 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:10 lr: 0.000025 grad: 0.1882 (0.1945) loss: 0.7552 (0.7599) time: 0.1592 data: 0.0841 max mem: 9377 +Train: [71] [5000/6250] eta: 0:02:56 lr: 0.000025 grad: 0.1898 (0.1944) loss: 0.7446 (0.7597) time: 0.1668 data: 0.0859 max mem: 9377 +Train: [71] [5100/6250] eta: 0:02:42 lr: 0.000025 grad: 0.1932 (0.1943) loss: 0.7594 (0.7596) time: 0.1487 data: 0.0693 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:28 lr: 0.000025 grad: 0.1887 (0.1943) loss: 0.7547 (0.7595) time: 0.1107 data: 0.0303 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:14 lr: 0.000025 grad: 0.1854 (0.1942) loss: 0.7472 (0.7593) time: 0.1614 data: 0.0768 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:00 lr: 0.000025 grad: 0.1861 (0.1941) loss: 0.7554 (0.7593) time: 0.1222 data: 0.0398 max mem: 9377 +Train: [71] [5500/6250] eta: 0:01:45 lr: 0.000025 grad: 0.1878 (0.1941) loss: 0.7465 (0.7591) time: 0.1610 data: 0.0791 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:31 lr: 0.000025 grad: 0.1890 (0.1940) loss: 0.7513 (0.7590) time: 0.1421 data: 0.0584 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:17 lr: 0.000025 grad: 0.1909 (0.1940) loss: 0.7525 (0.7589) time: 0.1291 data: 0.0483 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:03 lr: 0.000025 grad: 0.1864 (0.1939) loss: 0.7615 (0.7589) time: 0.1430 data: 0.0613 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:49 lr: 0.000025 grad: 0.1924 (0.1938) loss: 0.7540 (0.7589) time: 0.1414 data: 0.0574 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:35 lr: 0.000025 grad: 0.1903 (0.1937) loss: 0.7641 (0.7589) time: 0.1643 data: 0.0848 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:21 lr: 0.000025 grad: 0.1881 (0.1937) loss: 0.7553 (0.7589) time: 0.1283 data: 0.0424 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.1901 (0.1936) loss: 0.7652 (0.7589) time: 0.1400 data: 0.0557 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1837 (0.1936) loss: 0.7650 (0.7589) time: 0.1238 data: 0.0382 max mem: 9377 +Train: [71] Total time: 0:14:46 (0.1419 s / it) +Averaged stats: lr: 0.000025 grad: 0.1837 (0.1936) loss: 0.7650 (0.7589) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:05:11 loss: 0.8108 (0.8108) time: 5.0194 data: 4.9754 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8096 (0.8093) time: 0.1129 data: 0.0865 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:12 (0.2090 s / it) +Averaged stats (hcp-train-subset): loss: 0.8096 (0.8093) +Eval (hcp-val): [71] [ 0/62] eta: 0:03:24 loss: 0.8522 (0.8522) time: 3.2976 data: 3.2012 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8507 (0.8521) time: 0.0772 data: 0.0522 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (hcp-val): loss: 0.8507 (0.8521) +Eval (nsd-val): [71] [ 0/62] eta: 0:03:29 loss: 0.8196 (0.8196) time: 3.3838 data: 3.3117 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8279 (0.8304) time: 0.1366 data: 0.1119 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (nsd-val): loss: 0.8279 (0.8304) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 9:17:14 lr: 0.000025 grad: 0.1750 (0.1750) loss: 0.8156 (0.8156) time: 5.3495 data: 5.1064 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:21:07 lr: 0.000025 grad: 0.2665 (0.3003) loss: 0.7808 (0.7729) time: 0.1742 data: 0.0818 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:17:53 lr: 0.000025 grad: 0.2165 (0.2776) loss: 0.7621 (0.7688) time: 0.1609 data: 0.0634 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:16:52 lr: 0.000025 grad: 0.2071 (0.2592) loss: 0.7731 (0.7691) time: 0.1519 data: 0.0510 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:15:56 lr: 0.000025 grad: 0.2015 (0.2462) loss: 0.7749 (0.7692) time: 0.1488 data: 0.0569 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:15:06 lr: 0.000025 grad: 0.2008 (0.2382) loss: 0.7681 (0.7683) time: 0.1341 data: 0.0504 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:14:26 lr: 0.000025 grad: 0.1823 (0.2311) loss: 0.7759 (0.7680) time: 0.1274 data: 0.0351 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:13:56 lr: 0.000025 grad: 0.1917 (0.2252) loss: 0.7720 (0.7676) time: 0.1277 data: 0.0420 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:13:25 lr: 0.000025 grad: 0.1947 (0.2210) loss: 0.7736 (0.7676) time: 0.1241 data: 0.0348 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:12:59 lr: 0.000025 grad: 0.1922 (0.2174) loss: 0.7648 (0.7674) time: 0.1427 data: 0.0527 max mem: 9377 +Train: [72] [1000/6250] eta: 0:12:40 lr: 0.000025 grad: 0.1792 (0.2141) loss: 0.7629 (0.7674) time: 0.1339 data: 0.0521 max mem: 9377 +Train: [72] [1100/6250] eta: 0:12:20 lr: 0.000025 grad: 0.1778 (0.2111) loss: 0.7726 (0.7675) time: 0.1390 data: 0.0558 max mem: 9377 +Train: [72] [1200/6250] eta: 0:12:06 lr: 0.000025 grad: 0.1754 (0.2087) loss: 0.7773 (0.7674) time: 0.1537 data: 0.0669 max mem: 9377 +Train: [72] [1300/6250] eta: 0:11:49 lr: 0.000025 grad: 0.1843 (0.2068) loss: 0.7547 (0.7672) time: 0.1463 data: 0.0609 max mem: 9377 +Train: [72] [1400/6250] eta: 0:11:32 lr: 0.000025 grad: 0.1786 (0.2052) loss: 0.7711 (0.7669) time: 0.1481 data: 0.0635 max mem: 9377 +Train: [72] [1500/6250] eta: 0:11:15 lr: 0.000025 grad: 0.1850 (0.2039) loss: 0.7587 (0.7667) time: 0.1489 data: 0.0680 max mem: 9377 +Train: [72] [1600/6250] eta: 0:11:01 lr: 0.000025 grad: 0.1786 (0.2025) loss: 0.7635 (0.7666) time: 0.1457 data: 0.0708 max mem: 9377 +Train: [72] [1700/6250] eta: 0:10:45 lr: 0.000024 grad: 0.1754 (0.2012) loss: 0.7622 (0.7664) time: 0.1358 data: 0.0517 max mem: 9377 +Train: [72] [1800/6250] eta: 0:10:30 lr: 0.000024 grad: 0.1773 (0.2001) loss: 0.7734 (0.7666) time: 0.1563 data: 0.0803 max mem: 9377 +Train: [72] [1900/6250] eta: 0:10:15 lr: 0.000024 grad: 0.1805 (0.1991) loss: 0.7583 (0.7666) time: 0.1322 data: 0.0414 max mem: 9377 +Train: [72] [2000/6250] eta: 0:10:00 lr: 0.000024 grad: 0.1855 (0.1981) loss: 0.7687 (0.7666) time: 0.1122 data: 0.0332 max mem: 9377 +Train: [72] [2100/6250] eta: 0:09:45 lr: 0.000024 grad: 0.1777 (0.1975) loss: 0.7626 (0.7665) time: 0.1241 data: 0.0405 max mem: 9377 +Train: [72] [2200/6250] eta: 0:09:32 lr: 0.000024 grad: 0.1812 (0.1968) loss: 0.7623 (0.7665) time: 0.1511 data: 0.0685 max mem: 9377 +Train: [72] [2300/6250] eta: 0:09:17 lr: 0.000024 grad: 0.1754 (0.1962) loss: 0.7632 (0.7664) time: 0.1401 data: 0.0511 max mem: 9377 +Train: [72] [2400/6250] eta: 0:09:03 lr: 0.000024 grad: 0.1833 (0.1958) loss: 0.7620 (0.7663) time: 0.1417 data: 0.0519 max mem: 9377 +Train: [72] [2500/6250] eta: 0:08:49 lr: 0.000024 grad: 0.1843 (0.1954) loss: 0.7576 (0.7662) time: 0.1761 data: 0.0980 max mem: 9377 +Train: [72] [2600/6250] eta: 0:08:34 lr: 0.000024 grad: 0.1814 (0.1949) loss: 0.7615 (0.7662) time: 0.1453 data: 0.0691 max mem: 9377 +Train: [72] [2700/6250] eta: 0:08:21 lr: 0.000024 grad: 0.1827 (0.1945) loss: 0.7585 (0.7661) time: 0.1573 data: 0.0825 max mem: 9377 +Train: [72] [2800/6250] eta: 0:08:07 lr: 0.000024 grad: 0.1809 (0.1941) loss: 0.7632 (0.7660) time: 0.1382 data: 0.0564 max mem: 9377 +Train: [72] [2900/6250] eta: 0:07:53 lr: 0.000024 grad: 0.1933 (0.1940) loss: 0.7646 (0.7658) time: 0.1393 data: 0.0585 max mem: 9377 +Train: [72] [3000/6250] eta: 0:07:39 lr: 0.000024 grad: 0.1856 (0.1937) loss: 0.7690 (0.7657) time: 0.1605 data: 0.0781 max mem: 9377 +Train: [72] [3100/6250] eta: 0:07:26 lr: 0.000024 grad: 0.1814 (0.1934) loss: 0.7609 (0.7656) time: 0.1414 data: 0.0510 max mem: 9377 +Train: [72] [3200/6250] eta: 0:07:13 lr: 0.000024 grad: 0.1803 (0.1931) loss: 0.7622 (0.7655) time: 0.1491 data: 0.0601 max mem: 9377 +Train: [72] [3300/6250] eta: 0:06:58 lr: 0.000024 grad: 0.1719 (0.1929) loss: 0.7658 (0.7654) time: 0.1401 data: 0.0563 max mem: 9377 +Train: [72] [3400/6250] eta: 0:06:44 lr: 0.000024 grad: 0.1843 (0.1928) loss: 0.7598 (0.7652) time: 0.1369 data: 0.0564 max mem: 9377 +Train: [72] [3500/6250] eta: 0:06:28 lr: 0.000024 grad: 0.1857 (0.1926) loss: 0.7565 (0.7651) time: 0.1389 data: 0.0561 max mem: 9377 +Train: [72] [3600/6250] eta: 0:06:13 lr: 0.000024 grad: 0.1794 (0.1923) loss: 0.7681 (0.7649) time: 0.1307 data: 0.0540 max mem: 9377 +Train: [72] [3700/6250] eta: 0:05:58 lr: 0.000024 grad: 0.1813 (0.1921) loss: 0.7606 (0.7648) time: 0.1175 data: 0.0296 max mem: 9377 +Train: [72] [3800/6250] eta: 0:05:44 lr: 0.000024 grad: 0.1795 (0.1918) loss: 0.7680 (0.7648) time: 0.1130 data: 0.0247 max mem: 9377 +Train: [72] [3900/6250] eta: 0:05:30 lr: 0.000024 grad: 0.1859 (0.1916) loss: 0.7571 (0.7646) time: 0.1355 data: 0.0479 max mem: 9377 +Train: [72] [4000/6250] eta: 0:05:17 lr: 0.000024 grad: 0.1817 (0.1915) loss: 0.7721 (0.7646) time: 0.1828 data: 0.1038 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:03 lr: 0.000024 grad: 0.1819 (0.1914) loss: 0.7586 (0.7644) time: 0.1888 data: 0.1097 max mem: 9377 +Train: [72] [4200/6250] eta: 0:04:49 lr: 0.000024 grad: 0.1857 (0.1913) loss: 0.7703 (0.7643) time: 0.1466 data: 0.0665 max mem: 9377 +Train: [72] [4300/6250] eta: 0:04:36 lr: 0.000024 grad: 0.1876 (0.1911) loss: 0.7498 (0.7642) time: 0.1561 data: 0.0776 max mem: 9377 +Train: [72] [4400/6250] eta: 0:04:22 lr: 0.000024 grad: 0.1885 (0.1910) loss: 0.7697 (0.7642) time: 0.1624 data: 0.0890 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:08 lr: 0.000024 grad: 0.1746 (0.1908) loss: 0.7638 (0.7642) time: 0.1605 data: 0.0804 max mem: 9377 +Train: [72] [4600/6250] eta: 0:03:54 lr: 0.000024 grad: 0.1807 (0.1907) loss: 0.7667 (0.7642) time: 0.1230 data: 0.0397 max mem: 9377 +Train: [72] [4700/6250] eta: 0:03:40 lr: 0.000024 grad: 0.1860 (0.1906) loss: 0.7538 (0.7642) time: 0.1303 data: 0.0499 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:26 lr: 0.000024 grad: 0.1806 (0.1905) loss: 0.7624 (0.7641) time: 0.1302 data: 0.0480 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:13 lr: 0.000024 grad: 0.1913 (0.1904) loss: 0.7394 (0.7640) time: 0.2401 data: 0.1563 max mem: 9377 +Train: [72] [5000/6250] eta: 0:02:58 lr: 0.000024 grad: 0.1865 (0.1904) loss: 0.7480 (0.7639) time: 0.1369 data: 0.0542 max mem: 9377 +Train: [72] [5100/6250] eta: 0:02:43 lr: 0.000024 grad: 0.1933 (0.1904) loss: 0.7568 (0.7637) time: 0.1284 data: 0.0426 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:29 lr: 0.000024 grad: 0.1908 (0.1905) loss: 0.7536 (0.7635) time: 0.1466 data: 0.0681 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:15 lr: 0.000024 grad: 0.1927 (0.1906) loss: 0.7472 (0.7634) time: 0.1749 data: 0.0935 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:01 lr: 0.000024 grad: 0.1908 (0.1906) loss: 0.7481 (0.7632) time: 0.1443 data: 0.0604 max mem: 9377 +Train: [72] [5500/6250] eta: 0:01:47 lr: 0.000023 grad: 0.1850 (0.1906) loss: 0.7558 (0.7630) time: 0.1443 data: 0.0653 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:32 lr: 0.000023 grad: 0.1912 (0.1905) loss: 0.7547 (0.7630) time: 0.1247 data: 0.0450 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:18 lr: 0.000023 grad: 0.1885 (0.1906) loss: 0.7506 (0.7628) time: 0.1405 data: 0.0638 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:04 lr: 0.000023 grad: 0.1889 (0.1905) loss: 0.7554 (0.7627) time: 0.1846 data: 0.0990 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:50 lr: 0.000023 grad: 0.1948 (0.1905) loss: 0.7566 (0.7626) time: 0.1689 data: 0.0869 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:35 lr: 0.000023 grad: 0.1971 (0.1906) loss: 0.7482 (0.7625) time: 0.1513 data: 0.0718 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:21 lr: 0.000023 grad: 0.1876 (0.1907) loss: 0.7577 (0.7623) time: 0.1222 data: 0.0373 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:07 lr: 0.000023 grad: 0.1882 (0.1907) loss: 0.7597 (0.7622) time: 0.1185 data: 0.0322 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1953 (0.1908) loss: 0.7532 (0.7621) time: 0.1375 data: 0.0628 max mem: 9377 +Train: [72] Total time: 0:14:57 (0.1435 s / it) +Averaged stats: lr: 0.000023 grad: 0.1953 (0.1908) loss: 0.7532 (0.7621) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:05:29 loss: 0.8133 (0.8133) time: 5.3213 data: 5.2913 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8128 (0.8095) time: 0.1484 data: 0.1184 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.8128 (0.8095) +Eval (hcp-val): [72] [ 0/62] eta: 0:05:03 loss: 0.8488 (0.8488) time: 4.8891 data: 4.8070 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8504 (0.8517) time: 0.0925 data: 0.0656 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:15 (0.2485 s / it) +Averaged stats (hcp-val): loss: 0.8504 (0.8517) +Eval (nsd-val): [72] [ 0/62] eta: 0:06:26 loss: 0.8230 (0.8230) time: 6.2267 data: 6.1796 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8291 (0.8306) time: 0.1372 data: 0.1121 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:14 (0.2390 s / it) +Averaged stats (nsd-val): loss: 0.8291 (0.8306) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 9:49:40 lr: 0.000023 grad: 0.2167 (0.2167) loss: 0.8574 (0.8574) time: 5.6609 data: 5.5223 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:24:23 lr: 0.000023 grad: 0.2520 (0.2532) loss: 0.7759 (0.7922) time: 0.1491 data: 0.0410 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:20:33 lr: 0.000023 grad: 0.2127 (0.2424) loss: 0.7613 (0.7835) time: 0.1827 data: 0.0808 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:18:45 lr: 0.000023 grad: 0.2070 (0.2344) loss: 0.7579 (0.7770) time: 0.1544 data: 0.0537 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:18:00 lr: 0.000023 grad: 0.2041 (0.2296) loss: 0.7594 (0.7724) time: 0.1746 data: 0.0850 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:17:24 lr: 0.000023 grad: 0.1990 (0.2248) loss: 0.7596 (0.7690) time: 0.1848 data: 0.0924 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:16:36 lr: 0.000023 grad: 0.2010 (0.2212) loss: 0.7536 (0.7662) time: 0.1583 data: 0.0583 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:15:59 lr: 0.000023 grad: 0.1918 (0.2184) loss: 0.7602 (0.7646) time: 0.1511 data: 0.0609 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:15:30 lr: 0.000023 grad: 0.1896 (0.2154) loss: 0.7628 (0.7641) time: 0.1421 data: 0.0527 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:14:53 lr: 0.000023 grad: 0.1789 (0.2129) loss: 0.7716 (0.7640) time: 0.1293 data: 0.0354 max mem: 9377 +Train: [73] [1000/6250] eta: 0:14:22 lr: 0.000023 grad: 0.1916 (0.2106) loss: 0.7638 (0.7638) time: 0.1306 data: 0.0477 max mem: 9377 +Train: [73] [1100/6250] eta: 0:13:56 lr: 0.000023 grad: 0.1907 (0.2090) loss: 0.7664 (0.7636) time: 0.1280 data: 0.0376 max mem: 9377 +Train: [73] [1200/6250] eta: 0:13:29 lr: 0.000023 grad: 0.1882 (0.2075) loss: 0.7548 (0.7634) time: 0.1419 data: 0.0642 max mem: 9377 +Train: [73] [1300/6250] eta: 0:13:08 lr: 0.000023 grad: 0.1854 (0.2064) loss: 0.7588 (0.7630) time: 0.1623 data: 0.0811 max mem: 9377 +Train: [73] [1400/6250] eta: 0:12:44 lr: 0.000023 grad: 0.1909 (0.2054) loss: 0.7568 (0.7628) time: 0.1516 data: 0.0728 max mem: 9377 +Train: [73] [1500/6250] eta: 0:12:26 lr: 0.000023 grad: 0.2001 (0.2048) loss: 0.7573 (0.7625) time: 0.1860 data: 0.0984 max mem: 9377 +Train: [73] [1600/6250] eta: 0:12:02 lr: 0.000023 grad: 0.1927 (0.2042) loss: 0.7636 (0.7622) time: 0.1341 data: 0.0513 max mem: 9377 +Train: [73] [1700/6250] eta: 0:11:42 lr: 0.000023 grad: 0.1922 (0.2037) loss: 0.7549 (0.7620) time: 0.1374 data: 0.0450 max mem: 9377 +Train: [73] [1800/6250] eta: 0:11:23 lr: 0.000023 grad: 0.1902 (0.2031) loss: 0.7564 (0.7619) time: 0.1327 data: 0.0505 max mem: 9377 +Train: [73] [1900/6250] eta: 0:11:03 lr: 0.000023 grad: 0.1876 (0.2025) loss: 0.7568 (0.7616) time: 0.1386 data: 0.0552 max mem: 9377 +Train: [73] [2000/6250] eta: 0:10:43 lr: 0.000023 grad: 0.1932 (0.2021) loss: 0.7537 (0.7613) time: 0.1174 data: 0.0300 max mem: 9377 +Train: [73] [2100/6250] eta: 0:10:26 lr: 0.000023 grad: 0.1892 (0.2017) loss: 0.7526 (0.7612) time: 0.1340 data: 0.0567 max mem: 9377 +Train: [73] [2200/6250] eta: 0:10:09 lr: 0.000023 grad: 0.1945 (0.2013) loss: 0.7660 (0.7611) time: 0.1310 data: 0.0498 max mem: 9377 +Train: [73] [2300/6250] eta: 0:09:51 lr: 0.000023 grad: 0.1906 (0.2008) loss: 0.7544 (0.7609) time: 0.1169 data: 0.0327 max mem: 9377 +Train: [73] [2400/6250] eta: 0:09:35 lr: 0.000023 grad: 0.1989 (0.2004) loss: 0.7489 (0.7607) time: 0.1556 data: 0.0733 max mem: 9377 +Train: [73] [2500/6250] eta: 0:09:21 lr: 0.000023 grad: 0.1786 (0.2000) loss: 0.7524 (0.7605) time: 0.2245 data: 0.1427 max mem: 9377 +Train: [73] [2600/6250] eta: 0:09:03 lr: 0.000023 grad: 0.1905 (0.1996) loss: 0.7580 (0.7604) time: 0.1382 data: 0.0535 max mem: 9377 +Train: [73] [2700/6250] eta: 0:08:47 lr: 0.000023 grad: 0.1830 (0.1994) loss: 0.7615 (0.7601) time: 0.1376 data: 0.0582 max mem: 9377 +Train: [73] [2800/6250] eta: 0:08:33 lr: 0.000023 grad: 0.1809 (0.1991) loss: 0.7627 (0.7599) time: 0.1392 data: 0.0623 max mem: 9377 +Train: [73] [2900/6250] eta: 0:08:18 lr: 0.000023 grad: 0.1893 (0.1990) loss: 0.7510 (0.7596) time: 0.1484 data: 0.0714 max mem: 9377 +Train: [73] [3000/6250] eta: 0:08:02 lr: 0.000023 grad: 0.1876 (0.1988) loss: 0.7598 (0.7595) time: 0.1271 data: 0.0506 max mem: 9377 +Train: [73] [3100/6250] eta: 0:07:48 lr: 0.000023 grad: 0.1877 (0.1986) loss: 0.7574 (0.7593) time: 0.1589 data: 0.0803 max mem: 9377 +Train: [73] [3200/6250] eta: 0:07:33 lr: 0.000022 grad: 0.1913 (0.1984) loss: 0.7519 (0.7591) time: 0.1387 data: 0.0613 max mem: 9377 +Train: [73] [3300/6250] eta: 0:07:18 lr: 0.000022 grad: 0.1863 (0.1981) loss: 0.7475 (0.7590) time: 0.1323 data: 0.0518 max mem: 9377 +Train: [73] [3400/6250] eta: 0:07:02 lr: 0.000022 grad: 0.1928 (0.1979) loss: 0.7605 (0.7590) time: 0.1241 data: 0.0347 max mem: 9377 +Train: [73] [3500/6250] eta: 0:06:46 lr: 0.000022 grad: 0.1882 (0.1977) loss: 0.7538 (0.7589) time: 0.1287 data: 0.0542 max mem: 9377 +Train: [73] [3600/6250] eta: 0:06:30 lr: 0.000022 grad: 0.1895 (0.1974) loss: 0.7672 (0.7590) time: 0.1277 data: 0.0462 max mem: 9377 +Train: [73] [3700/6250] eta: 0:06:14 lr: 0.000022 grad: 0.1888 (0.1971) loss: 0.7599 (0.7591) time: 0.1316 data: 0.0472 max mem: 9377 +Train: [73] [3800/6250] eta: 0:05:58 lr: 0.000022 grad: 0.1876 (0.1968) loss: 0.7661 (0.7592) time: 0.1259 data: 0.0444 max mem: 9377 +Train: [73] [3900/6250] eta: 0:05:42 lr: 0.000022 grad: 0.1899 (0.1965) loss: 0.7591 (0.7593) time: 0.1430 data: 0.0578 max mem: 9377 +Train: [73] [4000/6250] eta: 0:05:27 lr: 0.000022 grad: 0.1916 (0.1964) loss: 0.7578 (0.7593) time: 0.1420 data: 0.0625 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:13 lr: 0.000022 grad: 0.1953 (0.1963) loss: 0.7541 (0.7593) time: 0.1335 data: 0.0477 max mem: 9377 +Train: [73] [4200/6250] eta: 0:04:58 lr: 0.000022 grad: 0.1894 (0.1961) loss: 0.7490 (0.7593) time: 0.1412 data: 0.0561 max mem: 9377 +Train: [73] [4300/6250] eta: 0:04:44 lr: 0.000022 grad: 0.1892 (0.1960) loss: 0.7605 (0.7594) time: 0.1368 data: 0.0531 max mem: 9377 +Train: [73] [4400/6250] eta: 0:04:30 lr: 0.000022 grad: 0.1845 (0.1958) loss: 0.7674 (0.7595) time: 0.1788 data: 0.1041 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:17 lr: 0.000022 grad: 0.1829 (0.1957) loss: 0.7656 (0.7597) time: 0.0932 data: 0.0004 max mem: 9377 +Train: [73] [4600/6250] eta: 0:04:02 lr: 0.000022 grad: 0.1854 (0.1955) loss: 0.7719 (0.7598) time: 0.1395 data: 0.0609 max mem: 9377 +Train: [73] [4700/6250] eta: 0:03:48 lr: 0.000022 grad: 0.1979 (0.1955) loss: 0.7698 (0.7600) time: 0.1499 data: 0.0750 max mem: 9377 +Train: [73] [4800/6250] eta: 0:03:33 lr: 0.000022 grad: 0.1904 (0.1954) loss: 0.7634 (0.7601) time: 0.2007 data: 0.1197 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:18 lr: 0.000022 grad: 0.1858 (0.1952) loss: 0.7698 (0.7602) time: 0.1355 data: 0.0608 max mem: 9377 +Train: [73] [5000/6250] eta: 0:03:04 lr: 0.000022 grad: 0.1870 (0.1951) loss: 0.7693 (0.7603) time: 0.1233 data: 0.0442 max mem: 9377 +Train: [73] [5100/6250] eta: 0:02:49 lr: 0.000022 grad: 0.1894 (0.1950) loss: 0.7636 (0.7604) time: 0.1467 data: 0.0629 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:34 lr: 0.000022 grad: 0.1885 (0.1949) loss: 0.7717 (0.7606) time: 0.1028 data: 0.0232 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:19 lr: 0.000022 grad: 0.1814 (0.1947) loss: 0.7684 (0.7607) time: 0.1372 data: 0.0583 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:04 lr: 0.000022 grad: 0.1891 (0.1946) loss: 0.7699 (0.7608) time: 0.1061 data: 0.0216 max mem: 9377 +Train: [73] [5500/6250] eta: 0:01:49 lr: 0.000022 grad: 0.1918 (0.1945) loss: 0.7665 (0.7609) time: 0.1610 data: 0.0811 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:35 lr: 0.000022 grad: 0.1826 (0.1944) loss: 0.7653 (0.7610) time: 0.1307 data: 0.0498 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:20 lr: 0.000022 grad: 0.1930 (0.1943) loss: 0.7674 (0.7611) time: 0.1387 data: 0.0551 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:05 lr: 0.000022 grad: 0.1834 (0.1942) loss: 0.7671 (0.7612) time: 0.1601 data: 0.0818 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:51 lr: 0.000022 grad: 0.1973 (0.1941) loss: 0.7658 (0.7613) time: 0.1212 data: 0.0369 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:36 lr: 0.000022 grad: 0.1929 (0.1941) loss: 0.7660 (0.7613) time: 0.1378 data: 0.0578 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:21 lr: 0.000022 grad: 0.1894 (0.1941) loss: 0.7665 (0.7613) time: 0.1315 data: 0.0460 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:07 lr: 0.000022 grad: 0.1971 (0.1941) loss: 0.7520 (0.7613) time: 0.1210 data: 0.0432 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1860 (0.1941) loss: 0.7659 (0.7613) time: 0.1209 data: 0.0406 max mem: 9377 +Train: [73] Total time: 0:15:15 (0.1465 s / it) +Averaged stats: lr: 0.000022 grad: 0.1860 (0.1941) loss: 0.7659 (0.7613) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:04:14 loss: 0.8137 (0.8137) time: 4.0974 data: 4.0022 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8057 (0.8084) time: 0.1348 data: 0.1097 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:13 (0.2141 s / it) +Averaged stats (hcp-train-subset): loss: 0.8057 (0.8084) +Eval (hcp-val): [73] [ 0/62] eta: 0:04:29 loss: 0.8561 (0.8561) time: 4.3418 data: 4.2820 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8505 (0.8506) time: 0.1286 data: 0.1037 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-val): loss: 0.8505 (0.8506) +Eval (nsd-val): [73] [ 0/62] eta: 0:05:54 loss: 0.8222 (0.8222) time: 5.7209 data: 5.6895 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8312 (0.8321) time: 0.1526 data: 0.1274 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:14 (0.2348 s / it) +Averaged stats (nsd-val): loss: 0.8312 (0.8321) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 9:28:50 lr: 0.000022 grad: 0.7431 (0.7431) loss: 0.7039 (0.7039) time: 5.4608 data: 5.2751 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:23:55 lr: 0.000022 grad: 0.2541 (0.3213) loss: 0.7464 (0.7563) time: 0.1614 data: 0.0311 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:20:41 lr: 0.000022 grad: 0.2515 (0.2850) loss: 0.7625 (0.7561) time: 0.1596 data: 0.0648 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:19:11 lr: 0.000022 grad: 0.2123 (0.2705) loss: 0.7584 (0.7544) time: 0.1736 data: 0.0797 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:18:00 lr: 0.000022 grad: 0.1959 (0.2583) loss: 0.7440 (0.7546) time: 0.1522 data: 0.0542 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:17:20 lr: 0.000022 grad: 0.2092 (0.2468) loss: 0.7532 (0.7562) time: 0.1535 data: 0.0618 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:16:38 lr: 0.000022 grad: 0.1985 (0.2400) loss: 0.7504 (0.7562) time: 0.1564 data: 0.0545 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:16:02 lr: 0.000022 grad: 0.1925 (0.2351) loss: 0.7563 (0.7558) time: 0.1475 data: 0.0540 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:15:30 lr: 0.000022 grad: 0.2016 (0.2301) loss: 0.7546 (0.7553) time: 0.1608 data: 0.0701 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:14:58 lr: 0.000021 grad: 0.1950 (0.2265) loss: 0.7537 (0.7551) time: 0.1616 data: 0.0742 max mem: 9377 +Train: [74] [1000/6250] eta: 0:14:27 lr: 0.000021 grad: 0.2007 (0.2234) loss: 0.7431 (0.7550) time: 0.1525 data: 0.0658 max mem: 9377 +Train: [74] [1100/6250] eta: 0:14:00 lr: 0.000021 grad: 0.1929 (0.2204) loss: 0.7546 (0.7550) time: 0.1483 data: 0.0658 max mem: 9377 +Train: [74] [1200/6250] eta: 0:13:32 lr: 0.000021 grad: 0.1884 (0.2179) loss: 0.7451 (0.7549) time: 0.1646 data: 0.0797 max mem: 9377 +Train: [74] [1300/6250] eta: 0:13:08 lr: 0.000021 grad: 0.1861 (0.2157) loss: 0.7590 (0.7547) time: 0.1553 data: 0.0730 max mem: 9377 +Train: [74] [1400/6250] eta: 0:12:46 lr: 0.000021 grad: 0.1895 (0.2140) loss: 0.7503 (0.7545) time: 0.1417 data: 0.0566 max mem: 9377 +Train: [74] [1500/6250] eta: 0:12:24 lr: 0.000021 grad: 0.1851 (0.2124) loss: 0.7658 (0.7545) time: 0.1217 data: 0.0335 max mem: 9377 +Train: [74] [1600/6250] eta: 0:12:03 lr: 0.000021 grad: 0.1881 (0.2108) loss: 0.7630 (0.7548) time: 0.1302 data: 0.0465 max mem: 9377 +Train: [74] [1700/6250] eta: 0:11:46 lr: 0.000021 grad: 0.1898 (0.2095) loss: 0.7436 (0.7547) time: 0.1499 data: 0.0729 max mem: 9377 +Train: [74] [1800/6250] eta: 0:11:27 lr: 0.000021 grad: 0.1881 (0.2085) loss: 0.7540 (0.7548) time: 0.1740 data: 0.0952 max mem: 9377 +Train: [74] [1900/6250] eta: 0:11:08 lr: 0.000021 grad: 0.2009 (0.2076) loss: 0.7525 (0.7548) time: 0.0990 data: 0.0161 max mem: 9377 +Train: [74] [2000/6250] eta: 0:10:50 lr: 0.000021 grad: 0.1911 (0.2069) loss: 0.7505 (0.7548) time: 0.1459 data: 0.0640 max mem: 9377 +Train: [74] [2100/6250] eta: 0:10:32 lr: 0.000021 grad: 0.1977 (0.2062) loss: 0.7486 (0.7548) time: 0.1455 data: 0.0640 max mem: 9377 +Train: [74] [2200/6250] eta: 0:10:14 lr: 0.000021 grad: 0.1911 (0.2055) loss: 0.7580 (0.7549) time: 0.1379 data: 0.0569 max mem: 9377 +Train: [74] [2300/6250] eta: 0:09:56 lr: 0.000021 grad: 0.1886 (0.2050) loss: 0.7555 (0.7549) time: 0.1472 data: 0.0633 max mem: 9377 +Train: [74] [2400/6250] eta: 0:09:38 lr: 0.000021 grad: 0.1838 (0.2044) loss: 0.7701 (0.7551) time: 0.1408 data: 0.0594 max mem: 9377 +Train: [74] [2500/6250] eta: 0:09:21 lr: 0.000021 grad: 0.1854 (0.2037) loss: 0.7626 (0.7553) time: 0.1282 data: 0.0448 max mem: 9377 +Train: [74] [2600/6250] eta: 0:09:08 lr: 0.000021 grad: 0.1920 (0.2032) loss: 0.7610 (0.7555) time: 0.2610 data: 0.1891 max mem: 9377 +Train: [74] [2700/6250] eta: 0:08:48 lr: 0.000021 grad: 0.1905 (0.2027) loss: 0.7531 (0.7556) time: 0.1368 data: 0.0545 max mem: 9377 +Train: [74] [2800/6250] eta: 0:08:32 lr: 0.000021 grad: 0.1920 (0.2021) loss: 0.7519 (0.7558) time: 0.1273 data: 0.0433 max mem: 9377 +Train: [74] [2900/6250] eta: 0:08:18 lr: 0.000021 grad: 0.1828 (0.2016) loss: 0.7651 (0.7560) time: 0.1452 data: 0.0600 max mem: 9377 +Train: [74] [3000/6250] eta: 0:08:02 lr: 0.000021 grad: 0.1882 (0.2011) loss: 0.7536 (0.7563) time: 0.1509 data: 0.0720 max mem: 9377 +Train: [74] [3100/6250] eta: 0:07:46 lr: 0.000021 grad: 0.1927 (0.2008) loss: 0.7572 (0.7565) time: 0.1331 data: 0.0451 max mem: 9377 +Train: [74] [3200/6250] eta: 0:07:31 lr: 0.000021 grad: 0.1794 (0.2005) loss: 0.7707 (0.7566) time: 0.1532 data: 0.0735 max mem: 9377 +Train: [74] [3300/6250] eta: 0:07:17 lr: 0.000021 grad: 0.1959 (0.2001) loss: 0.7585 (0.7569) time: 0.1806 data: 0.0948 max mem: 9377 +Train: [74] [3400/6250] eta: 0:07:01 lr: 0.000021 grad: 0.1830 (0.1997) loss: 0.7687 (0.7572) time: 0.1375 data: 0.0514 max mem: 9377 +Train: [74] [3500/6250] eta: 0:06:46 lr: 0.000021 grad: 0.1877 (0.1994) loss: 0.7647 (0.7574) time: 0.1589 data: 0.0823 max mem: 9377 +Train: [74] [3600/6250] eta: 0:06:30 lr: 0.000021 grad: 0.1877 (0.1991) loss: 0.7635 (0.7576) time: 0.1205 data: 0.0345 max mem: 9377 +Train: [74] [3700/6250] eta: 0:06:14 lr: 0.000021 grad: 0.1790 (0.1988) loss: 0.7713 (0.7577) time: 0.1261 data: 0.0388 max mem: 9377 +Train: [74] [3800/6250] eta: 0:05:58 lr: 0.000021 grad: 0.1912 (0.1986) loss: 0.7598 (0.7577) time: 0.1227 data: 0.0433 max mem: 9377 +Train: [74] [3900/6250] eta: 0:05:42 lr: 0.000021 grad: 0.1906 (0.1983) loss: 0.7514 (0.7577) time: 0.1196 data: 0.0347 max mem: 9377 +Train: [74] [4000/6250] eta: 0:05:27 lr: 0.000021 grad: 0.1818 (0.1980) loss: 0.7677 (0.7578) time: 0.1415 data: 0.0568 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:13 lr: 0.000021 grad: 0.1967 (0.1978) loss: 0.7602 (0.7579) time: 0.1321 data: 0.0521 max mem: 9377 +Train: [74] [4200/6250] eta: 0:04:58 lr: 0.000021 grad: 0.1836 (0.1976) loss: 0.7576 (0.7579) time: 0.1550 data: 0.0759 max mem: 9377 +Train: [74] [4300/6250] eta: 0:04:44 lr: 0.000021 grad: 0.1902 (0.1975) loss: 0.7581 (0.7579) time: 0.1575 data: 0.0775 max mem: 9377 +Train: [74] [4400/6250] eta: 0:04:30 lr: 0.000021 grad: 0.1815 (0.1973) loss: 0.7662 (0.7580) time: 0.1553 data: 0.0738 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:15 lr: 0.000021 grad: 0.1877 (0.1972) loss: 0.7584 (0.7580) time: 0.1541 data: 0.0762 max mem: 9377 +Train: [74] [4600/6250] eta: 0:04:01 lr: 0.000021 grad: 0.1850 (0.1970) loss: 0.7614 (0.7582) time: 0.1974 data: 0.1186 max mem: 9377 +Train: [74] [4700/6250] eta: 0:03:47 lr: 0.000021 grad: 0.1838 (0.1969) loss: 0.7577 (0.7583) time: 0.1535 data: 0.0750 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:33 lr: 0.000021 grad: 0.1779 (0.1967) loss: 0.7666 (0.7584) time: 0.1330 data: 0.0519 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:19 lr: 0.000020 grad: 0.1921 (0.1965) loss: 0.7582 (0.7585) time: 0.2262 data: 0.1431 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:03 lr: 0.000020 grad: 0.1943 (0.1963) loss: 0.7596 (0.7587) time: 0.1504 data: 0.0692 max mem: 9377 +Train: [74] [5100/6250] eta: 0:02:49 lr: 0.000020 grad: 0.1836 (0.1962) loss: 0.7594 (0.7588) time: 0.1481 data: 0.0673 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:34 lr: 0.000020 grad: 0.1795 (0.1961) loss: 0.7623 (0.7589) time: 0.1155 data: 0.0319 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:19 lr: 0.000020 grad: 0.1900 (0.1959) loss: 0.7646 (0.7591) time: 0.1083 data: 0.0248 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:04 lr: 0.000020 grad: 0.1838 (0.1958) loss: 0.7673 (0.7592) time: 0.1325 data: 0.0468 max mem: 9377 +Train: [74] [5500/6250] eta: 0:01:50 lr: 0.000020 grad: 0.1879 (0.1958) loss: 0.7576 (0.7593) time: 0.1300 data: 0.0417 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:35 lr: 0.000020 grad: 0.1851 (0.1958) loss: 0.7720 (0.7593) time: 0.1135 data: 0.0285 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:20 lr: 0.000020 grad: 0.1929 (0.1958) loss: 0.7665 (0.7593) time: 0.1775 data: 0.0994 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:06 lr: 0.000020 grad: 0.1947 (0.1958) loss: 0.7539 (0.7592) time: 0.1562 data: 0.0754 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:51 lr: 0.000020 grad: 0.1977 (0.1959) loss: 0.7567 (0.7592) time: 0.1654 data: 0.0861 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:36 lr: 0.000020 grad: 0.1984 (0.1959) loss: 0.7677 (0.7591) time: 0.1567 data: 0.0798 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:22 lr: 0.000020 grad: 0.1929 (0.1960) loss: 0.7596 (0.7590) time: 0.1198 data: 0.0379 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:07 lr: 0.000020 grad: 0.2031 (0.1961) loss: 0.7503 (0.7589) time: 0.1606 data: 0.0824 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1947 (0.1962) loss: 0.7488 (0.7588) time: 0.1267 data: 0.0351 max mem: 9377 +Train: [74] Total time: 0:15:22 (0.1475 s / it) +Averaged stats: lr: 0.000020 grad: 0.1947 (0.1962) loss: 0.7488 (0.7588) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:05:07 loss: 0.8142 (0.8142) time: 4.9597 data: 4.9301 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8071 (0.8082) time: 0.1115 data: 0.0850 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:12 (0.2082 s / it) +Averaged stats (hcp-train-subset): loss: 0.8071 (0.8082) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [74] [ 0/62] eta: 0:04:12 loss: 0.8538 (0.8538) time: 4.0734 data: 3.9852 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8504 (0.8531) time: 0.1216 data: 0.0949 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:12 (0.2046 s / it) +Averaged stats (hcp-val): loss: 0.8504 (0.8531) +Making plots (hcp-val): example=61 +Eval (nsd-val): [74] [ 0/62] eta: 0:03:58 loss: 0.8288 (0.8288) time: 3.8481 data: 3.7825 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8323 (0.8356) time: 0.1261 data: 0.1011 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (nsd-val): loss: 0.8323 (0.8356) +Making plots (nsd-val): example=37 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 9:33:47 lr: 0.000020 grad: 0.4393 (0.4393) loss: 0.8227 (0.8227) time: 5.5083 data: 5.3567 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:20:27 lr: 0.000020 grad: 0.2025 (0.2552) loss: 0.7973 (0.7906) time: 0.1599 data: 0.0512 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:17:55 lr: 0.000020 grad: 0.2185 (0.2431) loss: 0.7834 (0.7861) time: 0.1492 data: 0.0525 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:17:15 lr: 0.000020 grad: 0.2149 (0.2375) loss: 0.7669 (0.7817) time: 0.1598 data: 0.0756 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:16:31 lr: 0.000020 grad: 0.2133 (0.2337) loss: 0.7705 (0.7767) time: 0.1456 data: 0.0500 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:15:48 lr: 0.000020 grad: 0.2213 (0.2332) loss: 0.7566 (0.7728) time: 0.1357 data: 0.0471 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:15:27 lr: 0.000020 grad: 0.2111 (0.2302) loss: 0.7727 (0.7708) time: 0.1662 data: 0.0817 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:15:04 lr: 0.000020 grad: 0.1988 (0.2265) loss: 0.7560 (0.7691) time: 0.1568 data: 0.0625 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:14:34 lr: 0.000020 grad: 0.2036 (0.2235) loss: 0.7657 (0.7681) time: 0.1533 data: 0.0599 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:14:10 lr: 0.000020 grad: 0.1954 (0.2208) loss: 0.7629 (0.7675) time: 0.1369 data: 0.0447 max mem: 9377 +Train: [75] [1000/6250] eta: 0:13:42 lr: 0.000020 grad: 0.1904 (0.2185) loss: 0.7614 (0.7670) time: 0.1327 data: 0.0485 max mem: 9377 +Train: [75] [1100/6250] eta: 0:13:14 lr: 0.000020 grad: 0.1849 (0.2165) loss: 0.7694 (0.7666) time: 0.1363 data: 0.0525 max mem: 9377 +Train: [75] [1200/6250] eta: 0:12:54 lr: 0.000020 grad: 0.2024 (0.2148) loss: 0.7602 (0.7662) time: 0.1324 data: 0.0513 max mem: 9377 +Train: [75] [1300/6250] eta: 0:12:35 lr: 0.000020 grad: 0.1953 (0.2137) loss: 0.7552 (0.7654) time: 0.1280 data: 0.0449 max mem: 9377 +Train: [75] [1400/6250] eta: 0:12:16 lr: 0.000020 grad: 0.1982 (0.2127) loss: 0.7606 (0.7649) time: 0.1556 data: 0.0706 max mem: 9377 +Train: [75] [1500/6250] eta: 0:11:57 lr: 0.000020 grad: 0.1870 (0.2115) loss: 0.7668 (0.7646) time: 0.1323 data: 0.0497 max mem: 9377 +Train: [75] [1600/6250] eta: 0:11:41 lr: 0.000020 grad: 0.1953 (0.2107) loss: 0.7555 (0.7639) time: 0.1682 data: 0.0883 max mem: 9377 +Train: [75] [1700/6250] eta: 0:11:20 lr: 0.000020 grad: 0.1945 (0.2100) loss: 0.7552 (0.7635) time: 0.1410 data: 0.0590 max mem: 9377 +Train: [75] [1800/6250] eta: 0:11:01 lr: 0.000020 grad: 0.2060 (0.2093) loss: 0.7462 (0.7631) time: 0.1338 data: 0.0497 max mem: 9377 +Train: [75] [1900/6250] eta: 0:10:46 lr: 0.000020 grad: 0.1888 (0.2085) loss: 0.7606 (0.7630) time: 0.1109 data: 0.0175 max mem: 9377 +Train: [75] [2000/6250] eta: 0:10:30 lr: 0.000020 grad: 0.1962 (0.2080) loss: 0.7529 (0.7625) time: 0.1336 data: 0.0536 max mem: 9377 +Train: [75] [2100/6250] eta: 0:10:13 lr: 0.000020 grad: 0.1847 (0.2074) loss: 0.7732 (0.7621) time: 0.1428 data: 0.0638 max mem: 9377 +Train: [75] [2200/6250] eta: 0:09:56 lr: 0.000020 grad: 0.1956 (0.2068) loss: 0.7502 (0.7618) time: 0.1333 data: 0.0548 max mem: 9377 +Train: [75] [2300/6250] eta: 0:09:39 lr: 0.000020 grad: 0.1926 (0.2064) loss: 0.7523 (0.7614) time: 0.1381 data: 0.0580 max mem: 9377 +Train: [75] [2400/6250] eta: 0:09:23 lr: 0.000020 grad: 0.1947 (0.2060) loss: 0.7549 (0.7609) time: 0.1322 data: 0.0488 max mem: 9377 +Train: [75] [2500/6250] eta: 0:09:07 lr: 0.000020 grad: 0.1958 (0.2055) loss: 0.7484 (0.7606) time: 0.1408 data: 0.0653 max mem: 9377 +Train: [75] [2600/6250] eta: 0:08:50 lr: 0.000020 grad: 0.1985 (0.2051) loss: 0.7521 (0.7603) time: 0.1297 data: 0.0494 max mem: 9377 +Train: [75] [2700/6250] eta: 0:08:35 lr: 0.000020 grad: 0.1919 (0.2047) loss: 0.7542 (0.7602) time: 0.1511 data: 0.0761 max mem: 9377 +Train: [75] [2800/6250] eta: 0:08:20 lr: 0.000019 grad: 0.2002 (0.2043) loss: 0.7533 (0.7601) time: 0.1741 data: 0.0986 max mem: 9377 +Train: [75] [2900/6250] eta: 0:08:06 lr: 0.000019 grad: 0.1875 (0.2040) loss: 0.7624 (0.7599) time: 0.1667 data: 0.0914 max mem: 9377 +Train: [75] [3000/6250] eta: 0:07:51 lr: 0.000019 grad: 0.1912 (0.2036) loss: 0.7460 (0.7598) time: 0.1440 data: 0.0621 max mem: 9377 +Train: [75] [3100/6250] eta: 0:07:36 lr: 0.000019 grad: 0.1858 (0.2032) loss: 0.7653 (0.7598) time: 0.1477 data: 0.0706 max mem: 9377 +Train: [75] [3200/6250] eta: 0:07:22 lr: 0.000019 grad: 0.1903 (0.2030) loss: 0.7563 (0.7598) time: 0.1482 data: 0.0636 max mem: 9377 +Train: [75] [3300/6250] eta: 0:07:08 lr: 0.000019 grad: 0.1860 (0.2027) loss: 0.7521 (0.7596) time: 0.1435 data: 0.0532 max mem: 9377 +Train: [75] [3400/6250] eta: 0:06:54 lr: 0.000019 grad: 0.1996 (0.2025) loss: 0.7545 (0.7596) time: 0.1629 data: 0.0718 max mem: 9377 +Train: [75] [3500/6250] eta: 0:06:38 lr: 0.000019 grad: 0.1962 (0.2023) loss: 0.7599 (0.7597) time: 0.1067 data: 0.0305 max mem: 9377 +Train: [75] [3600/6250] eta: 0:06:23 lr: 0.000019 grad: 0.1834 (0.2019) loss: 0.7611 (0.7597) time: 0.1544 data: 0.0703 max mem: 9377 +Train: [75] [3700/6250] eta: 0:06:07 lr: 0.000019 grad: 0.1905 (0.2017) loss: 0.7623 (0.7597) time: 0.1306 data: 0.0496 max mem: 9377 +Train: [75] [3800/6250] eta: 0:05:52 lr: 0.000019 grad: 0.1860 (0.2015) loss: 0.7661 (0.7597) time: 0.1063 data: 0.0162 max mem: 9377 +Train: [75] [3900/6250] eta: 0:05:36 lr: 0.000019 grad: 0.1890 (0.2012) loss: 0.7542 (0.7597) time: 0.1214 data: 0.0381 max mem: 9377 +Train: [75] [4000/6250] eta: 0:05:22 lr: 0.000019 grad: 0.1910 (0.2010) loss: 0.7627 (0.7597) time: 0.1737 data: 0.0984 max mem: 9377 +Train: [75] [4100/6250] eta: 0:05:08 lr: 0.000019 grad: 0.1954 (0.2008) loss: 0.7544 (0.7597) time: 0.1268 data: 0.0476 max mem: 9377 +Train: [75] [4200/6250] eta: 0:04:54 lr: 0.000019 grad: 0.1914 (0.2006) loss: 0.7604 (0.7597) time: 0.1582 data: 0.0783 max mem: 9377 +Train: [75] [4300/6250] eta: 0:04:40 lr: 0.000019 grad: 0.1873 (0.2004) loss: 0.7697 (0.7596) time: 0.1653 data: 0.0842 max mem: 9377 +Train: [75] [4400/6250] eta: 0:04:25 lr: 0.000019 grad: 0.1915 (0.2002) loss: 0.7653 (0.7596) time: 0.1420 data: 0.0621 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:11 lr: 0.000019 grad: 0.1877 (0.2000) loss: 0.7593 (0.7596) time: 0.1455 data: 0.0671 max mem: 9377 +Train: [75] [4600/6250] eta: 0:03:57 lr: 0.000019 grad: 0.1921 (0.1999) loss: 0.7514 (0.7596) time: 0.0952 data: 0.0002 max mem: 9377 +Train: [75] [4700/6250] eta: 0:03:44 lr: 0.000019 grad: 0.1911 (0.1998) loss: 0.7684 (0.7595) time: 0.2642 data: 0.1858 max mem: 9377 +Train: [75] [4800/6250] eta: 0:03:29 lr: 0.000019 grad: 0.1906 (0.1996) loss: 0.7565 (0.7596) time: 0.1679 data: 0.0870 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:15 lr: 0.000019 grad: 0.1937 (0.1995) loss: 0.7578 (0.7595) time: 0.1699 data: 0.0932 max mem: 9377 +Train: [75] [5000/6250] eta: 0:03:01 lr: 0.000019 grad: 0.1961 (0.1995) loss: 0.7501 (0.7594) time: 0.1398 data: 0.0600 max mem: 9377 +Train: [75] [5100/6250] eta: 0:02:46 lr: 0.000019 grad: 0.1956 (0.1994) loss: 0.7528 (0.7593) time: 0.1504 data: 0.0697 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:32 lr: 0.000019 grad: 0.1887 (0.1993) loss: 0.7645 (0.7594) time: 0.1237 data: 0.0439 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:18 lr: 0.000019 grad: 0.1872 (0.1992) loss: 0.7621 (0.7593) time: 0.1798 data: 0.0995 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:03 lr: 0.000019 grad: 0.1954 (0.1992) loss: 0.7491 (0.7592) time: 0.0835 data: 0.0002 max mem: 9377 +Train: [75] [5500/6250] eta: 0:01:49 lr: 0.000019 grad: 0.1917 (0.1992) loss: 0.7507 (0.7591) time: 0.1164 data: 0.0266 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:34 lr: 0.000019 grad: 0.1889 (0.1991) loss: 0.7596 (0.7591) time: 0.1795 data: 0.0996 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:20 lr: 0.000019 grad: 0.1968 (0.1991) loss: 0.7548 (0.7590) time: 0.1321 data: 0.0479 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:05 lr: 0.000019 grad: 0.1928 (0.1990) loss: 0.7562 (0.7590) time: 0.1309 data: 0.0504 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:50 lr: 0.000019 grad: 0.1836 (0.1990) loss: 0.7583 (0.7589) time: 0.1267 data: 0.0415 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:36 lr: 0.000019 grad: 0.1901 (0.1990) loss: 0.7555 (0.7588) time: 0.1543 data: 0.0732 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:21 lr: 0.000019 grad: 0.1844 (0.1989) loss: 0.7608 (0.7587) time: 0.1223 data: 0.0367 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:07 lr: 0.000019 grad: 0.1902 (0.1990) loss: 0.7549 (0.7587) time: 0.0956 data: 0.0100 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1994 (0.1990) loss: 0.7515 (0.7586) time: 0.1568 data: 0.0702 max mem: 9377 +Train: [75] Total time: 0:15:09 (0.1456 s / it) +Averaged stats: lr: 0.000019 grad: 0.1994 (0.1990) loss: 0.7515 (0.7586) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:03:44 loss: 0.8180 (0.8180) time: 3.6134 data: 3.5456 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8061 (0.8070) time: 0.1161 data: 0.0913 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:12 (0.2073 s / it) +Averaged stats (hcp-train-subset): loss: 0.8061 (0.8070) +Eval (hcp-val): [75] [ 0/62] eta: 0:05:42 loss: 0.8487 (0.8487) time: 5.5211 data: 5.4756 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8512 (0.8522) time: 0.1245 data: 0.0949 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:13 (0.2161 s / it) +Averaged stats (hcp-val): loss: 0.8512 (0.8522) +Eval (nsd-val): [75] [ 0/62] eta: 0:03:32 loss: 0.8232 (0.8232) time: 3.4308 data: 3.3320 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8302 (0.8316) time: 0.1214 data: 0.0964 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (nsd-val): loss: 0.8302 (0.8316) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 7:32:16 lr: 0.000019 grad: 0.1595 (0.1595) loss: 0.8052 (0.8052) time: 4.3419 data: 3.9957 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:20:49 lr: 0.000019 grad: 0.3420 (0.3265) loss: 0.7433 (0.7648) time: 0.1543 data: 0.0474 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:17:47 lr: 0.000019 grad: 0.2267 (0.3005) loss: 0.7647 (0.7593) time: 0.1388 data: 0.0394 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:16:59 lr: 0.000019 grad: 0.2098 (0.2766) loss: 0.7625 (0.7590) time: 0.1586 data: 0.0662 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:16:30 lr: 0.000019 grad: 0.2195 (0.2628) loss: 0.7526 (0.7582) time: 0.1568 data: 0.0671 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:16:09 lr: 0.000019 grad: 0.2008 (0.2506) loss: 0.7620 (0.7591) time: 0.1634 data: 0.0850 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:15:46 lr: 0.000019 grad: 0.1997 (0.2419) loss: 0.7556 (0.7589) time: 0.1835 data: 0.0934 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:15:21 lr: 0.000019 grad: 0.1864 (0.2357) loss: 0.7715 (0.7587) time: 0.1785 data: 0.0887 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:15:02 lr: 0.000018 grad: 0.1867 (0.2307) loss: 0.7617 (0.7590) time: 0.1587 data: 0.0682 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:14:41 lr: 0.000018 grad: 0.1945 (0.2265) loss: 0.7627 (0.7596) time: 0.1806 data: 0.0867 max mem: 9377 +Train: [76] [1000/6250] eta: 0:14:15 lr: 0.000018 grad: 0.1962 (0.2235) loss: 0.7616 (0.7597) time: 0.1623 data: 0.0735 max mem: 9377 +Train: [76] [1100/6250] eta: 0:13:48 lr: 0.000018 grad: 0.1996 (0.2211) loss: 0.7461 (0.7593) time: 0.1694 data: 0.0855 max mem: 9377 +Train: [76] [1200/6250] eta: 0:13:20 lr: 0.000018 grad: 0.1911 (0.2190) loss: 0.7664 (0.7593) time: 0.1327 data: 0.0458 max mem: 9377 +Train: [76] [1300/6250] eta: 0:12:58 lr: 0.000018 grad: 0.1835 (0.2172) loss: 0.7632 (0.7591) time: 0.1481 data: 0.0644 max mem: 9377 +Train: [76] [1400/6250] eta: 0:12:36 lr: 0.000018 grad: 0.1925 (0.2155) loss: 0.7600 (0.7590) time: 0.1444 data: 0.0618 max mem: 9377 +Train: [76] [1500/6250] eta: 0:12:16 lr: 0.000018 grad: 0.1914 (0.2140) loss: 0.7563 (0.7587) time: 0.1409 data: 0.0573 max mem: 9377 +Train: [76] [1600/6250] eta: 0:11:55 lr: 0.000018 grad: 0.1860 (0.2128) loss: 0.7622 (0.7584) time: 0.1371 data: 0.0569 max mem: 9377 +Train: [76] [1700/6250] eta: 0:11:36 lr: 0.000018 grad: 0.1874 (0.2116) loss: 0.7556 (0.7585) time: 0.1189 data: 0.0357 max mem: 9377 +Train: [76] [1800/6250] eta: 0:11:19 lr: 0.000018 grad: 0.2012 (0.2107) loss: 0.7491 (0.7584) time: 0.1427 data: 0.0616 max mem: 9377 +Train: [76] [1900/6250] eta: 0:11:00 lr: 0.000018 grad: 0.1885 (0.2100) loss: 0.7670 (0.7585) time: 0.1460 data: 0.0687 max mem: 9377 +Train: [76] [2000/6250] eta: 0:10:43 lr: 0.000018 grad: 0.2009 (0.2094) loss: 0.7491 (0.7585) time: 0.1378 data: 0.0601 max mem: 9377 +Train: [76] [2100/6250] eta: 0:10:25 lr: 0.000018 grad: 0.1941 (0.2087) loss: 0.7597 (0.7587) time: 0.1362 data: 0.0525 max mem: 9377 +Train: [76] [2200/6250] eta: 0:10:10 lr: 0.000018 grad: 0.1928 (0.2080) loss: 0.7627 (0.7587) time: 0.1514 data: 0.0684 max mem: 9377 +Train: [76] [2300/6250] eta: 0:09:51 lr: 0.000018 grad: 0.1981 (0.2075) loss: 0.7568 (0.7586) time: 0.1157 data: 0.0335 max mem: 9377 +Train: [76] [2400/6250] eta: 0:09:34 lr: 0.000018 grad: 0.1947 (0.2071) loss: 0.7594 (0.7586) time: 0.1392 data: 0.0591 max mem: 9377 +Train: [76] [2500/6250] eta: 0:09:17 lr: 0.000018 grad: 0.1959 (0.2068) loss: 0.7551 (0.7584) time: 0.1412 data: 0.0588 max mem: 9377 +Train: [76] [2600/6250] eta: 0:09:00 lr: 0.000018 grad: 0.1919 (0.2064) loss: 0.7572 (0.7583) time: 0.1403 data: 0.0580 max mem: 9377 +Train: [76] [2700/6250] eta: 0:08:44 lr: 0.000018 grad: 0.1901 (0.2060) loss: 0.7581 (0.7582) time: 0.1370 data: 0.0552 max mem: 9377 +Train: [76] [2800/6250] eta: 0:08:28 lr: 0.000018 grad: 0.1962 (0.2056) loss: 0.7506 (0.7581) time: 0.1263 data: 0.0430 max mem: 9377 +Train: [76] [2900/6250] eta: 0:08:14 lr: 0.000018 grad: 0.1996 (0.2054) loss: 0.7481 (0.7580) time: 0.1317 data: 0.0537 max mem: 9377 +Train: [76] [3000/6250] eta: 0:07:59 lr: 0.000018 grad: 0.1947 (0.2051) loss: 0.7575 (0.7579) time: 0.1381 data: 0.0605 max mem: 9377 +Train: [76] [3100/6250] eta: 0:07:44 lr: 0.000018 grad: 0.1937 (0.2049) loss: 0.7589 (0.7578) time: 0.1414 data: 0.0613 max mem: 9377 +Train: [76] [3200/6250] eta: 0:07:29 lr: 0.000018 grad: 0.2003 (0.2048) loss: 0.7580 (0.7577) time: 0.1574 data: 0.0785 max mem: 9377 +Train: [76] [3300/6250] eta: 0:07:15 lr: 0.000018 grad: 0.2037 (0.2047) loss: 0.7386 (0.7575) time: 0.1440 data: 0.0616 max mem: 9377 +Train: [76] [3400/6250] eta: 0:07:00 lr: 0.000018 grad: 0.1933 (0.2046) loss: 0.7507 (0.7575) time: 0.1386 data: 0.0534 max mem: 9377 +Train: [76] [3500/6250] eta: 0:06:44 lr: 0.000018 grad: 0.1993 (0.2044) loss: 0.7587 (0.7574) time: 0.1387 data: 0.0552 max mem: 9377 +Train: [76] [3600/6250] eta: 0:06:29 lr: 0.000018 grad: 0.1949 (0.2042) loss: 0.7528 (0.7574) time: 0.1574 data: 0.0760 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:13 lr: 0.000018 grad: 0.2098 (0.2041) loss: 0.7363 (0.7572) time: 0.1257 data: 0.0430 max mem: 9377 +Train: [76] [3800/6250] eta: 0:05:57 lr: 0.000018 grad: 0.1954 (0.2040) loss: 0.7541 (0.7571) time: 0.1279 data: 0.0395 max mem: 9377 +Train: [76] [3900/6250] eta: 0:05:41 lr: 0.000018 grad: 0.1898 (0.2039) loss: 0.7597 (0.7570) time: 0.1462 data: 0.0629 max mem: 9377 +Train: [76] [4000/6250] eta: 0:05:26 lr: 0.000018 grad: 0.1958 (0.2038) loss: 0.7584 (0.7570) time: 0.1199 data: 0.0367 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:11 lr: 0.000018 grad: 0.1984 (0.2037) loss: 0.7530 (0.7568) time: 0.1274 data: 0.0388 max mem: 9377 +Train: [76] [4200/6250] eta: 0:04:57 lr: 0.000018 grad: 0.1951 (0.2036) loss: 0.7424 (0.7567) time: 0.1400 data: 0.0578 max mem: 9377 +Train: [76] [4300/6250] eta: 0:04:42 lr: 0.000018 grad: 0.1982 (0.2036) loss: 0.7482 (0.7565) time: 0.1567 data: 0.0758 max mem: 9377 +Train: [76] [4400/6250] eta: 0:04:28 lr: 0.000018 grad: 0.2003 (0.2034) loss: 0.7644 (0.7565) time: 0.1378 data: 0.0595 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:14 lr: 0.000018 grad: 0.1926 (0.2034) loss: 0.7522 (0.7564) time: 0.1569 data: 0.0734 max mem: 9377 +Train: [76] [4600/6250] eta: 0:03:59 lr: 0.000018 grad: 0.1964 (0.2033) loss: 0.7503 (0.7563) time: 0.1130 data: 0.0348 max mem: 9377 +Train: [76] [4700/6250] eta: 0:03:44 lr: 0.000018 grad: 0.1939 (0.2032) loss: 0.7579 (0.7562) time: 0.1354 data: 0.0566 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:30 lr: 0.000018 grad: 0.2053 (0.2031) loss: 0.7354 (0.7561) time: 0.1408 data: 0.0585 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:16 lr: 0.000018 grad: 0.2033 (0.2031) loss: 0.7434 (0.7560) time: 0.1803 data: 0.0986 max mem: 9377 +Train: [76] [5000/6250] eta: 0:03:01 lr: 0.000018 grad: 0.1976 (0.2030) loss: 0.7527 (0.7559) time: 0.1547 data: 0.0664 max mem: 9377 +Train: [76] [5100/6250] eta: 0:02:47 lr: 0.000017 grad: 0.1950 (0.2030) loss: 0.7607 (0.7558) time: 0.1547 data: 0.0721 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:33 lr: 0.000017 grad: 0.1905 (0.2030) loss: 0.7515 (0.7558) time: 0.1527 data: 0.0760 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:18 lr: 0.000017 grad: 0.1991 (0.2029) loss: 0.7448 (0.7557) time: 0.2068 data: 0.1323 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:03 lr: 0.000017 grad: 0.1985 (0.2028) loss: 0.7520 (0.7557) time: 0.1428 data: 0.0620 max mem: 9377 +Train: [76] [5500/6250] eta: 0:01:49 lr: 0.000017 grad: 0.1933 (0.2027) loss: 0.7497 (0.7557) time: 0.1673 data: 0.0921 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:34 lr: 0.000017 grad: 0.2000 (0.2027) loss: 0.7599 (0.7556) time: 0.1187 data: 0.0346 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:20 lr: 0.000017 grad: 0.1897 (0.2026) loss: 0.7589 (0.7556) time: 0.1474 data: 0.0714 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:05 lr: 0.000017 grad: 0.1905 (0.2025) loss: 0.7506 (0.7556) time: 0.1428 data: 0.0537 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:50 lr: 0.000017 grad: 0.1939 (0.2024) loss: 0.7497 (0.7555) time: 0.1208 data: 0.0391 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:36 lr: 0.000017 grad: 0.1977 (0.2023) loss: 0.7547 (0.7554) time: 0.1330 data: 0.0505 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:21 lr: 0.000017 grad: 0.2000 (0.2022) loss: 0.7524 (0.7554) time: 0.1420 data: 0.0628 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:07 lr: 0.000017 grad: 0.1930 (0.2021) loss: 0.7588 (0.7555) time: 0.1231 data: 0.0363 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1876 (0.2020) loss: 0.7626 (0.7555) time: 0.1278 data: 0.0499 max mem: 9377 +Train: [76] Total time: 0:15:12 (0.1459 s / it) +Averaged stats: lr: 0.000017 grad: 0.1876 (0.2020) loss: 0.7626 (0.7555) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:04:11 loss: 0.8127 (0.8127) time: 4.0535 data: 3.9823 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8114 (0.8071) time: 0.1283 data: 0.1034 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (hcp-train-subset): loss: 0.8114 (0.8071) +Eval (hcp-val): [76] [ 0/62] eta: 0:04:52 loss: 0.8555 (0.8555) time: 4.7210 data: 4.6906 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8536 (0.8532) time: 0.1191 data: 0.0944 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:12 (0.2026 s / it) +Averaged stats (hcp-val): loss: 0.8536 (0.8532) +Eval (nsd-val): [76] [ 0/62] eta: 0:04:58 loss: 0.8248 (0.8248) time: 4.8157 data: 4.7819 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8360 (0.8379) time: 0.1203 data: 0.0937 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:13 (0.2158 s / it) +Averaged stats (nsd-val): loss: 0.8360 (0.8379) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 8:57:54 lr: 0.000017 grad: 0.5798 (0.5798) loss: 0.7182 (0.7182) time: 5.1640 data: 5.0074 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:19:51 lr: 0.000017 grad: 0.2809 (0.2967) loss: 0.7644 (0.7780) time: 0.1334 data: 0.0297 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:17:20 lr: 0.000017 grad: 0.2149 (0.2590) loss: 0.7801 (0.7768) time: 0.1447 data: 0.0478 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:15:52 lr: 0.000017 grad: 0.1850 (0.2435) loss: 0.7742 (0.7755) time: 0.1429 data: 0.0478 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:14:52 lr: 0.000017 grad: 0.2115 (0.2347) loss: 0.7567 (0.7722) time: 0.1213 data: 0.0243 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:14:19 lr: 0.000017 grad: 0.1918 (0.2277) loss: 0.7713 (0.7712) time: 0.1634 data: 0.0819 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:14:11 lr: 0.000017 grad: 0.2031 (0.2235) loss: 0.7587 (0.7696) time: 0.1639 data: 0.0676 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:13:53 lr: 0.000017 grad: 0.1897 (0.2195) loss: 0.7636 (0.7687) time: 0.1568 data: 0.0759 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:13:37 lr: 0.000017 grad: 0.1977 (0.2169) loss: 0.7491 (0.7679) time: 0.1401 data: 0.0559 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:13:22 lr: 0.000017 grad: 0.2015 (0.2149) loss: 0.7510 (0.7672) time: 0.1493 data: 0.0587 max mem: 9377 +Train: [77] [1000/6250] eta: 0:13:04 lr: 0.000017 grad: 0.1944 (0.2133) loss: 0.7642 (0.7666) time: 0.1324 data: 0.0463 max mem: 9377 +Train: [77] [1100/6250] eta: 0:12:45 lr: 0.000017 grad: 0.1962 (0.2118) loss: 0.7627 (0.7661) time: 0.1348 data: 0.0423 max mem: 9377 +Train: [77] [1200/6250] eta: 0:12:25 lr: 0.000017 grad: 0.1930 (0.2105) loss: 0.7655 (0.7658) time: 0.1467 data: 0.0523 max mem: 9377 +Train: [77] [1300/6250] eta: 0:12:05 lr: 0.000017 grad: 0.1894 (0.2095) loss: 0.7619 (0.7653) time: 0.1367 data: 0.0556 max mem: 9377 +Train: [77] [1400/6250] eta: 0:11:46 lr: 0.000017 grad: 0.1924 (0.2088) loss: 0.7562 (0.7648) time: 0.1415 data: 0.0605 max mem: 9377 +Train: [77] [1500/6250] eta: 0:11:29 lr: 0.000017 grad: 0.2008 (0.2081) loss: 0.7554 (0.7644) time: 0.1255 data: 0.0402 max mem: 9377 +Train: [77] [1600/6250] eta: 0:11:12 lr: 0.000017 grad: 0.1988 (0.2076) loss: 0.7482 (0.7638) time: 0.1335 data: 0.0479 max mem: 9377 +Train: [77] [1700/6250] eta: 0:11:00 lr: 0.000017 grad: 0.1995 (0.2070) loss: 0.7515 (0.7632) time: 0.1991 data: 0.1213 max mem: 9377 +Train: [77] [1800/6250] eta: 0:10:41 lr: 0.000017 grad: 0.1990 (0.2067) loss: 0.7499 (0.7627) time: 0.1321 data: 0.0419 max mem: 9377 +Train: [77] [1900/6250] eta: 0:10:27 lr: 0.000017 grad: 0.2030 (0.2064) loss: 0.7573 (0.7623) time: 0.1136 data: 0.0269 max mem: 9377 +Train: [77] [2000/6250] eta: 0:10:12 lr: 0.000017 grad: 0.1902 (0.2061) loss: 0.7631 (0.7618) time: 0.1591 data: 0.0827 max mem: 9377 +Train: [77] [2100/6250] eta: 0:09:55 lr: 0.000017 grad: 0.1991 (0.2057) loss: 0.7543 (0.7614) time: 0.1320 data: 0.0480 max mem: 9377 +Train: [77] [2200/6250] eta: 0:09:39 lr: 0.000017 grad: 0.1948 (0.2054) loss: 0.7548 (0.7611) time: 0.1374 data: 0.0585 max mem: 9377 +Train: [77] [2300/6250] eta: 0:09:24 lr: 0.000017 grad: 0.2040 (0.2051) loss: 0.7561 (0.7609) time: 0.0905 data: 0.0039 max mem: 9377 +Train: [77] [2400/6250] eta: 0:09:08 lr: 0.000017 grad: 0.1986 (0.2048) loss: 0.7516 (0.7607) time: 0.1353 data: 0.0536 max mem: 9377 +Train: [77] [2500/6250] eta: 0:08:53 lr: 0.000017 grad: 0.1919 (0.2043) loss: 0.7557 (0.7606) time: 0.1378 data: 0.0626 max mem: 9377 +Train: [77] [2600/6250] eta: 0:08:38 lr: 0.000017 grad: 0.1904 (0.2039) loss: 0.7606 (0.7606) time: 0.1370 data: 0.0555 max mem: 9377 +Train: [77] [2700/6250] eta: 0:08:24 lr: 0.000017 grad: 0.1920 (0.2034) loss: 0.7640 (0.7607) time: 0.1491 data: 0.0657 max mem: 9377 +Train: [77] [2800/6250] eta: 0:08:09 lr: 0.000017 grad: 0.1955 (0.2031) loss: 0.7565 (0.7608) time: 0.1373 data: 0.0529 max mem: 9377 +Train: [77] [2900/6250] eta: 0:07:54 lr: 0.000017 grad: 0.1930 (0.2027) loss: 0.7689 (0.7608) time: 0.1245 data: 0.0451 max mem: 9377 +Train: [77] [3000/6250] eta: 0:07:41 lr: 0.000017 grad: 0.1894 (0.2024) loss: 0.7717 (0.7609) time: 0.2238 data: 0.1500 max mem: 9377 +Train: [77] [3100/6250] eta: 0:07:27 lr: 0.000017 grad: 0.1959 (0.2021) loss: 0.7642 (0.7610) time: 0.1393 data: 0.0558 max mem: 9377 +Train: [77] [3200/6250] eta: 0:07:12 lr: 0.000017 grad: 0.2026 (0.2018) loss: 0.7599 (0.7611) time: 0.1457 data: 0.0620 max mem: 9377 +Train: [77] [3300/6250] eta: 0:06:58 lr: 0.000016 grad: 0.1936 (0.2016) loss: 0.7623 (0.7611) time: 0.1387 data: 0.0546 max mem: 9377 +Train: [77] [3400/6250] eta: 0:06:44 lr: 0.000016 grad: 0.1950 (0.2013) loss: 0.7636 (0.7612) time: 0.1644 data: 0.0849 max mem: 9377 +Train: [77] [3500/6250] eta: 0:06:30 lr: 0.000016 grad: 0.1980 (0.2011) loss: 0.7694 (0.7613) time: 0.1515 data: 0.0547 max mem: 9377 +Train: [77] [3600/6250] eta: 0:06:16 lr: 0.000016 grad: 0.1911 (0.2009) loss: 0.7648 (0.7613) time: 0.1289 data: 0.0360 max mem: 9377 +Train: [77] [3700/6250] eta: 0:06:01 lr: 0.000016 grad: 0.1903 (0.2007) loss: 0.7663 (0.7613) time: 0.1054 data: 0.0145 max mem: 9377 +Train: [77] [3800/6250] eta: 0:05:46 lr: 0.000016 grad: 0.1877 (0.2005) loss: 0.7651 (0.7613) time: 0.1203 data: 0.0421 max mem: 9377 +Train: [77] [3900/6250] eta: 0:05:31 lr: 0.000016 grad: 0.1927 (0.2004) loss: 0.7595 (0.7613) time: 0.1392 data: 0.0519 max mem: 9377 +Train: [77] [4000/6250] eta: 0:05:16 lr: 0.000016 grad: 0.1984 (0.2004) loss: 0.7600 (0.7612) time: 0.1296 data: 0.0466 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:01 lr: 0.000016 grad: 0.2012 (0.2004) loss: 0.7573 (0.7612) time: 0.1381 data: 0.0556 max mem: 9377 +Train: [77] [4200/6250] eta: 0:04:47 lr: 0.000016 grad: 0.2007 (0.2005) loss: 0.7559 (0.7611) time: 0.1358 data: 0.0551 max mem: 9377 +Train: [77] [4300/6250] eta: 0:04:33 lr: 0.000016 grad: 0.1874 (0.2005) loss: 0.7591 (0.7610) time: 0.1443 data: 0.0625 max mem: 9377 +Train: [77] [4400/6250] eta: 0:04:19 lr: 0.000016 grad: 0.1932 (0.2004) loss: 0.7671 (0.7610) time: 0.1754 data: 0.0957 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:05 lr: 0.000016 grad: 0.2001 (0.2004) loss: 0.7470 (0.7609) time: 0.1658 data: 0.0899 max mem: 9377 +Train: [77] [4600/6250] eta: 0:03:51 lr: 0.000016 grad: 0.1981 (0.2004) loss: 0.7569 (0.7608) time: 0.1530 data: 0.0713 max mem: 9377 +Train: [77] [4700/6250] eta: 0:03:37 lr: 0.000016 grad: 0.1949 (0.2005) loss: 0.7556 (0.7607) time: 0.1113 data: 0.0329 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:24 lr: 0.000016 grad: 0.1999 (0.2005) loss: 0.7619 (0.7606) time: 0.1519 data: 0.0786 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:10 lr: 0.000016 grad: 0.1995 (0.2005) loss: 0.7496 (0.7604) time: 0.1416 data: 0.0663 max mem: 9377 +Train: [77] [5000/6250] eta: 0:02:56 lr: 0.000016 grad: 0.1922 (0.2006) loss: 0.7539 (0.7603) time: 0.1444 data: 0.0677 max mem: 9377 +Train: [77] [5100/6250] eta: 0:02:42 lr: 0.000016 grad: 0.1917 (0.2006) loss: 0.7562 (0.7602) time: 0.1518 data: 0.0710 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:28 lr: 0.000016 grad: 0.2014 (0.2007) loss: 0.7525 (0.7600) time: 0.1425 data: 0.0567 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:14 lr: 0.000016 grad: 0.1979 (0.2006) loss: 0.7543 (0.7599) time: 0.1394 data: 0.0562 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:00 lr: 0.000016 grad: 0.1877 (0.2007) loss: 0.7606 (0.7598) time: 0.1585 data: 0.0840 max mem: 9377 +Train: [77] [5500/6250] eta: 0:01:46 lr: 0.000016 grad: 0.2017 (0.2008) loss: 0.7505 (0.7596) time: 0.1789 data: 0.1053 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:32 lr: 0.000016 grad: 0.2011 (0.2008) loss: 0.7560 (0.7595) time: 0.1354 data: 0.0499 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:18 lr: 0.000016 grad: 0.2051 (0.2008) loss: 0.7532 (0.7593) time: 0.0922 data: 0.0093 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:03 lr: 0.000016 grad: 0.1956 (0.2008) loss: 0.7632 (0.7593) time: 0.1455 data: 0.0682 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:49 lr: 0.000016 grad: 0.1925 (0.2008) loss: 0.7612 (0.7592) time: 0.1373 data: 0.0624 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:35 lr: 0.000016 grad: 0.2030 (0.2008) loss: 0.7479 (0.7591) time: 0.1312 data: 0.0510 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:21 lr: 0.000016 grad: 0.1971 (0.2008) loss: 0.7583 (0.7590) time: 0.1371 data: 0.0572 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:07 lr: 0.000016 grad: 0.2074 (0.2008) loss: 0.7391 (0.7589) time: 0.1264 data: 0.0479 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.2042 (0.2009) loss: 0.7617 (0.7588) time: 0.1227 data: 0.0456 max mem: 9377 +Train: [77] Total time: 0:14:52 (0.1429 s / it) +Averaged stats: lr: 0.000016 grad: 0.2042 (0.2009) loss: 0.7617 (0.7588) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:03:44 loss: 0.8151 (0.8151) time: 3.6240 data: 3.5603 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8080 (0.8059) time: 0.1235 data: 0.0986 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:12 (0.2078 s / it) +Averaged stats (hcp-train-subset): loss: 0.8080 (0.8059) +Eval (hcp-val): [77] [ 0/62] eta: 0:05:00 loss: 0.8491 (0.8491) time: 4.8505 data: 4.8208 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8495 (0.8516) time: 0.1185 data: 0.0935 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:12 (0.2059 s / it) +Averaged stats (hcp-val): loss: 0.8495 (0.8516) +Eval (nsd-val): [77] [ 0/62] eta: 0:06:05 loss: 0.8212 (0.8212) time: 5.9003 data: 5.8703 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8311 (0.8314) time: 0.1241 data: 0.0972 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:13 (0.2153 s / it) +Averaged stats (nsd-val): loss: 0.8311 (0.8314) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 10:45:52 lr: 0.000016 grad: 0.1730 (0.1730) loss: 0.8015 (0.8015) time: 6.2004 data: 6.0494 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:21:09 lr: 0.000016 grad: 0.2064 (0.2729) loss: 0.7831 (0.7732) time: 0.1393 data: 0.0372 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:17:29 lr: 0.000016 grad: 0.2187 (0.2463) loss: 0.7677 (0.7711) time: 0.1400 data: 0.0444 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:16:11 lr: 0.000016 grad: 0.2098 (0.2378) loss: 0.7606 (0.7672) time: 0.1390 data: 0.0560 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:15:01 lr: 0.000016 grad: 0.2059 (0.2330) loss: 0.7587 (0.7648) time: 0.1256 data: 0.0333 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:14:21 lr: 0.000016 grad: 0.2034 (0.2274) loss: 0.7601 (0.7639) time: 0.1242 data: 0.0286 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:13:54 lr: 0.000016 grad: 0.2042 (0.2229) loss: 0.7593 (0.7642) time: 0.1464 data: 0.0556 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:13:32 lr: 0.000016 grad: 0.2017 (0.2201) loss: 0.7625 (0.7646) time: 0.1479 data: 0.0627 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:13:22 lr: 0.000016 grad: 0.2041 (0.2186) loss: 0.7589 (0.7642) time: 0.1619 data: 0.0843 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:13:06 lr: 0.000016 grad: 0.2002 (0.2170) loss: 0.7625 (0.7639) time: 0.1655 data: 0.0836 max mem: 9377 +Train: [78] [1000/6250] eta: 0:12:47 lr: 0.000016 grad: 0.1977 (0.2151) loss: 0.7632 (0.7640) time: 0.1410 data: 0.0561 max mem: 9377 +Train: [78] [1100/6250] eta: 0:12:35 lr: 0.000016 grad: 0.2019 (0.2139) loss: 0.7587 (0.7637) time: 0.1489 data: 0.0663 max mem: 9377 +Train: [78] [1200/6250] eta: 0:12:20 lr: 0.000016 grad: 0.2005 (0.2130) loss: 0.7569 (0.7633) time: 0.1449 data: 0.0554 max mem: 9377 +Train: [78] [1300/6250] eta: 0:12:01 lr: 0.000016 grad: 0.1994 (0.2118) loss: 0.7690 (0.7632) time: 0.1418 data: 0.0565 max mem: 9377 +Train: [78] [1400/6250] eta: 0:11:41 lr: 0.000016 grad: 0.1899 (0.2109) loss: 0.7643 (0.7629) time: 0.1284 data: 0.0367 max mem: 9377 +Train: [78] [1500/6250] eta: 0:11:21 lr: 0.000015 grad: 0.1896 (0.2100) loss: 0.7682 (0.7628) time: 0.1264 data: 0.0472 max mem: 9377 +Train: [78] [1600/6250] eta: 0:11:02 lr: 0.000015 grad: 0.1918 (0.2094) loss: 0.7593 (0.7626) time: 0.1294 data: 0.0455 max mem: 9377 +Train: [78] [1700/6250] eta: 0:10:45 lr: 0.000015 grad: 0.2018 (0.2090) loss: 0.7600 (0.7625) time: 0.1442 data: 0.0542 max mem: 9377 +Train: [78] [1800/6250] eta: 0:10:28 lr: 0.000015 grad: 0.1906 (0.2082) loss: 0.7678 (0.7624) time: 0.1311 data: 0.0494 max mem: 9377 +Train: [78] [1900/6250] eta: 0:10:11 lr: 0.000015 grad: 0.1871 (0.2078) loss: 0.7698 (0.7623) time: 0.1316 data: 0.0455 max mem: 9377 +Train: [78] [2000/6250] eta: 0:09:57 lr: 0.000015 grad: 0.1941 (0.2073) loss: 0.7593 (0.7623) time: 0.1392 data: 0.0616 max mem: 9377 +Train: [78] [2100/6250] eta: 0:09:42 lr: 0.000015 grad: 0.2013 (0.2069) loss: 0.7509 (0.7623) time: 0.1340 data: 0.0558 max mem: 9377 +Train: [78] [2200/6250] eta: 0:09:27 lr: 0.000015 grad: 0.1967 (0.2067) loss: 0.7634 (0.7621) time: 0.1439 data: 0.0567 max mem: 9377 +Train: [78] [2300/6250] eta: 0:09:13 lr: 0.000015 grad: 0.1929 (0.2063) loss: 0.7539 (0.7620) time: 0.1327 data: 0.0499 max mem: 9377 +Train: [78] [2400/6250] eta: 0:08:58 lr: 0.000015 grad: 0.2041 (0.2060) loss: 0.7470 (0.7619) time: 0.1230 data: 0.0367 max mem: 9377 +Train: [78] [2500/6250] eta: 0:08:44 lr: 0.000015 grad: 0.1963 (0.2058) loss: 0.7639 (0.7617) time: 0.1299 data: 0.0487 max mem: 9377 +Train: [78] [2600/6250] eta: 0:08:29 lr: 0.000015 grad: 0.1984 (0.2056) loss: 0.7548 (0.7616) time: 0.1352 data: 0.0545 max mem: 9377 +Train: [78] [2700/6250] eta: 0:08:14 lr: 0.000015 grad: 0.2019 (0.2052) loss: 0.7469 (0.7615) time: 0.1116 data: 0.0299 max mem: 9377 +Train: [78] [2800/6250] eta: 0:08:01 lr: 0.000015 grad: 0.1982 (0.2050) loss: 0.7555 (0.7613) time: 0.1396 data: 0.0616 max mem: 9377 +Train: [78] [2900/6250] eta: 0:07:46 lr: 0.000015 grad: 0.1936 (0.2048) loss: 0.7506 (0.7613) time: 0.1381 data: 0.0549 max mem: 9377 +Train: [78] [3000/6250] eta: 0:07:31 lr: 0.000015 grad: 0.1897 (0.2043) loss: 0.7708 (0.7614) time: 0.1404 data: 0.0642 max mem: 9377 +Train: [78] [3100/6250] eta: 0:07:19 lr: 0.000015 grad: 0.1955 (0.2040) loss: 0.7581 (0.7613) time: 0.1411 data: 0.0615 max mem: 9377 +Train: [78] [3200/6250] eta: 0:07:05 lr: 0.000015 grad: 0.1974 (0.2038) loss: 0.7592 (0.7611) time: 0.1308 data: 0.0552 max mem: 9377 +Train: [78] [3300/6250] eta: 0:06:51 lr: 0.000015 grad: 0.1987 (0.2035) loss: 0.7582 (0.7610) time: 0.1294 data: 0.0512 max mem: 9377 +Train: [78] [3400/6250] eta: 0:06:37 lr: 0.000015 grad: 0.1985 (0.2033) loss: 0.7524 (0.7609) time: 0.1494 data: 0.0657 max mem: 9377 +Train: [78] [3500/6250] eta: 0:06:25 lr: 0.000015 grad: 0.2023 (0.2033) loss: 0.7504 (0.7607) time: 0.1705 data: 0.0897 max mem: 9377 +Train: [78] [3600/6250] eta: 0:06:11 lr: 0.000015 grad: 0.2013 (0.2031) loss: 0.7536 (0.7606) time: 0.1615 data: 0.0812 max mem: 9377 +Train: [78] [3700/6250] eta: 0:05:57 lr: 0.000015 grad: 0.1944 (0.2030) loss: 0.7603 (0.7605) time: 0.1285 data: 0.0366 max mem: 9377 +Train: [78] [3800/6250] eta: 0:05:43 lr: 0.000015 grad: 0.1968 (0.2029) loss: 0.7569 (0.7604) time: 0.1272 data: 0.0473 max mem: 9377 +Train: [78] [3900/6250] eta: 0:05:29 lr: 0.000015 grad: 0.1935 (0.2029) loss: 0.7556 (0.7603) time: 0.0900 data: 0.0008 max mem: 9377 +Train: [78] [4000/6250] eta: 0:05:14 lr: 0.000015 grad: 0.1925 (0.2027) loss: 0.7613 (0.7603) time: 0.1363 data: 0.0571 max mem: 9377 +Train: [78] [4100/6250] eta: 0:04:59 lr: 0.000015 grad: 0.1907 (0.2025) loss: 0.7646 (0.7603) time: 0.1145 data: 0.0269 max mem: 9377 +Train: [78] [4200/6250] eta: 0:04:45 lr: 0.000015 grad: 0.1830 (0.2024) loss: 0.7703 (0.7603) time: 0.1395 data: 0.0565 max mem: 9377 +Train: [78] [4300/6250] eta: 0:04:30 lr: 0.000015 grad: 0.1897 (0.2022) loss: 0.7600 (0.7603) time: 0.1227 data: 0.0413 max mem: 9377 +Train: [78] [4400/6250] eta: 0:04:16 lr: 0.000015 grad: 0.1886 (0.2020) loss: 0.7586 (0.7604) time: 0.1254 data: 0.0400 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:02 lr: 0.000015 grad: 0.1892 (0.2019) loss: 0.7671 (0.7605) time: 0.1369 data: 0.0462 max mem: 9377 +Train: [78] [4600/6250] eta: 0:03:48 lr: 0.000015 grad: 0.2029 (0.2017) loss: 0.7635 (0.7605) time: 0.1486 data: 0.0724 max mem: 9377 +Train: [78] [4700/6250] eta: 0:03:34 lr: 0.000015 grad: 0.1935 (0.2016) loss: 0.7649 (0.7606) time: 0.1424 data: 0.0579 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:20 lr: 0.000015 grad: 0.1855 (0.2015) loss: 0.7636 (0.7606) time: 0.1262 data: 0.0389 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:07 lr: 0.000015 grad: 0.1910 (0.2013) loss: 0.7580 (0.7607) time: 0.1530 data: 0.0777 max mem: 9377 +Train: [78] [5000/6250] eta: 0:02:53 lr: 0.000015 grad: 0.1925 (0.2012) loss: 0.7513 (0.7607) time: 0.1364 data: 0.0549 max mem: 9377 +Train: [78] [5100/6250] eta: 0:02:39 lr: 0.000015 grad: 0.1921 (0.2011) loss: 0.7514 (0.7607) time: 0.1426 data: 0.0637 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:25 lr: 0.000015 grad: 0.2020 (0.2010) loss: 0.7484 (0.7606) time: 0.1490 data: 0.0710 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:11 lr: 0.000015 grad: 0.2011 (0.2009) loss: 0.7561 (0.7605) time: 0.1310 data: 0.0448 max mem: 9377 +Train: [78] [5400/6250] eta: 0:01:58 lr: 0.000015 grad: 0.1897 (0.2009) loss: 0.7669 (0.7605) time: 0.1091 data: 0.0268 max mem: 9377 +Train: [78] [5500/6250] eta: 0:01:44 lr: 0.000015 grad: 0.1962 (0.2009) loss: 0.7567 (0.7604) time: 0.1627 data: 0.0838 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:30 lr: 0.000015 grad: 0.1944 (0.2009) loss: 0.7641 (0.7603) time: 0.1417 data: 0.0574 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:16 lr: 0.000015 grad: 0.1869 (0.2009) loss: 0.7615 (0.7603) time: 0.1620 data: 0.0818 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:02 lr: 0.000015 grad: 0.1975 (0.2008) loss: 0.7536 (0.7602) time: 0.1777 data: 0.0950 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:48 lr: 0.000015 grad: 0.1944 (0.2008) loss: 0.7624 (0.7601) time: 0.1008 data: 0.0161 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:34 lr: 0.000015 grad: 0.1990 (0.2008) loss: 0.7494 (0.7600) time: 0.1265 data: 0.0407 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:20 lr: 0.000015 grad: 0.2012 (0.2008) loss: 0.7566 (0.7599) time: 0.1268 data: 0.0426 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:06 lr: 0.000014 grad: 0.2020 (0.2007) loss: 0.7593 (0.7599) time: 0.1226 data: 0.0451 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.2030 (0.2007) loss: 0.7501 (0.7598) time: 0.1426 data: 0.0573 max mem: 9377 +Train: [78] Total time: 0:14:35 (0.1401 s / it) +Averaged stats: lr: 0.000014 grad: 0.2030 (0.2007) loss: 0.7501 (0.7598) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:05:13 loss: 0.8183 (0.8183) time: 5.0621 data: 5.0316 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8052 (0.8076) time: 0.1186 data: 0.0939 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:13 (0.2114 s / it) +Averaged stats (hcp-train-subset): loss: 0.8052 (0.8076) +Eval (hcp-val): [78] [ 0/62] eta: 0:05:15 loss: 0.8527 (0.8527) time: 5.0889 data: 5.0586 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8517 (0.8550) time: 0.1197 data: 0.0948 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:12 (0.2081 s / it) +Averaged stats (hcp-val): loss: 0.8517 (0.8550) +Eval (nsd-val): [78] [ 0/62] eta: 0:05:51 loss: 0.8262 (0.8262) time: 5.6753 data: 5.6450 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8361 (0.8386) time: 0.1209 data: 0.0958 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:12 (0.2060 s / it) +Averaged stats (nsd-val): loss: 0.8361 (0.8386) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 8:20:46 lr: 0.000014 grad: 0.3877 (0.3877) loss: 0.7699 (0.7699) time: 4.8074 data: 4.5668 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:20:06 lr: 0.000014 grad: 0.2860 (0.2733) loss: 0.7436 (0.7673) time: 0.1584 data: 0.0580 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:17:49 lr: 0.000014 grad: 0.2092 (0.2549) loss: 0.7744 (0.7670) time: 0.1390 data: 0.0236 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:16:24 lr: 0.000014 grad: 0.2168 (0.2433) loss: 0.7720 (0.7657) time: 0.1471 data: 0.0606 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:15:22 lr: 0.000014 grad: 0.2139 (0.2385) loss: 0.7583 (0.7634) time: 0.1479 data: 0.0628 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:14:39 lr: 0.000014 grad: 0.2091 (0.2337) loss: 0.7504 (0.7614) time: 0.1428 data: 0.0561 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:13:59 lr: 0.000014 grad: 0.2081 (0.2298) loss: 0.7514 (0.7599) time: 0.1401 data: 0.0518 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:13:29 lr: 0.000014 grad: 0.1997 (0.2265) loss: 0.7603 (0.7587) time: 0.1340 data: 0.0402 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:13:05 lr: 0.000014 grad: 0.1922 (0.2237) loss: 0.7539 (0.7585) time: 0.1352 data: 0.0438 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:12:45 lr: 0.000014 grad: 0.1912 (0.2213) loss: 0.7651 (0.7587) time: 0.1398 data: 0.0579 max mem: 9377 +Train: [79] [1000/6250] eta: 0:12:30 lr: 0.000014 grad: 0.1919 (0.2197) loss: 0.7554 (0.7586) time: 0.1355 data: 0.0490 max mem: 9377 +Train: [79] [1100/6250] eta: 0:12:15 lr: 0.000014 grad: 0.1937 (0.2179) loss: 0.7598 (0.7587) time: 0.1386 data: 0.0640 max mem: 9377 +Train: [79] [1200/6250] eta: 0:12:01 lr: 0.000014 grad: 0.1976 (0.2164) loss: 0.7550 (0.7586) time: 0.1329 data: 0.0490 max mem: 9377 +Train: [79] [1300/6250] eta: 0:11:47 lr: 0.000014 grad: 0.1984 (0.2153) loss: 0.7565 (0.7586) time: 0.1431 data: 0.0603 max mem: 9377 +Train: [79] [1400/6250] eta: 0:11:38 lr: 0.000014 grad: 0.2031 (0.2144) loss: 0.7565 (0.7587) time: 0.1607 data: 0.0763 max mem: 9377 +Train: [79] [1500/6250] eta: 0:11:27 lr: 0.000014 grad: 0.1970 (0.2135) loss: 0.7656 (0.7589) time: 0.1556 data: 0.0677 max mem: 9377 +Train: [79] [1600/6250] eta: 0:11:11 lr: 0.000014 grad: 0.1991 (0.2127) loss: 0.7639 (0.7589) time: 0.1356 data: 0.0550 max mem: 9377 +Train: [79] [1700/6250] eta: 0:10:57 lr: 0.000014 grad: 0.1985 (0.2122) loss: 0.7517 (0.7590) time: 0.1439 data: 0.0602 max mem: 9377 +Train: [79] [1800/6250] eta: 0:10:40 lr: 0.000014 grad: 0.1984 (0.2115) loss: 0.7571 (0.7589) time: 0.1422 data: 0.0595 max mem: 9377 +Train: [79] [1900/6250] eta: 0:10:25 lr: 0.000014 grad: 0.1968 (0.2109) loss: 0.7629 (0.7589) time: 0.1473 data: 0.0670 max mem: 9377 +Train: [79] [2000/6250] eta: 0:10:09 lr: 0.000014 grad: 0.2032 (0.2104) loss: 0.7579 (0.7589) time: 0.1396 data: 0.0427 max mem: 9377 +Train: [79] [2100/6250] eta: 0:09:53 lr: 0.000014 grad: 0.2013 (0.2097) loss: 0.7547 (0.7590) time: 0.1256 data: 0.0484 max mem: 9377 +Train: [79] [2200/6250] eta: 0:09:37 lr: 0.000014 grad: 0.2016 (0.2093) loss: 0.7536 (0.7589) time: 0.1572 data: 0.0761 max mem: 9377 +Train: [79] [2300/6250] eta: 0:09:22 lr: 0.000014 grad: 0.1970 (0.2090) loss: 0.7686 (0.7589) time: 0.1277 data: 0.0436 max mem: 9377 +Train: [79] [2400/6250] eta: 0:09:07 lr: 0.000014 grad: 0.1977 (0.2087) loss: 0.7619 (0.7589) time: 0.1359 data: 0.0516 max mem: 9377 +Train: [79] [2500/6250] eta: 0:08:52 lr: 0.000014 grad: 0.1998 (0.2085) loss: 0.7524 (0.7589) time: 0.1334 data: 0.0527 max mem: 9377 +Train: [79] [2600/6250] eta: 0:08:37 lr: 0.000014 grad: 0.1968 (0.2081) loss: 0.7588 (0.7590) time: 0.1339 data: 0.0506 max mem: 9377 +Train: [79] [2700/6250] eta: 0:08:21 lr: 0.000014 grad: 0.1891 (0.2077) loss: 0.7657 (0.7591) time: 0.1234 data: 0.0377 max mem: 9377 +Train: [79] [2800/6250] eta: 0:08:06 lr: 0.000014 grad: 0.1949 (0.2073) loss: 0.7659 (0.7592) time: 0.1232 data: 0.0403 max mem: 9377 +Train: [79] [2900/6250] eta: 0:07:52 lr: 0.000014 grad: 0.1994 (0.2071) loss: 0.7576 (0.7592) time: 0.1593 data: 0.0831 max mem: 9377 +Train: [79] [3000/6250] eta: 0:07:38 lr: 0.000014 grad: 0.1986 (0.2068) loss: 0.7667 (0.7593) time: 0.1437 data: 0.0650 max mem: 9377 +Train: [79] [3100/6250] eta: 0:07:24 lr: 0.000014 grad: 0.1965 (0.2065) loss: 0.7667 (0.7594) time: 0.1168 data: 0.0314 max mem: 9377 +Train: [79] [3200/6250] eta: 0:07:11 lr: 0.000014 grad: 0.2012 (0.2064) loss: 0.7571 (0.7594) time: 0.1558 data: 0.0743 max mem: 9377 +Train: [79] [3300/6250] eta: 0:06:56 lr: 0.000014 grad: 0.1941 (0.2062) loss: 0.7532 (0.7595) time: 0.1582 data: 0.0755 max mem: 9377 +Train: [79] [3400/6250] eta: 0:06:42 lr: 0.000014 grad: 0.1938 (0.2060) loss: 0.7624 (0.7595) time: 0.1368 data: 0.0592 max mem: 9377 +Train: [79] [3500/6250] eta: 0:06:29 lr: 0.000014 grad: 0.1899 (0.2057) loss: 0.7645 (0.7597) time: 0.1644 data: 0.0833 max mem: 9377 +Train: [79] [3600/6250] eta: 0:06:15 lr: 0.000014 grad: 0.1877 (0.2055) loss: 0.7749 (0.7598) time: 0.1378 data: 0.0512 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:01 lr: 0.000014 grad: 0.1930 (0.2052) loss: 0.7637 (0.7599) time: 0.1569 data: 0.0788 max mem: 9377 +Train: [79] [3800/6250] eta: 0:05:47 lr: 0.000014 grad: 0.2046 (0.2050) loss: 0.7589 (0.7599) time: 0.1506 data: 0.0711 max mem: 9377 +Train: [79] [3900/6250] eta: 0:05:31 lr: 0.000014 grad: 0.1936 (0.2049) loss: 0.7642 (0.7598) time: 0.1327 data: 0.0477 max mem: 9377 +Train: [79] [4000/6250] eta: 0:05:17 lr: 0.000014 grad: 0.1984 (0.2048) loss: 0.7598 (0.7598) time: 0.1294 data: 0.0496 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:02 lr: 0.000014 grad: 0.1977 (0.2046) loss: 0.7623 (0.7598) time: 0.1325 data: 0.0521 max mem: 9377 +Train: [79] [4200/6250] eta: 0:04:47 lr: 0.000014 grad: 0.2010 (0.2046) loss: 0.7651 (0.7597) time: 0.1263 data: 0.0435 max mem: 9377 +Train: [79] [4300/6250] eta: 0:04:33 lr: 0.000014 grad: 0.2004 (0.2046) loss: 0.7605 (0.7597) time: 0.1423 data: 0.0661 max mem: 9377 +Train: [79] [4400/6250] eta: 0:04:19 lr: 0.000014 grad: 0.1962 (0.2046) loss: 0.7574 (0.7597) time: 0.1338 data: 0.0424 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:04 lr: 0.000014 grad: 0.2019 (0.2045) loss: 0.7489 (0.7597) time: 0.1200 data: 0.0309 max mem: 9377 +Train: [79] [4600/6250] eta: 0:03:50 lr: 0.000014 grad: 0.2025 (0.2045) loss: 0.7599 (0.7596) time: 0.1266 data: 0.0481 max mem: 9377 +Train: [79] [4700/6250] eta: 0:03:36 lr: 0.000013 grad: 0.2045 (0.2045) loss: 0.7602 (0.7595) time: 0.1416 data: 0.0549 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:22 lr: 0.000013 grad: 0.2025 (0.2044) loss: 0.7582 (0.7595) time: 0.0902 data: 0.0005 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:08 lr: 0.000013 grad: 0.2048 (0.2044) loss: 0.7505 (0.7595) time: 0.1063 data: 0.0222 max mem: 9377 +Train: [79] [5000/6250] eta: 0:02:54 lr: 0.000013 grad: 0.1981 (0.2043) loss: 0.7545 (0.7595) time: 0.1254 data: 0.0422 max mem: 9377 +Train: [79] [5100/6250] eta: 0:02:40 lr: 0.000013 grad: 0.2030 (0.2043) loss: 0.7591 (0.7594) time: 0.1378 data: 0.0598 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:26 lr: 0.000013 grad: 0.2055 (0.2043) loss: 0.7510 (0.7594) time: 0.1750 data: 0.0948 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:12 lr: 0.000013 grad: 0.1912 (0.2041) loss: 0.7569 (0.7593) time: 0.1003 data: 0.0123 max mem: 9377 +Train: [79] [5400/6250] eta: 0:01:58 lr: 0.000013 grad: 0.2025 (0.2041) loss: 0.7451 (0.7593) time: 0.1252 data: 0.0453 max mem: 9377 +Train: [79] [5500/6250] eta: 0:01:44 lr: 0.000013 grad: 0.2060 (0.2040) loss: 0.7533 (0.7591) time: 0.1424 data: 0.0539 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:30 lr: 0.000013 grad: 0.2020 (0.2040) loss: 0.7488 (0.7590) time: 0.1567 data: 0.0727 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:16 lr: 0.000013 grad: 0.2042 (0.2040) loss: 0.7507 (0.7590) time: 0.1666 data: 0.0857 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:02 lr: 0.000013 grad: 0.2018 (0.2040) loss: 0.7390 (0.7588) time: 0.1312 data: 0.0527 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:48 lr: 0.000013 grad: 0.1921 (0.2038) loss: 0.7587 (0.7587) time: 0.1513 data: 0.0712 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:34 lr: 0.000013 grad: 0.1891 (0.2038) loss: 0.7554 (0.7586) time: 0.1476 data: 0.0714 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:21 lr: 0.000013 grad: 0.2000 (0.2038) loss: 0.7525 (0.7584) time: 0.1089 data: 0.0255 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:07 lr: 0.000013 grad: 0.1970 (0.2037) loss: 0.7466 (0.7583) time: 0.1505 data: 0.0736 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1981 (0.2037) loss: 0.7469 (0.7583) time: 0.1402 data: 0.0610 max mem: 9377 +Train: [79] Total time: 0:14:40 (0.1409 s / it) +Averaged stats: lr: 0.000013 grad: 0.1981 (0.2037) loss: 0.7469 (0.7583) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:05:02 loss: 0.8155 (0.8155) time: 4.8865 data: 4.8567 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8084 (0.8066) time: 0.1147 data: 0.0899 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:12 (0.2089 s / it) +Averaged stats (hcp-train-subset): loss: 0.8084 (0.8066) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [79] [ 0/62] eta: 0:04:06 loss: 0.8554 (0.8554) time: 3.9754 data: 3.9042 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8547 (0.8542) time: 0.1224 data: 0.0973 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:12 (0.2057 s / it) +Averaged stats (hcp-val): loss: 0.8547 (0.8542) +Making plots (hcp-val): example=8 +Eval (nsd-val): [79] [ 0/62] eta: 0:05:45 loss: 0.8246 (0.8246) time: 5.5790 data: 5.5471 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8298 (0.8336) time: 0.1140 data: 0.0887 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (nsd-val): loss: 0.8298 (0.8336) +Making plots (nsd-val): example=10 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 10:16:36 lr: 0.000013 grad: 0.6128 (0.6128) loss: 0.7143 (0.7143) time: 5.9194 data: 5.7972 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:20:30 lr: 0.000013 grad: 0.2412 (0.2700) loss: 0.7721 (0.7767) time: 0.1473 data: 0.0376 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:17:31 lr: 0.000013 grad: 0.2117 (0.2533) loss: 0.7720 (0.7668) time: 0.1434 data: 0.0555 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:16:03 lr: 0.000013 grad: 0.2124 (0.2430) loss: 0.7588 (0.7648) time: 0.1485 data: 0.0587 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:14:57 lr: 0.000013 grad: 0.2069 (0.2353) loss: 0.7543 (0.7638) time: 0.1306 data: 0.0395 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:14:14 lr: 0.000013 grad: 0.1996 (0.2287) loss: 0.7554 (0.7629) time: 0.1302 data: 0.0366 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:13:44 lr: 0.000013 grad: 0.1970 (0.2259) loss: 0.7531 (0.7613) time: 0.1350 data: 0.0436 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:13:19 lr: 0.000013 grad: 0.1968 (0.2229) loss: 0.7571 (0.7602) time: 0.1204 data: 0.0224 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:13:01 lr: 0.000013 grad: 0.2000 (0.2205) loss: 0.7451 (0.7595) time: 0.1210 data: 0.0322 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:12:44 lr: 0.000013 grad: 0.1966 (0.2186) loss: 0.7482 (0.7590) time: 0.1525 data: 0.0644 max mem: 9377 +Train: [80] [1000/6250] eta: 0:12:27 lr: 0.000013 grad: 0.1986 (0.2173) loss: 0.7575 (0.7587) time: 0.1411 data: 0.0554 max mem: 9377 +Train: [80] [1100/6250] eta: 0:12:18 lr: 0.000013 grad: 0.2076 (0.2161) loss: 0.7576 (0.7585) time: 0.1510 data: 0.0711 max mem: 9377 +Train: [80] [1200/6250] eta: 0:12:05 lr: 0.000013 grad: 0.2016 (0.2151) loss: 0.7550 (0.7579) time: 0.1663 data: 0.0884 max mem: 9377 +Train: [80] [1300/6250] eta: 0:11:51 lr: 0.000013 grad: 0.1992 (0.2143) loss: 0.7560 (0.7573) time: 0.1410 data: 0.0590 max mem: 9377 +Train: [80] [1400/6250] eta: 0:11:42 lr: 0.000013 grad: 0.2003 (0.2137) loss: 0.7505 (0.7572) time: 0.1672 data: 0.0820 max mem: 9377 +Train: [80] [1500/6250] eta: 0:11:31 lr: 0.000013 grad: 0.1984 (0.2128) loss: 0.7531 (0.7572) time: 0.1532 data: 0.0662 max mem: 9377 +Train: [80] [1600/6250] eta: 0:11:18 lr: 0.000013 grad: 0.1982 (0.2122) loss: 0.7472 (0.7570) time: 0.1568 data: 0.0703 max mem: 9377 +Train: [80] [1700/6250] eta: 0:11:04 lr: 0.000013 grad: 0.2000 (0.2117) loss: 0.7481 (0.7568) time: 0.1418 data: 0.0471 max mem: 9377 +Train: [80] [1800/6250] eta: 0:10:48 lr: 0.000013 grad: 0.2031 (0.2111) loss: 0.7491 (0.7568) time: 0.1471 data: 0.0600 max mem: 9377 +Train: [80] [1900/6250] eta: 0:10:33 lr: 0.000013 grad: 0.1922 (0.2105) loss: 0.7539 (0.7568) time: 0.1450 data: 0.0583 max mem: 9377 +Train: [80] [2000/6250] eta: 0:10:17 lr: 0.000013 grad: 0.2024 (0.2101) loss: 0.7470 (0.7567) time: 0.1440 data: 0.0616 max mem: 9377 +Train: [80] [2100/6250] eta: 0:10:01 lr: 0.000013 grad: 0.2008 (0.2097) loss: 0.7580 (0.7567) time: 0.1388 data: 0.0581 max mem: 9377 +Train: [80] [2200/6250] eta: 0:09:46 lr: 0.000013 grad: 0.1939 (0.2093) loss: 0.7513 (0.7565) time: 0.1382 data: 0.0614 max mem: 9377 +Train: [80] [2300/6250] eta: 0:09:34 lr: 0.000013 grad: 0.2079 (0.2090) loss: 0.7589 (0.7565) time: 0.2683 data: 0.1969 max mem: 9377 +Train: [80] [2400/6250] eta: 0:09:17 lr: 0.000013 grad: 0.1995 (0.2088) loss: 0.7525 (0.7564) time: 0.1514 data: 0.0699 max mem: 9377 +Train: [80] [2500/6250] eta: 0:09:02 lr: 0.000013 grad: 0.1947 (0.2085) loss: 0.7598 (0.7564) time: 0.1322 data: 0.0484 max mem: 9377 +Train: [80] [2600/6250] eta: 0:08:47 lr: 0.000013 grad: 0.1974 (0.2084) loss: 0.7537 (0.7563) time: 0.1407 data: 0.0638 max mem: 9377 +Train: [80] [2700/6250] eta: 0:08:32 lr: 0.000013 grad: 0.1956 (0.2082) loss: 0.7598 (0.7562) time: 0.1324 data: 0.0473 max mem: 9377 +Train: [80] [2800/6250] eta: 0:08:16 lr: 0.000013 grad: 0.1988 (0.2080) loss: 0.7560 (0.7562) time: 0.1296 data: 0.0486 max mem: 9377 +Train: [80] [2900/6250] eta: 0:08:02 lr: 0.000013 grad: 0.2035 (0.2078) loss: 0.7515 (0.7562) time: 0.1353 data: 0.0534 max mem: 9377 +Train: [80] [3000/6250] eta: 0:07:46 lr: 0.000013 grad: 0.1928 (0.2076) loss: 0.7556 (0.7563) time: 0.1324 data: 0.0487 max mem: 9377 +Train: [80] [3100/6250] eta: 0:07:32 lr: 0.000013 grad: 0.2036 (0.2075) loss: 0.7639 (0.7563) time: 0.1578 data: 0.0743 max mem: 9377 +Train: [80] [3200/6250] eta: 0:07:19 lr: 0.000013 grad: 0.2106 (0.2073) loss: 0.7522 (0.7564) time: 0.1659 data: 0.0801 max mem: 9377 +Train: [80] [3300/6250] eta: 0:07:04 lr: 0.000013 grad: 0.2016 (0.2072) loss: 0.7612 (0.7565) time: 0.1598 data: 0.0816 max mem: 9377 +Train: [80] [3400/6250] eta: 0:06:49 lr: 0.000012 grad: 0.1992 (0.2070) loss: 0.7565 (0.7567) time: 0.1402 data: 0.0568 max mem: 9377 +Train: [80] [3500/6250] eta: 0:06:35 lr: 0.000012 grad: 0.2000 (0.2069) loss: 0.7627 (0.7567) time: 0.1538 data: 0.0714 max mem: 9377 +Train: [80] [3600/6250] eta: 0:06:21 lr: 0.000012 grad: 0.1950 (0.2067) loss: 0.7566 (0.7568) time: 0.1449 data: 0.0615 max mem: 9377 +Train: [80] [3700/6250] eta: 0:06:06 lr: 0.000012 grad: 0.1952 (0.2066) loss: 0.7571 (0.7569) time: 0.1436 data: 0.0580 max mem: 9377 +Train: [80] [3800/6250] eta: 0:05:52 lr: 0.000012 grad: 0.1976 (0.2063) loss: 0.7668 (0.7569) time: 0.1294 data: 0.0450 max mem: 9377 +Train: [80] [3900/6250] eta: 0:05:36 lr: 0.000012 grad: 0.2008 (0.2062) loss: 0.7544 (0.7570) time: 0.1180 data: 0.0303 max mem: 9377 +Train: [80] [4000/6250] eta: 0:05:21 lr: 0.000012 grad: 0.1943 (0.2061) loss: 0.7559 (0.7570) time: 0.1415 data: 0.0641 max mem: 9377 +Train: [80] [4100/6250] eta: 0:05:06 lr: 0.000012 grad: 0.1949 (0.2060) loss: 0.7531 (0.7570) time: 0.1083 data: 0.0204 max mem: 9377 +Train: [80] [4200/6250] eta: 0:04:51 lr: 0.000012 grad: 0.1952 (0.2058) loss: 0.7586 (0.7570) time: 0.1398 data: 0.0594 max mem: 9377 +Train: [80] [4300/6250] eta: 0:04:36 lr: 0.000012 grad: 0.2071 (0.2058) loss: 0.7565 (0.7571) time: 0.1375 data: 0.0536 max mem: 9377 +Train: [80] [4400/6250] eta: 0:04:22 lr: 0.000012 grad: 0.2034 (0.2057) loss: 0.7572 (0.7571) time: 0.1303 data: 0.0482 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:07 lr: 0.000012 grad: 0.2026 (0.2057) loss: 0.7660 (0.7571) time: 0.1402 data: 0.0588 max mem: 9377 +Train: [80] [4600/6250] eta: 0:03:53 lr: 0.000012 grad: 0.2147 (0.2058) loss: 0.7512 (0.7570) time: 0.1089 data: 0.0232 max mem: 9377 +Train: [80] [4700/6250] eta: 0:03:39 lr: 0.000012 grad: 0.2059 (0.2059) loss: 0.7565 (0.7570) time: 0.1274 data: 0.0410 max mem: 9377 +Train: [80] [4800/6250] eta: 0:03:24 lr: 0.000012 grad: 0.2079 (0.2060) loss: 0.7495 (0.7569) time: 0.1362 data: 0.0585 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:10 lr: 0.000012 grad: 0.2067 (0.2061) loss: 0.7614 (0.7569) time: 0.1302 data: 0.0492 max mem: 9377 +Train: [80] [5000/6250] eta: 0:02:56 lr: 0.000012 grad: 0.1988 (0.2061) loss: 0.7652 (0.7568) time: 0.1580 data: 0.0794 max mem: 9377 +Train: [80] [5100/6250] eta: 0:02:42 lr: 0.000012 grad: 0.2062 (0.2062) loss: 0.7613 (0.7568) time: 0.1521 data: 0.0660 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:28 lr: 0.000012 grad: 0.2001 (0.2062) loss: 0.7614 (0.7568) time: 0.1001 data: 0.0136 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:14 lr: 0.000012 grad: 0.2005 (0.2062) loss: 0.7577 (0.7568) time: 0.1352 data: 0.0476 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:00 lr: 0.000012 grad: 0.2035 (0.2062) loss: 0.7562 (0.7568) time: 0.1250 data: 0.0403 max mem: 9377 +Train: [80] [5500/6250] eta: 0:01:46 lr: 0.000012 grad: 0.2084 (0.2062) loss: 0.7629 (0.7569) time: 0.0900 data: 0.0029 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:31 lr: 0.000012 grad: 0.2004 (0.2061) loss: 0.7653 (0.7570) time: 0.1176 data: 0.0304 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:18 lr: 0.000012 grad: 0.1968 (0.2061) loss: 0.7556 (0.7570) time: 0.1397 data: 0.0539 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:03 lr: 0.000012 grad: 0.1957 (0.2060) loss: 0.7636 (0.7570) time: 0.1197 data: 0.0371 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:49 lr: 0.000012 grad: 0.2066 (0.2060) loss: 0.7447 (0.7570) time: 0.1721 data: 0.0870 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:35 lr: 0.000012 grad: 0.2063 (0.2060) loss: 0.7520 (0.7570) time: 0.1328 data: 0.0387 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:21 lr: 0.000012 grad: 0.1989 (0.2061) loss: 0.7511 (0.7569) time: 0.1416 data: 0.0603 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:07 lr: 0.000012 grad: 0.1993 (0.2060) loss: 0.7610 (0.7569) time: 0.1703 data: 0.0918 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.2017 (0.2060) loss: 0.7512 (0.7569) time: 0.1361 data: 0.0595 max mem: 9377 +Train: [80] Total time: 0:14:51 (0.1427 s / it) +Averaged stats: lr: 0.000012 grad: 0.2017 (0.2060) loss: 0.7512 (0.7569) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:05:30 loss: 0.8150 (0.8150) time: 5.3306 data: 5.2972 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8086 (0.8061) time: 0.1157 data: 0.0910 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:13 (0.2128 s / it) +Averaged stats (hcp-train-subset): loss: 0.8086 (0.8061) +Eval (hcp-val): [80] [ 0/62] eta: 0:05:33 loss: 0.8584 (0.8584) time: 5.3790 data: 5.3488 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8494 (0.8533) time: 0.1129 data: 0.0879 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:12 (0.2062 s / it) +Averaged stats (hcp-val): loss: 0.8494 (0.8533) +Eval (nsd-val): [80] [ 0/62] eta: 0:05:26 loss: 0.8259 (0.8259) time: 5.2687 data: 5.2380 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8321 (0.8344) time: 0.1355 data: 0.1103 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:13 (0.2182 s / it) +Averaged stats (nsd-val): loss: 0.8321 (0.8344) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 9:53:10 lr: 0.000012 grad: 0.5648 (0.5648) loss: 0.7255 (0.7255) time: 5.6945 data: 5.5490 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:21:01 lr: 0.000012 grad: 0.2490 (0.2674) loss: 0.7689 (0.7721) time: 0.1484 data: 0.0419 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:17:52 lr: 0.000012 grad: 0.2324 (0.2570) loss: 0.7517 (0.7651) time: 0.1469 data: 0.0565 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:16:22 lr: 0.000012 grad: 0.2305 (0.2497) loss: 0.7461 (0.7609) time: 0.1349 data: 0.0490 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:15:13 lr: 0.000012 grad: 0.2026 (0.2409) loss: 0.7663 (0.7606) time: 0.1341 data: 0.0434 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:14:28 lr: 0.000012 grad: 0.1999 (0.2334) loss: 0.7703 (0.7617) time: 0.1179 data: 0.0212 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:13:55 lr: 0.000012 grad: 0.2022 (0.2289) loss: 0.7517 (0.7623) time: 0.1304 data: 0.0407 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:13:30 lr: 0.000012 grad: 0.1977 (0.2250) loss: 0.7637 (0.7625) time: 0.1585 data: 0.0752 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:13:09 lr: 0.000012 grad: 0.1950 (0.2225) loss: 0.7576 (0.7620) time: 0.1519 data: 0.0597 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:12:52 lr: 0.000012 grad: 0.2087 (0.2207) loss: 0.7540 (0.7613) time: 0.1373 data: 0.0442 max mem: 9377 +Train: [81] [1000/6250] eta: 0:12:34 lr: 0.000012 grad: 0.2028 (0.2194) loss: 0.7559 (0.7607) time: 0.1281 data: 0.0475 max mem: 9377 +Train: [81] [1100/6250] eta: 0:12:18 lr: 0.000012 grad: 0.1997 (0.2181) loss: 0.7634 (0.7602) time: 0.1576 data: 0.0760 max mem: 9377 +Train: [81] [1200/6250] eta: 0:12:07 lr: 0.000012 grad: 0.2083 (0.2171) loss: 0.7468 (0.7597) time: 0.1491 data: 0.0543 max mem: 9377 +Train: [81] [1300/6250] eta: 0:11:55 lr: 0.000012 grad: 0.2021 (0.2162) loss: 0.7510 (0.7595) time: 0.1584 data: 0.0828 max mem: 9377 +Train: [81] [1400/6250] eta: 0:11:41 lr: 0.000012 grad: 0.2010 (0.2155) loss: 0.7474 (0.7590) time: 0.1402 data: 0.0630 max mem: 9377 +Train: [81] [1500/6250] eta: 0:11:32 lr: 0.000012 grad: 0.2021 (0.2147) loss: 0.7603 (0.7587) time: 0.1746 data: 0.0920 max mem: 9377 +Train: [81] [1600/6250] eta: 0:11:23 lr: 0.000012 grad: 0.2034 (0.2140) loss: 0.7563 (0.7587) time: 0.1785 data: 0.0954 max mem: 9377 +Train: [81] [1700/6250] eta: 0:11:13 lr: 0.000012 grad: 0.2003 (0.2133) loss: 0.7459 (0.7584) time: 0.1553 data: 0.0729 max mem: 9377 +Train: [81] [1800/6250] eta: 0:10:59 lr: 0.000012 grad: 0.1937 (0.2128) loss: 0.7578 (0.7581) time: 0.1433 data: 0.0549 max mem: 9377 +Train: [81] [1900/6250] eta: 0:10:45 lr: 0.000012 grad: 0.2007 (0.2120) loss: 0.7647 (0.7583) time: 0.1442 data: 0.0587 max mem: 9377 +Train: [81] [2000/6250] eta: 0:10:31 lr: 0.000012 grad: 0.1997 (0.2114) loss: 0.7690 (0.7584) time: 0.1617 data: 0.0712 max mem: 9377 +Train: [81] [2100/6250] eta: 0:10:15 lr: 0.000012 grad: 0.2018 (0.2111) loss: 0.7567 (0.7582) time: 0.1337 data: 0.0564 max mem: 9377 +Train: [81] [2200/6250] eta: 0:09:59 lr: 0.000012 grad: 0.1971 (0.2108) loss: 0.7609 (0.7581) time: 0.1291 data: 0.0507 max mem: 9377 +Train: [81] [2300/6250] eta: 0:09:43 lr: 0.000011 grad: 0.2004 (0.2104) loss: 0.7595 (0.7582) time: 0.1362 data: 0.0583 max mem: 9377 +Train: [81] [2400/6250] eta: 0:09:26 lr: 0.000011 grad: 0.2046 (0.2099) loss: 0.7614 (0.7583) time: 0.1341 data: 0.0526 max mem: 9377 +Train: [81] [2500/6250] eta: 0:09:11 lr: 0.000011 grad: 0.1973 (0.2095) loss: 0.7569 (0.7584) time: 0.1448 data: 0.0583 max mem: 9377 +Train: [81] [2600/6250] eta: 0:08:55 lr: 0.000011 grad: 0.2023 (0.2093) loss: 0.7558 (0.7585) time: 0.1590 data: 0.0871 max mem: 9377 +Train: [81] [2700/6250] eta: 0:08:39 lr: 0.000011 grad: 0.2006 (0.2089) loss: 0.7619 (0.7586) time: 0.1298 data: 0.0462 max mem: 9377 +Train: [81] [2800/6250] eta: 0:08:23 lr: 0.000011 grad: 0.1988 (0.2087) loss: 0.7602 (0.7586) time: 0.1322 data: 0.0457 max mem: 9377 +Train: [81] [2900/6250] eta: 0:08:08 lr: 0.000011 grad: 0.2017 (0.2084) loss: 0.7544 (0.7587) time: 0.1360 data: 0.0546 max mem: 9377 +Train: [81] [3000/6250] eta: 0:07:53 lr: 0.000011 grad: 0.1958 (0.2081) loss: 0.7525 (0.7588) time: 0.1347 data: 0.0495 max mem: 9377 +Train: [81] [3100/6250] eta: 0:07:37 lr: 0.000011 grad: 0.1947 (0.2078) loss: 0.7604 (0.7588) time: 0.1346 data: 0.0527 max mem: 9377 +Train: [81] [3200/6250] eta: 0:07:23 lr: 0.000011 grad: 0.2061 (0.2077) loss: 0.7527 (0.7587) time: 0.1743 data: 0.1022 max mem: 9377 +Train: [81] [3300/6250] eta: 0:07:08 lr: 0.000011 grad: 0.2010 (0.2075) loss: 0.7605 (0.7587) time: 0.1398 data: 0.0586 max mem: 9377 +Train: [81] [3400/6250] eta: 0:06:53 lr: 0.000011 grad: 0.2036 (0.2073) loss: 0.7628 (0.7587) time: 0.1404 data: 0.0588 max mem: 9377 +Train: [81] [3500/6250] eta: 0:06:38 lr: 0.000011 grad: 0.1998 (0.2071) loss: 0.7618 (0.7587) time: 0.1460 data: 0.0623 max mem: 9377 +Train: [81] [3600/6250] eta: 0:06:24 lr: 0.000011 grad: 0.2020 (0.2071) loss: 0.7580 (0.7587) time: 0.1421 data: 0.0566 max mem: 9377 +Train: [81] [3700/6250] eta: 0:06:10 lr: 0.000011 grad: 0.2004 (0.2069) loss: 0.7533 (0.7586) time: 0.1566 data: 0.0787 max mem: 9377 +Train: [81] [3800/6250] eta: 0:05:55 lr: 0.000011 grad: 0.2025 (0.2069) loss: 0.7643 (0.7586) time: 0.1105 data: 0.0269 max mem: 9377 +Train: [81] [3900/6250] eta: 0:05:39 lr: 0.000011 grad: 0.1975 (0.2068) loss: 0.7709 (0.7586) time: 0.1469 data: 0.0691 max mem: 9377 +Train: [81] [4000/6250] eta: 0:05:24 lr: 0.000011 grad: 0.2075 (0.2066) loss: 0.7525 (0.7586) time: 0.1357 data: 0.0543 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:08 lr: 0.000011 grad: 0.1956 (0.2065) loss: 0.7520 (0.7586) time: 0.1321 data: 0.0460 max mem: 9377 +Train: [81] [4200/6250] eta: 0:04:53 lr: 0.000011 grad: 0.2072 (0.2065) loss: 0.7525 (0.7586) time: 0.1406 data: 0.0559 max mem: 9377 +Train: [81] [4300/6250] eta: 0:04:38 lr: 0.000011 grad: 0.2022 (0.2064) loss: 0.7587 (0.7585) time: 0.1318 data: 0.0436 max mem: 9377 +Train: [81] [4400/6250] eta: 0:04:24 lr: 0.000011 grad: 0.2032 (0.2065) loss: 0.7483 (0.7584) time: 0.1296 data: 0.0488 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:09 lr: 0.000011 grad: 0.2017 (0.2063) loss: 0.7563 (0.7585) time: 0.1180 data: 0.0384 max mem: 9377 +Train: [81] [4600/6250] eta: 0:03:54 lr: 0.000011 grad: 0.1941 (0.2063) loss: 0.7629 (0.7585) time: 0.1347 data: 0.0541 max mem: 9377 +Train: [81] [4700/6250] eta: 0:03:40 lr: 0.000011 grad: 0.2075 (0.2062) loss: 0.7554 (0.7584) time: 0.1248 data: 0.0415 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:26 lr: 0.000011 grad: 0.1945 (0.2061) loss: 0.7526 (0.7584) time: 0.1242 data: 0.0416 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:12 lr: 0.000011 grad: 0.2008 (0.2060) loss: 0.7519 (0.7584) time: 0.1487 data: 0.0654 max mem: 9377 +Train: [81] [5000/6250] eta: 0:02:57 lr: 0.000011 grad: 0.2051 (0.2059) loss: 0.7610 (0.7584) time: 0.1450 data: 0.0652 max mem: 9377 +Train: [81] [5100/6250] eta: 0:02:43 lr: 0.000011 grad: 0.1991 (0.2058) loss: 0.7583 (0.7584) time: 0.0865 data: 0.0033 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:29 lr: 0.000011 grad: 0.1959 (0.2057) loss: 0.7461 (0.7583) time: 0.1443 data: 0.0670 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:15 lr: 0.000011 grad: 0.2037 (0.2056) loss: 0.7524 (0.7583) time: 0.1593 data: 0.0774 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:00 lr: 0.000011 grad: 0.1969 (0.2056) loss: 0.7531 (0.7582) time: 0.1374 data: 0.0565 max mem: 9377 +Train: [81] [5500/6250] eta: 0:01:46 lr: 0.000011 grad: 0.2072 (0.2056) loss: 0.7471 (0.7581) time: 0.1396 data: 0.0646 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:32 lr: 0.000011 grad: 0.2067 (0.2055) loss: 0.7506 (0.7581) time: 0.1414 data: 0.0564 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:18 lr: 0.000011 grad: 0.2021 (0.2055) loss: 0.7669 (0.7580) time: 0.1674 data: 0.0869 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:04 lr: 0.000011 grad: 0.2011 (0.2054) loss: 0.7631 (0.7580) time: 0.1607 data: 0.0831 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:49 lr: 0.000011 grad: 0.1900 (0.2053) loss: 0.7670 (0.7581) time: 0.1685 data: 0.0913 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:35 lr: 0.000011 grad: 0.2024 (0.2054) loss: 0.7506 (0.7580) time: 0.1309 data: 0.0491 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:21 lr: 0.000011 grad: 0.1963 (0.2054) loss: 0.7594 (0.7580) time: 0.1590 data: 0.0815 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:07 lr: 0.000011 grad: 0.2034 (0.2054) loss: 0.7524 (0.7580) time: 0.1584 data: 0.0808 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.2073 (0.2054) loss: 0.7578 (0.7580) time: 0.1234 data: 0.0331 max mem: 9377 +Train: [81] Total time: 0:14:58 (0.1438 s / it) +Averaged stats: lr: 0.000011 grad: 0.2073 (0.2054) loss: 0.7578 (0.7580) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:05:18 loss: 0.8113 (0.8113) time: 5.1308 data: 5.0679 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8090 (0.8067) time: 0.1260 data: 0.1005 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:13 (0.2137 s / it) +Averaged stats (hcp-train-subset): loss: 0.8090 (0.8067) +Eval (hcp-val): [81] [ 0/62] eta: 0:05:27 loss: 0.8530 (0.8530) time: 5.2770 data: 5.2474 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8522 (0.8541) time: 0.1164 data: 0.0898 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (hcp-val): loss: 0.8522 (0.8541) +Eval (nsd-val): [81] [ 0/62] eta: 0:05:45 loss: 0.8276 (0.8276) time: 5.5711 data: 5.5411 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8321 (0.8345) time: 0.1090 data: 0.0821 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:13 (0.2105 s / it) +Averaged stats (nsd-val): loss: 0.8321 (0.8345) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 9:07:51 lr: 0.000011 grad: 0.2067 (0.2067) loss: 0.7792 (0.7792) time: 5.2594 data: 5.0917 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:20:12 lr: 0.000011 grad: 0.2276 (0.2548) loss: 0.7764 (0.7798) time: 0.1535 data: 0.0521 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:17:40 lr: 0.000011 grad: 0.2196 (0.2455) loss: 0.7711 (0.7729) time: 0.1417 data: 0.0345 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:16:25 lr: 0.000011 grad: 0.2367 (0.2443) loss: 0.7516 (0.7660) time: 0.1508 data: 0.0592 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:15:28 lr: 0.000011 grad: 0.2133 (0.2392) loss: 0.7520 (0.7625) time: 0.1442 data: 0.0592 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:14:38 lr: 0.000011 grad: 0.2156 (0.2354) loss: 0.7587 (0.7607) time: 0.1392 data: 0.0371 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:14:01 lr: 0.000011 grad: 0.2063 (0.2316) loss: 0.7508 (0.7597) time: 0.1372 data: 0.0433 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:13:37 lr: 0.000011 grad: 0.2150 (0.2290) loss: 0.7442 (0.7585) time: 0.1397 data: 0.0566 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:13:16 lr: 0.000011 grad: 0.2077 (0.2266) loss: 0.7533 (0.7577) time: 0.1597 data: 0.0743 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:12:55 lr: 0.000011 grad: 0.1998 (0.2242) loss: 0.7615 (0.7578) time: 0.1329 data: 0.0419 max mem: 9377 +Train: [82] [1000/6250] eta: 0:12:39 lr: 0.000011 grad: 0.1964 (0.2221) loss: 0.7581 (0.7581) time: 0.1363 data: 0.0562 max mem: 9377 +Train: [82] [1100/6250] eta: 0:12:21 lr: 0.000011 grad: 0.2057 (0.2206) loss: 0.7573 (0.7582) time: 0.1423 data: 0.0588 max mem: 9377 +Train: [82] [1200/6250] eta: 0:12:04 lr: 0.000011 grad: 0.1899 (0.2188) loss: 0.7665 (0.7586) time: 0.1513 data: 0.0739 max mem: 9377 +Train: [82] [1300/6250] eta: 0:11:57 lr: 0.000011 grad: 0.1979 (0.2174) loss: 0.7532 (0.7588) time: 0.1605 data: 0.0791 max mem: 9377 +Train: [82] [1400/6250] eta: 0:11:47 lr: 0.000010 grad: 0.1977 (0.2164) loss: 0.7563 (0.7590) time: 0.1533 data: 0.0686 max mem: 9377 +Train: [82] [1500/6250] eta: 0:11:36 lr: 0.000010 grad: 0.2055 (0.2154) loss: 0.7585 (0.7592) time: 0.1370 data: 0.0561 max mem: 9377 +Train: [82] [1600/6250] eta: 0:11:29 lr: 0.000010 grad: 0.2026 (0.2144) loss: 0.7511 (0.7595) time: 0.1855 data: 0.0992 max mem: 9377 +Train: [82] [1700/6250] eta: 0:11:21 lr: 0.000010 grad: 0.1929 (0.2135) loss: 0.7645 (0.7596) time: 0.1716 data: 0.0885 max mem: 9377 +Train: [82] [1800/6250] eta: 0:11:08 lr: 0.000010 grad: 0.1976 (0.2129) loss: 0.7645 (0.7598) time: 0.1675 data: 0.0857 max mem: 9377 +Train: [82] [1900/6250] eta: 0:10:54 lr: 0.000010 grad: 0.1923 (0.2121) loss: 0.7618 (0.7600) time: 0.1592 data: 0.0657 max mem: 9377 +Train: [82] [2000/6250] eta: 0:10:40 lr: 0.000010 grad: 0.1977 (0.2115) loss: 0.7583 (0.7601) time: 0.1564 data: 0.0614 max mem: 9377 +Train: [82] [2100/6250] eta: 0:10:22 lr: 0.000010 grad: 0.1934 (0.2111) loss: 0.7600 (0.7601) time: 0.1347 data: 0.0447 max mem: 9377 +Train: [82] [2200/6250] eta: 0:10:05 lr: 0.000010 grad: 0.2000 (0.2107) loss: 0.7558 (0.7601) time: 0.1507 data: 0.0658 max mem: 9377 +Train: [82] [2300/6250] eta: 0:09:48 lr: 0.000010 grad: 0.1993 (0.2102) loss: 0.7708 (0.7601) time: 0.1462 data: 0.0616 max mem: 9377 +Train: [82] [2400/6250] eta: 0:09:32 lr: 0.000010 grad: 0.2095 (0.2100) loss: 0.7505 (0.7601) time: 0.1492 data: 0.0593 max mem: 9377 +Train: [82] [2500/6250] eta: 0:09:16 lr: 0.000010 grad: 0.2011 (0.2097) loss: 0.7590 (0.7601) time: 0.1414 data: 0.0484 max mem: 9377 +Train: [82] [2600/6250] eta: 0:09:00 lr: 0.000010 grad: 0.1950 (0.2094) loss: 0.7495 (0.7599) time: 0.1342 data: 0.0510 max mem: 9377 +Train: [82] [2700/6250] eta: 0:08:44 lr: 0.000010 grad: 0.2036 (0.2092) loss: 0.7496 (0.7598) time: 0.1182 data: 0.0406 max mem: 9377 +Train: [82] [2800/6250] eta: 0:08:28 lr: 0.000010 grad: 0.1994 (0.2089) loss: 0.7515 (0.7596) time: 0.1362 data: 0.0546 max mem: 9377 +Train: [82] [2900/6250] eta: 0:08:13 lr: 0.000010 grad: 0.2046 (0.2088) loss: 0.7482 (0.7594) time: 0.1414 data: 0.0600 max mem: 9377 +Train: [82] [3000/6250] eta: 0:07:59 lr: 0.000010 grad: 0.2070 (0.2087) loss: 0.7458 (0.7592) time: 0.1333 data: 0.0496 max mem: 9377 +Train: [82] [3100/6250] eta: 0:07:43 lr: 0.000010 grad: 0.2076 (0.2087) loss: 0.7526 (0.7589) time: 0.1140 data: 0.0360 max mem: 9377 +Train: [82] [3200/6250] eta: 0:07:28 lr: 0.000010 grad: 0.2057 (0.2086) loss: 0.7575 (0.7588) time: 0.1337 data: 0.0522 max mem: 9377 +Train: [82] [3300/6250] eta: 0:07:13 lr: 0.000010 grad: 0.2077 (0.2086) loss: 0.7639 (0.7587) time: 0.1408 data: 0.0575 max mem: 9377 +Train: [82] [3400/6250] eta: 0:06:58 lr: 0.000010 grad: 0.2009 (0.2085) loss: 0.7585 (0.7586) time: 0.1433 data: 0.0593 max mem: 9377 +Train: [82] [3500/6250] eta: 0:06:42 lr: 0.000010 grad: 0.2157 (0.2085) loss: 0.7430 (0.7585) time: 0.1188 data: 0.0383 max mem: 9377 +Train: [82] [3600/6250] eta: 0:06:27 lr: 0.000010 grad: 0.2027 (0.2085) loss: 0.7598 (0.7585) time: 0.1347 data: 0.0477 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:13 lr: 0.000010 grad: 0.2001 (0.2083) loss: 0.7588 (0.7585) time: 0.1582 data: 0.0791 max mem: 9377 +Train: [82] [3800/6250] eta: 0:05:58 lr: 0.000010 grad: 0.1985 (0.2083) loss: 0.7594 (0.7584) time: 0.1515 data: 0.0651 max mem: 9377 +Train: [82] [3900/6250] eta: 0:05:43 lr: 0.000010 grad: 0.2023 (0.2081) loss: 0.7606 (0.7585) time: 0.1429 data: 0.0637 max mem: 9377 +Train: [82] [4000/6250] eta: 0:05:28 lr: 0.000010 grad: 0.1904 (0.2079) loss: 0.7669 (0.7586) time: 0.1323 data: 0.0428 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:12 lr: 0.000010 grad: 0.2052 (0.2078) loss: 0.7586 (0.7587) time: 0.1312 data: 0.0481 max mem: 9377 +Train: [82] [4200/6250] eta: 0:04:57 lr: 0.000010 grad: 0.2007 (0.2077) loss: 0.7570 (0.7586) time: 0.1240 data: 0.0341 max mem: 9377 +Train: [82] [4300/6250] eta: 0:04:42 lr: 0.000010 grad: 0.1919 (0.2076) loss: 0.7662 (0.7586) time: 0.1538 data: 0.0563 max mem: 9377 +Train: [82] [4400/6250] eta: 0:04:27 lr: 0.000010 grad: 0.1952 (0.2075) loss: 0.7627 (0.7587) time: 0.1309 data: 0.0458 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:12 lr: 0.000010 grad: 0.1968 (0.2074) loss: 0.7576 (0.7586) time: 0.1204 data: 0.0367 max mem: 9377 +Train: [82] [4600/6250] eta: 0:03:57 lr: 0.000010 grad: 0.1918 (0.2073) loss: 0.7592 (0.7586) time: 0.1156 data: 0.0307 max mem: 9377 +Train: [82] [4700/6250] eta: 0:03:42 lr: 0.000010 grad: 0.2061 (0.2072) loss: 0.7537 (0.7586) time: 0.1303 data: 0.0451 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:28 lr: 0.000010 grad: 0.2021 (0.2071) loss: 0.7567 (0.7585) time: 0.1700 data: 0.0895 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:14 lr: 0.000010 grad: 0.2010 (0.2069) loss: 0.7687 (0.7586) time: 0.1046 data: 0.0198 max mem: 9377 +Train: [82] [5000/6250] eta: 0:02:59 lr: 0.000010 grad: 0.2010 (0.2069) loss: 0.7533 (0.7586) time: 0.1352 data: 0.0553 max mem: 9377 +Train: [82] [5100/6250] eta: 0:02:45 lr: 0.000010 grad: 0.1962 (0.2067) loss: 0.7653 (0.7587) time: 0.0914 data: 0.0002 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:30 lr: 0.000010 grad: 0.2080 (0.2066) loss: 0.7567 (0.7588) time: 0.1360 data: 0.0573 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:16 lr: 0.000010 grad: 0.1976 (0.2065) loss: 0.7597 (0.7587) time: 0.1491 data: 0.0672 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:01 lr: 0.000010 grad: 0.1992 (0.2064) loss: 0.7625 (0.7588) time: 0.1311 data: 0.0427 max mem: 9377 +Train: [82] [5500/6250] eta: 0:01:47 lr: 0.000010 grad: 0.1953 (0.2063) loss: 0.7628 (0.7588) time: 0.1315 data: 0.0444 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:33 lr: 0.000010 grad: 0.2033 (0.2063) loss: 0.7565 (0.7587) time: 0.1411 data: 0.0529 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:18 lr: 0.000010 grad: 0.2069 (0.2064) loss: 0.7515 (0.7587) time: 0.1265 data: 0.0441 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:04 lr: 0.000010 grad: 0.1992 (0.2064) loss: 0.7534 (0.7586) time: 0.1600 data: 0.0737 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:50 lr: 0.000010 grad: 0.2033 (0.2064) loss: 0.7474 (0.7585) time: 0.1280 data: 0.0379 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:35 lr: 0.000010 grad: 0.2043 (0.2064) loss: 0.7499 (0.7584) time: 0.1216 data: 0.0358 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:21 lr: 0.000010 grad: 0.2114 (0.2064) loss: 0.7501 (0.7583) time: 0.1793 data: 0.0996 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:07 lr: 0.000010 grad: 0.2040 (0.2064) loss: 0.7479 (0.7582) time: 0.1819 data: 0.1054 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.1996 (0.2064) loss: 0.7500 (0.7582) time: 0.1519 data: 0.0626 max mem: 9377 +Train: [82] Total time: 0:15:01 (0.1442 s / it) +Averaged stats: lr: 0.000010 grad: 0.1996 (0.2064) loss: 0.7500 (0.7582) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:05:40 loss: 0.8112 (0.8112) time: 5.4860 data: 5.4539 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8070 (0.8051) time: 0.1324 data: 0.1069 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:13 (0.2174 s / it) +Averaged stats (hcp-train-subset): loss: 0.8070 (0.8051) +Eval (hcp-val): [82] [ 0/62] eta: 0:05:12 loss: 0.8555 (0.8555) time: 5.0453 data: 5.0152 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8525 (0.8547) time: 0.1283 data: 0.1014 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:13 (0.2152 s / it) +Averaged stats (hcp-val): loss: 0.8525 (0.8547) +Eval (nsd-val): [82] [ 0/62] eta: 0:05:50 loss: 0.8219 (0.8219) time: 5.6596 data: 5.6298 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8324 (0.8342) time: 0.1247 data: 0.0998 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (nsd-val): loss: 0.8324 (0.8342) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 7:58:10 lr: 0.000010 grad: 0.4025 (0.4025) loss: 0.7193 (0.7193) time: 4.5904 data: 4.2771 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:21:17 lr: 0.000010 grad: 0.2432 (0.2769) loss: 0.7518 (0.7579) time: 0.1495 data: 0.0393 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:18:52 lr: 0.000010 grad: 0.2168 (0.2547) loss: 0.7617 (0.7565) time: 0.1638 data: 0.0616 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:17:44 lr: 0.000010 grad: 0.2204 (0.2440) loss: 0.7550 (0.7558) time: 0.1618 data: 0.0736 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:17:02 lr: 0.000010 grad: 0.2184 (0.2394) loss: 0.7476 (0.7542) time: 0.1319 data: 0.0427 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:16:07 lr: 0.000010 grad: 0.2064 (0.2344) loss: 0.7668 (0.7539) time: 0.1377 data: 0.0368 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:15:21 lr: 0.000010 grad: 0.2068 (0.2307) loss: 0.7518 (0.7547) time: 0.1333 data: 0.0402 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:14:57 lr: 0.000009 grad: 0.2110 (0.2282) loss: 0.7531 (0.7550) time: 0.1601 data: 0.0686 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:14:48 lr: 0.000009 grad: 0.2087 (0.2261) loss: 0.7529 (0.7553) time: 0.2403 data: 0.1488 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:14:32 lr: 0.000009 grad: 0.2078 (0.2239) loss: 0.7536 (0.7559) time: 0.1796 data: 0.0959 max mem: 9377 +Train: [83] [1000/6250] eta: 0:14:17 lr: 0.000009 grad: 0.2014 (0.2224) loss: 0.7576 (0.7564) time: 0.1468 data: 0.0575 max mem: 9377 +Train: [83] [1100/6250] eta: 0:13:59 lr: 0.000009 grad: 0.1904 (0.2207) loss: 0.7592 (0.7568) time: 0.1669 data: 0.0822 max mem: 9377 +Train: [83] [1200/6250] eta: 0:13:38 lr: 0.000009 grad: 0.2041 (0.2195) loss: 0.7634 (0.7570) time: 0.1471 data: 0.0587 max mem: 9377 +Train: [83] [1300/6250] eta: 0:13:15 lr: 0.000009 grad: 0.2061 (0.2186) loss: 0.7577 (0.7572) time: 0.1346 data: 0.0541 max mem: 9377 +Train: [83] [1400/6250] eta: 0:12:55 lr: 0.000009 grad: 0.1988 (0.2177) loss: 0.7629 (0.7575) time: 0.1796 data: 0.0963 max mem: 9377 +Train: [83] [1500/6250] eta: 0:12:39 lr: 0.000009 grad: 0.2032 (0.2166) loss: 0.7557 (0.7576) time: 0.1205 data: 0.0384 max mem: 9377 +Train: [83] [1600/6250] eta: 0:12:23 lr: 0.000009 grad: 0.1944 (0.2158) loss: 0.7584 (0.7578) time: 0.1663 data: 0.0846 max mem: 9377 +Train: [83] [1700/6250] eta: 0:12:08 lr: 0.000009 grad: 0.2049 (0.2150) loss: 0.7590 (0.7579) time: 0.1961 data: 0.1074 max mem: 9377 +Train: [83] [1800/6250] eta: 0:12:01 lr: 0.000009 grad: 0.1956 (0.2142) loss: 0.7558 (0.7581) time: 0.2175 data: 0.1269 max mem: 9377 +Train: [83] [1900/6250] eta: 0:11:44 lr: 0.000009 grad: 0.2003 (0.2138) loss: 0.7615 (0.7581) time: 0.1668 data: 0.0774 max mem: 9377 +Train: [83] [2000/6250] eta: 0:11:32 lr: 0.000009 grad: 0.1940 (0.2134) loss: 0.7726 (0.7580) time: 0.1629 data: 0.0723 max mem: 9377 +Train: [83] [2100/6250] eta: 0:11:10 lr: 0.000009 grad: 0.1991 (0.2129) loss: 0.7520 (0.7580) time: 0.1351 data: 0.0446 max mem: 9377 +Train: [83] [2200/6250] eta: 0:10:52 lr: 0.000009 grad: 0.1991 (0.2125) loss: 0.7547 (0.7580) time: 0.0983 data: 0.0002 max mem: 9377 +Train: [83] [2300/6250] eta: 0:10:33 lr: 0.000009 grad: 0.2026 (0.2122) loss: 0.7542 (0.7579) time: 0.1498 data: 0.0685 max mem: 9377 +Train: [83] [2400/6250] eta: 0:10:14 lr: 0.000009 grad: 0.2011 (0.2121) loss: 0.7575 (0.7577) time: 0.1318 data: 0.0362 max mem: 9377 +Train: [83] [2500/6250] eta: 0:09:56 lr: 0.000009 grad: 0.2004 (0.2119) loss: 0.7576 (0.7576) time: 0.1714 data: 0.0931 max mem: 9377 +Train: [83] [2600/6250] eta: 0:09:37 lr: 0.000009 grad: 0.2040 (0.2117) loss: 0.7559 (0.7575) time: 0.1392 data: 0.0476 max mem: 9377 +Train: [83] [2700/6250] eta: 0:09:19 lr: 0.000009 grad: 0.2040 (0.2114) loss: 0.7577 (0.7574) time: 0.1395 data: 0.0529 max mem: 9377 +Train: [83] [2800/6250] eta: 0:09:02 lr: 0.000009 grad: 0.1997 (0.2112) loss: 0.7628 (0.7574) time: 0.1553 data: 0.0740 max mem: 9377 +Train: [83] [2900/6250] eta: 0:08:46 lr: 0.000009 grad: 0.1996 (0.2109) loss: 0.7521 (0.7574) time: 0.1562 data: 0.0739 max mem: 9377 +Train: [83] [3000/6250] eta: 0:08:28 lr: 0.000009 grad: 0.2073 (0.2108) loss: 0.7483 (0.7572) time: 0.1429 data: 0.0568 max mem: 9377 +Train: [83] [3100/6250] eta: 0:08:11 lr: 0.000009 grad: 0.2081 (0.2106) loss: 0.7510 (0.7571) time: 0.1521 data: 0.0733 max mem: 9377 +Train: [83] [3200/6250] eta: 0:07:54 lr: 0.000009 grad: 0.2066 (0.2104) loss: 0.7572 (0.7571) time: 0.1513 data: 0.0664 max mem: 9377 +Train: [83] [3300/6250] eta: 0:07:37 lr: 0.000009 grad: 0.2047 (0.2103) loss: 0.7507 (0.7570) time: 0.1745 data: 0.0950 max mem: 9377 +Train: [83] [3400/6250] eta: 0:07:22 lr: 0.000009 grad: 0.1947 (0.2102) loss: 0.7638 (0.7569) time: 0.1296 data: 0.0459 max mem: 9377 +Train: [83] [3500/6250] eta: 0:07:06 lr: 0.000009 grad: 0.2011 (0.2099) loss: 0.7589 (0.7570) time: 0.1483 data: 0.0652 max mem: 9377 +Train: [83] [3600/6250] eta: 0:06:49 lr: 0.000009 grad: 0.2045 (0.2098) loss: 0.7525 (0.7569) time: 0.1446 data: 0.0614 max mem: 9377 +Train: [83] [3700/6250] eta: 0:06:34 lr: 0.000009 grad: 0.2036 (0.2096) loss: 0.7589 (0.7570) time: 0.1661 data: 0.0823 max mem: 9377 +Train: [83] [3800/6250] eta: 0:06:18 lr: 0.000009 grad: 0.2005 (0.2094) loss: 0.7443 (0.7570) time: 0.1491 data: 0.0626 max mem: 9377 +Train: [83] [3900/6250] eta: 0:06:02 lr: 0.000009 grad: 0.1958 (0.2092) loss: 0.7681 (0.7571) time: 0.1485 data: 0.0646 max mem: 9377 +Train: [83] [4000/6250] eta: 0:05:46 lr: 0.000009 grad: 0.1989 (0.2091) loss: 0.7665 (0.7571) time: 0.1455 data: 0.0572 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:29 lr: 0.000009 grad: 0.2052 (0.2089) loss: 0.7453 (0.7571) time: 0.1378 data: 0.0483 max mem: 9377 +Train: [83] [4200/6250] eta: 0:05:13 lr: 0.000009 grad: 0.2095 (0.2089) loss: 0.7531 (0.7571) time: 0.1119 data: 0.0231 max mem: 9377 +Train: [83] [4300/6250] eta: 0:04:57 lr: 0.000009 grad: 0.2017 (0.2088) loss: 0.7586 (0.7572) time: 0.1251 data: 0.0393 max mem: 9377 +Train: [83] [4400/6250] eta: 0:04:41 lr: 0.000009 grad: 0.2056 (0.2086) loss: 0.7562 (0.7574) time: 0.1433 data: 0.0539 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:25 lr: 0.000009 grad: 0.2043 (0.2083) loss: 0.7609 (0.7575) time: 0.1484 data: 0.0667 max mem: 9377 +Train: [83] [4600/6250] eta: 0:04:10 lr: 0.000009 grad: 0.1938 (0.2082) loss: 0.7677 (0.7576) time: 0.1490 data: 0.0663 max mem: 9377 +Train: [83] [4700/6250] eta: 0:03:54 lr: 0.000009 grad: 0.2072 (0.2081) loss: 0.7531 (0.7576) time: 0.1368 data: 0.0476 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:39 lr: 0.000009 grad: 0.2032 (0.2081) loss: 0.7560 (0.7576) time: 0.1448 data: 0.0564 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:23 lr: 0.000009 grad: 0.1978 (0.2081) loss: 0.7648 (0.7576) time: 0.1547 data: 0.0740 max mem: 9377 +Train: [83] [5000/6250] eta: 0:03:08 lr: 0.000009 grad: 0.2090 (0.2080) loss: 0.7525 (0.7576) time: 0.1281 data: 0.0457 max mem: 9377 +Train: [83] [5100/6250] eta: 0:02:52 lr: 0.000009 grad: 0.1990 (0.2080) loss: 0.7456 (0.7575) time: 0.1295 data: 0.0430 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:37 lr: 0.000009 grad: 0.2111 (0.2080) loss: 0.7496 (0.7574) time: 0.1290 data: 0.0409 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:22 lr: 0.000009 grad: 0.1980 (0.2081) loss: 0.7512 (0.7573) time: 0.1570 data: 0.0786 max mem: 9377 +Train: [83] [5400/6250] eta: 0:02:07 lr: 0.000009 grad: 0.2063 (0.2081) loss: 0.7576 (0.7572) time: 0.1693 data: 0.0879 max mem: 9377 +Train: [83] [5500/6250] eta: 0:01:52 lr: 0.000009 grad: 0.2016 (0.2081) loss: 0.7489 (0.7572) time: 0.1323 data: 0.0556 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:37 lr: 0.000009 grad: 0.2129 (0.2081) loss: 0.7386 (0.7572) time: 0.1561 data: 0.0702 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:22 lr: 0.000009 grad: 0.1948 (0.2080) loss: 0.7618 (0.7572) time: 0.1444 data: 0.0643 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:07 lr: 0.000009 grad: 0.2013 (0.2080) loss: 0.7561 (0.7571) time: 0.0849 data: 0.0002 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:52 lr: 0.000009 grad: 0.2099 (0.2080) loss: 0.7555 (0.7571) time: 0.1511 data: 0.0721 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:37 lr: 0.000009 grad: 0.2052 (0.2081) loss: 0.7533 (0.7570) time: 0.1702 data: 0.0893 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:22 lr: 0.000009 grad: 0.2020 (0.2081) loss: 0.7503 (0.7569) time: 0.1348 data: 0.0477 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:07 lr: 0.000009 grad: 0.2043 (0.2081) loss: 0.7564 (0.7569) time: 0.1057 data: 0.0194 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.2027 (0.2080) loss: 0.7521 (0.7569) time: 0.0977 data: 0.0067 max mem: 9377 +Train: [83] Total time: 0:15:40 (0.1504 s / it) +Averaged stats: lr: 0.000009 grad: 0.2027 (0.2080) loss: 0.7521 (0.7569) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:05:50 loss: 0.8098 (0.8098) time: 5.6455 data: 5.6140 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8037 (0.8044) time: 0.1282 data: 0.1016 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-train-subset): loss: 0.8037 (0.8044) +Eval (hcp-val): [83] [ 0/62] eta: 0:05:24 loss: 0.8533 (0.8533) time: 5.2373 data: 5.2071 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8501 (0.8539) time: 0.1299 data: 0.1051 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:13 (0.2155 s / it) +Averaged stats (hcp-val): loss: 0.8501 (0.8539) +Eval (nsd-val): [83] [ 0/62] eta: 0:05:00 loss: 0.8298 (0.8298) time: 4.8400 data: 4.8009 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8339 (0.8375) time: 0.1362 data: 0.1094 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (nsd-val): loss: 0.8339 (0.8375) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 11:02:08 lr: 0.000009 grad: 0.5193 (0.5193) loss: 0.6768 (0.6768) time: 6.3566 data: 6.2221 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:23:04 lr: 0.000009 grad: 0.2287 (0.2677) loss: 0.7784 (0.7813) time: 0.1645 data: 0.0500 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:18:53 lr: 0.000009 grad: 0.2341 (0.2530) loss: 0.7535 (0.7747) time: 0.1487 data: 0.0446 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:17:15 lr: 0.000008 grad: 0.2324 (0.2471) loss: 0.7422 (0.7684) time: 0.1486 data: 0.0556 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:16:00 lr: 0.000008 grad: 0.2212 (0.2403) loss: 0.7509 (0.7659) time: 0.1320 data: 0.0307 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:15:15 lr: 0.000008 grad: 0.2262 (0.2365) loss: 0.7448 (0.7644) time: 0.1346 data: 0.0438 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:14:33 lr: 0.000008 grad: 0.2117 (0.2338) loss: 0.7508 (0.7624) time: 0.1409 data: 0.0498 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:14:12 lr: 0.000008 grad: 0.2039 (0.2311) loss: 0.7663 (0.7612) time: 0.1435 data: 0.0526 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:13:51 lr: 0.000008 grad: 0.2034 (0.2284) loss: 0.7491 (0.7602) time: 0.1634 data: 0.0718 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:13:31 lr: 0.000008 grad: 0.2096 (0.2269) loss: 0.7547 (0.7590) time: 0.1380 data: 0.0495 max mem: 9377 +Train: [84] [1000/6250] eta: 0:13:15 lr: 0.000008 grad: 0.2104 (0.2249) loss: 0.7499 (0.7585) time: 0.1463 data: 0.0591 max mem: 9377 +Train: [84] [1100/6250] eta: 0:13:02 lr: 0.000008 grad: 0.2001 (0.2232) loss: 0.7467 (0.7580) time: 0.1486 data: 0.0605 max mem: 9377 +Train: [84] [1200/6250] eta: 0:12:47 lr: 0.000008 grad: 0.2097 (0.2218) loss: 0.7476 (0.7575) time: 0.1373 data: 0.0430 max mem: 9377 +Train: [84] [1300/6250] eta: 0:12:32 lr: 0.000008 grad: 0.2086 (0.2208) loss: 0.7468 (0.7569) time: 0.1264 data: 0.0259 max mem: 9377 +Train: [84] [1400/6250] eta: 0:12:14 lr: 0.000008 grad: 0.2074 (0.2196) loss: 0.7409 (0.7567) time: 0.1161 data: 0.0196 max mem: 9377 +Train: [84] [1500/6250] eta: 0:11:56 lr: 0.000008 grad: 0.2114 (0.2188) loss: 0.7439 (0.7563) time: 0.1399 data: 0.0546 max mem: 9377 +Train: [84] [1600/6250] eta: 0:11:42 lr: 0.000008 grad: 0.2074 (0.2180) loss: 0.7503 (0.7558) time: 0.1781 data: 0.0953 max mem: 9377 +Train: [84] [1700/6250] eta: 0:11:24 lr: 0.000008 grad: 0.2072 (0.2176) loss: 0.7478 (0.7554) time: 0.1522 data: 0.0746 max mem: 9377 +Train: [84] [1800/6250] eta: 0:11:08 lr: 0.000008 grad: 0.2107 (0.2170) loss: 0.7428 (0.7551) time: 0.1732 data: 0.0927 max mem: 9377 +Train: [84] [1900/6250] eta: 0:10:51 lr: 0.000008 grad: 0.2110 (0.2168) loss: 0.7479 (0.7550) time: 0.1453 data: 0.0594 max mem: 9377 +Train: [84] [2000/6250] eta: 0:10:36 lr: 0.000008 grad: 0.2123 (0.2165) loss: 0.7452 (0.7547) time: 0.1673 data: 0.0718 max mem: 9377 +Train: [84] [2100/6250] eta: 0:10:21 lr: 0.000008 grad: 0.1986 (0.2162) loss: 0.7598 (0.7545) time: 0.1613 data: 0.0697 max mem: 9377 +Train: [84] [2200/6250] eta: 0:10:04 lr: 0.000008 grad: 0.2046 (0.2159) loss: 0.7467 (0.7542) time: 0.1420 data: 0.0579 max mem: 9377 +Train: [84] [2300/6250] eta: 0:09:46 lr: 0.000008 grad: 0.2032 (0.2153) loss: 0.7572 (0.7540) time: 0.1204 data: 0.0316 max mem: 9377 +Train: [84] [2400/6250] eta: 0:09:31 lr: 0.000008 grad: 0.2057 (0.2149) loss: 0.7568 (0.7541) time: 0.1295 data: 0.0387 max mem: 9377 +Train: [84] [2500/6250] eta: 0:09:14 lr: 0.000008 grad: 0.2008 (0.2144) loss: 0.7611 (0.7543) time: 0.1278 data: 0.0388 max mem: 9377 +Train: [84] [2600/6250] eta: 0:08:58 lr: 0.000008 grad: 0.1970 (0.2139) loss: 0.7548 (0.7544) time: 0.1273 data: 0.0359 max mem: 9377 +Train: [84] [2700/6250] eta: 0:08:41 lr: 0.000008 grad: 0.1928 (0.2133) loss: 0.7633 (0.7546) time: 0.1300 data: 0.0454 max mem: 9377 +Train: [84] [2800/6250] eta: 0:08:25 lr: 0.000008 grad: 0.1941 (0.2128) loss: 0.7687 (0.7550) time: 0.1387 data: 0.0530 max mem: 9377 +Train: [84] [2900/6250] eta: 0:08:09 lr: 0.000008 grad: 0.1880 (0.2123) loss: 0.7658 (0.7553) time: 0.1390 data: 0.0494 max mem: 9377 +Train: [84] [3000/6250] eta: 0:07:53 lr: 0.000008 grad: 0.1968 (0.2118) loss: 0.7649 (0.7556) time: 0.1380 data: 0.0516 max mem: 9377 +Train: [84] [3100/6250] eta: 0:07:38 lr: 0.000008 grad: 0.1983 (0.2114) loss: 0.7568 (0.7558) time: 0.1424 data: 0.0613 max mem: 9377 +Train: [84] [3200/6250] eta: 0:07:24 lr: 0.000008 grad: 0.1943 (0.2110) loss: 0.7663 (0.7560) time: 0.1595 data: 0.0802 max mem: 9377 +Train: [84] [3300/6250] eta: 0:07:08 lr: 0.000008 grad: 0.1894 (0.2105) loss: 0.7705 (0.7563) time: 0.1092 data: 0.0236 max mem: 9377 +Train: [84] [3400/6250] eta: 0:06:54 lr: 0.000008 grad: 0.2029 (0.2102) loss: 0.7571 (0.7565) time: 0.1905 data: 0.1136 max mem: 9377 +Train: [84] [3500/6250] eta: 0:06:40 lr: 0.000008 grad: 0.2004 (0.2100) loss: 0.7563 (0.7565) time: 0.1299 data: 0.0379 max mem: 9377 +Train: [84] [3600/6250] eta: 0:06:25 lr: 0.000008 grad: 0.2096 (0.2098) loss: 0.7501 (0.7565) time: 0.1438 data: 0.0581 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:11 lr: 0.000008 grad: 0.1968 (0.2096) loss: 0.7600 (0.7566) time: 0.1426 data: 0.0645 max mem: 9377 +Train: [84] [3800/6250] eta: 0:05:57 lr: 0.000008 grad: 0.1916 (0.2094) loss: 0.7724 (0.7567) time: 0.1539 data: 0.0659 max mem: 9377 +Train: [84] [3900/6250] eta: 0:05:43 lr: 0.000008 grad: 0.1995 (0.2093) loss: 0.7616 (0.7568) time: 0.1455 data: 0.0485 max mem: 9377 +Train: [84] [4000/6250] eta: 0:05:28 lr: 0.000008 grad: 0.2151 (0.2092) loss: 0.7502 (0.7568) time: 0.1348 data: 0.0536 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:13 lr: 0.000008 grad: 0.2090 (0.2091) loss: 0.7622 (0.7568) time: 0.1372 data: 0.0497 max mem: 9377 +Train: [84] [4200/6250] eta: 0:04:57 lr: 0.000008 grad: 0.1955 (0.2089) loss: 0.7653 (0.7569) time: 0.1327 data: 0.0471 max mem: 9377 +Train: [84] [4300/6250] eta: 0:04:43 lr: 0.000008 grad: 0.1980 (0.2088) loss: 0.7543 (0.7569) time: 0.1588 data: 0.0723 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:28 lr: 0.000008 grad: 0.2034 (0.2086) loss: 0.7521 (0.7570) time: 0.1417 data: 0.0552 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:13 lr: 0.000008 grad: 0.2040 (0.2085) loss: 0.7619 (0.7570) time: 0.1440 data: 0.0649 max mem: 9377 +Train: [84] [4600/6250] eta: 0:03:58 lr: 0.000008 grad: 0.1983 (0.2085) loss: 0.7554 (0.7570) time: 0.1416 data: 0.0569 max mem: 9377 +Train: [84] [4700/6250] eta: 0:03:44 lr: 0.000008 grad: 0.1997 (0.2084) loss: 0.7591 (0.7570) time: 0.1681 data: 0.0905 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:29 lr: 0.000008 grad: 0.2036 (0.2083) loss: 0.7570 (0.7570) time: 0.1456 data: 0.0583 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:15 lr: 0.000008 grad: 0.2055 (0.2084) loss: 0.7539 (0.7569) time: 0.1408 data: 0.0570 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:00 lr: 0.000008 grad: 0.2045 (0.2084) loss: 0.7537 (0.7569) time: 0.1365 data: 0.0510 max mem: 9377 +Train: [84] [5100/6250] eta: 0:02:46 lr: 0.000008 grad: 0.2065 (0.2084) loss: 0.7550 (0.7568) time: 0.1421 data: 0.0566 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:31 lr: 0.000008 grad: 0.1999 (0.2083) loss: 0.7556 (0.7568) time: 0.0911 data: 0.0002 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:17 lr: 0.000008 grad: 0.2115 (0.2082) loss: 0.7614 (0.7568) time: 0.1359 data: 0.0502 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:02 lr: 0.000008 grad: 0.2013 (0.2082) loss: 0.7572 (0.7567) time: 0.0855 data: 0.0002 max mem: 9377 +Train: [84] [5500/6250] eta: 0:01:48 lr: 0.000008 grad: 0.1995 (0.2082) loss: 0.7536 (0.7567) time: 0.1514 data: 0.0615 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:33 lr: 0.000008 grad: 0.2036 (0.2081) loss: 0.7535 (0.7567) time: 0.1033 data: 0.0087 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:19 lr: 0.000008 grad: 0.1985 (0.2081) loss: 0.7567 (0.7567) time: 0.1408 data: 0.0576 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:04 lr: 0.000008 grad: 0.2095 (0.2080) loss: 0.7594 (0.7567) time: 0.1080 data: 0.0111 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:50 lr: 0.000008 grad: 0.2018 (0.2079) loss: 0.7607 (0.7567) time: 0.1325 data: 0.0465 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:36 lr: 0.000008 grad: 0.2014 (0.2079) loss: 0.7498 (0.7568) time: 0.1277 data: 0.0373 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:21 lr: 0.000008 grad: 0.2032 (0.2078) loss: 0.7556 (0.7568) time: 0.0828 data: 0.0002 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:07 lr: 0.000008 grad: 0.2018 (0.2077) loss: 0.7618 (0.7569) time: 0.1452 data: 0.0596 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.2007 (0.2077) loss: 0.7578 (0.7570) time: 0.1570 data: 0.0783 max mem: 9377 +Train: [84] Total time: 0:15:09 (0.1455 s / it) +Averaged stats: lr: 0.000008 grad: 0.2007 (0.2077) loss: 0.7578 (0.7570) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:05:59 loss: 0.8087 (0.8087) time: 5.7959 data: 5.7658 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8052 (0.8039) time: 0.1087 data: 0.0840 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:13 (0.2126 s / it) +Averaged stats (hcp-train-subset): loss: 0.8052 (0.8039) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [84] [ 0/62] eta: 0:05:14 loss: 0.8608 (0.8608) time: 5.0781 data: 5.0473 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8524 (0.8543) time: 0.1180 data: 0.0930 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:13 (0.2186 s / it) +Averaged stats (hcp-val): loss: 0.8524 (0.8543) +Making plots (hcp-val): example=23 +Eval (nsd-val): [84] [ 0/62] eta: 0:05:29 loss: 0.8287 (0.8287) time: 5.3160 data: 5.2849 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8365 (0.8354) time: 0.1267 data: 0.1012 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (nsd-val): loss: 0.8365 (0.8354) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 8:20:47 lr: 0.000008 grad: 0.2018 (0.2018) loss: 0.7983 (0.7983) time: 4.8077 data: 4.6169 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:20:54 lr: 0.000008 grad: 0.2350 (0.2591) loss: 0.7677 (0.7777) time: 0.1522 data: 0.0643 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:17:24 lr: 0.000008 grad: 0.2222 (0.2430) loss: 0.7562 (0.7731) time: 0.1367 data: 0.0458 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:16:14 lr: 0.000007 grad: 0.2349 (0.2388) loss: 0.7265 (0.7662) time: 0.1542 data: 0.0627 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:15:13 lr: 0.000007 grad: 0.2202 (0.2357) loss: 0.7452 (0.7627) time: 0.1405 data: 0.0507 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:14:32 lr: 0.000007 grad: 0.2216 (0.2327) loss: 0.7529 (0.7611) time: 0.1330 data: 0.0409 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:13:57 lr: 0.000007 grad: 0.2113 (0.2293) loss: 0.7589 (0.7610) time: 0.1404 data: 0.0447 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:13:29 lr: 0.000007 grad: 0.2067 (0.2269) loss: 0.7542 (0.7606) time: 0.1195 data: 0.0244 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:13:20 lr: 0.000007 grad: 0.2041 (0.2248) loss: 0.7631 (0.7602) time: 0.1917 data: 0.1062 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:13:01 lr: 0.000007 grad: 0.2054 (0.2228) loss: 0.7631 (0.7603) time: 0.1343 data: 0.0429 max mem: 9377 +Train: [85] [1000/6250] eta: 0:12:46 lr: 0.000007 grad: 0.2053 (0.2216) loss: 0.7571 (0.7600) time: 0.1592 data: 0.0829 max mem: 9377 +Train: [85] [1100/6250] eta: 0:12:25 lr: 0.000007 grad: 0.2038 (0.2208) loss: 0.7586 (0.7592) time: 0.1098 data: 0.0144 max mem: 9377 +Train: [85] [1200/6250] eta: 0:12:10 lr: 0.000007 grad: 0.2066 (0.2200) loss: 0.7511 (0.7587) time: 0.1548 data: 0.0688 max mem: 9377 +Train: [85] [1300/6250] eta: 0:11:56 lr: 0.000007 grad: 0.2108 (0.2195) loss: 0.7575 (0.7581) time: 0.1241 data: 0.0395 max mem: 9377 +Train: [85] [1400/6250] eta: 0:11:41 lr: 0.000007 grad: 0.2043 (0.2190) loss: 0.7419 (0.7574) time: 0.1114 data: 0.0312 max mem: 9377 +Train: [85] [1500/6250] eta: 0:11:26 lr: 0.000007 grad: 0.1986 (0.2184) loss: 0.7638 (0.7571) time: 0.1229 data: 0.0448 max mem: 9377 +Train: [85] [1600/6250] eta: 0:11:13 lr: 0.000007 grad: 0.1977 (0.2179) loss: 0.7634 (0.7567) time: 0.1877 data: 0.1044 max mem: 9377 +Train: [85] [1700/6250] eta: 0:10:58 lr: 0.000007 grad: 0.2025 (0.2173) loss: 0.7591 (0.7566) time: 0.1521 data: 0.0685 max mem: 9377 +Train: [85] [1800/6250] eta: 0:10:41 lr: 0.000007 grad: 0.2107 (0.2170) loss: 0.7490 (0.7562) time: 0.1487 data: 0.0654 max mem: 9377 +Train: [85] [1900/6250] eta: 0:10:28 lr: 0.000007 grad: 0.2054 (0.2165) loss: 0.7581 (0.7563) time: 0.1337 data: 0.0481 max mem: 9377 +Train: [85] [2000/6250] eta: 0:10:12 lr: 0.000007 grad: 0.2021 (0.2161) loss: 0.7628 (0.7563) time: 0.1532 data: 0.0688 max mem: 9377 +Train: [85] [2100/6250] eta: 0:10:00 lr: 0.000007 grad: 0.2065 (0.2156) loss: 0.7567 (0.7565) time: 0.1468 data: 0.0637 max mem: 9377 +Train: [85] [2200/6250] eta: 0:09:51 lr: 0.000007 grad: 0.2111 (0.2151) loss: 0.7555 (0.7564) time: 0.1942 data: 0.1091 max mem: 9377 +Train: [85] [2300/6250] eta: 0:09:36 lr: 0.000007 grad: 0.2010 (0.2147) loss: 0.7462 (0.7563) time: 0.1416 data: 0.0579 max mem: 9377 +Train: [85] [2400/6250] eta: 0:09:21 lr: 0.000007 grad: 0.2078 (0.2143) loss: 0.7508 (0.7562) time: 0.1630 data: 0.0829 max mem: 9377 +Train: [85] [2500/6250] eta: 0:09:04 lr: 0.000007 grad: 0.2048 (0.2141) loss: 0.7542 (0.7562) time: 0.1389 data: 0.0573 max mem: 9377 +Train: [85] [2600/6250] eta: 0:08:48 lr: 0.000007 grad: 0.2089 (0.2138) loss: 0.7590 (0.7562) time: 0.1225 data: 0.0356 max mem: 9377 +Train: [85] [2700/6250] eta: 0:08:33 lr: 0.000007 grad: 0.2013 (0.2135) loss: 0.7582 (0.7561) time: 0.1320 data: 0.0532 max mem: 9377 +Train: [85] [2800/6250] eta: 0:08:20 lr: 0.000007 grad: 0.2077 (0.2133) loss: 0.7568 (0.7562) time: 0.1007 data: 0.0081 max mem: 9377 +Train: [85] [2900/6250] eta: 0:08:06 lr: 0.000007 grad: 0.2077 (0.2131) loss: 0.7595 (0.7561) time: 0.1757 data: 0.0943 max mem: 9377 +Train: [85] [3000/6250] eta: 0:07:51 lr: 0.000007 grad: 0.2094 (0.2130) loss: 0.7438 (0.7560) time: 0.1280 data: 0.0478 max mem: 9377 +Train: [85] [3100/6250] eta: 0:07:36 lr: 0.000007 grad: 0.1896 (0.2126) loss: 0.7721 (0.7563) time: 0.1236 data: 0.0423 max mem: 9377 +Train: [85] [3200/6250] eta: 0:07:22 lr: 0.000007 grad: 0.2019 (0.2123) loss: 0.7584 (0.7565) time: 0.1296 data: 0.0479 max mem: 9377 +Train: [85] [3300/6250] eta: 0:07:06 lr: 0.000007 grad: 0.2041 (0.2120) loss: 0.7605 (0.7567) time: 0.1394 data: 0.0540 max mem: 9377 +Train: [85] [3400/6250] eta: 0:06:54 lr: 0.000007 grad: 0.2001 (0.2117) loss: 0.7669 (0.7569) time: 0.3106 data: 0.2346 max mem: 9377 +Train: [85] [3500/6250] eta: 0:06:40 lr: 0.000007 grad: 0.2032 (0.2115) loss: 0.7644 (0.7570) time: 0.1314 data: 0.0518 max mem: 9377 +Train: [85] [3600/6250] eta: 0:06:27 lr: 0.000007 grad: 0.2018 (0.2114) loss: 0.7464 (0.7570) time: 0.1571 data: 0.0777 max mem: 9377 +Train: [85] [3700/6250] eta: 0:06:13 lr: 0.000007 grad: 0.2008 (0.2113) loss: 0.7494 (0.7569) time: 0.1276 data: 0.0464 max mem: 9377 +Train: [85] [3800/6250] eta: 0:06:00 lr: 0.000007 grad: 0.2041 (0.2111) loss: 0.7572 (0.7569) time: 0.1743 data: 0.0836 max mem: 9377 +Train: [85] [3900/6250] eta: 0:05:47 lr: 0.000007 grad: 0.2008 (0.2110) loss: 0.7607 (0.7569) time: 0.1641 data: 0.0703 max mem: 9377 +Train: [85] [4000/6250] eta: 0:05:34 lr: 0.000007 grad: 0.2112 (0.2109) loss: 0.7443 (0.7568) time: 0.2039 data: 0.1244 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:18 lr: 0.000007 grad: 0.1978 (0.2108) loss: 0.7613 (0.7568) time: 0.1505 data: 0.0603 max mem: 9377 +Train: [85] [4200/6250] eta: 0:05:03 lr: 0.000007 grad: 0.2010 (0.2107) loss: 0.7454 (0.7568) time: 0.1485 data: 0.0671 max mem: 9377 +Train: [85] [4300/6250] eta: 0:04:48 lr: 0.000007 grad: 0.2025 (0.2107) loss: 0.7623 (0.7567) time: 0.1314 data: 0.0518 max mem: 9377 +Train: [85] [4400/6250] eta: 0:04:34 lr: 0.000007 grad: 0.2130 (0.2106) loss: 0.7523 (0.7567) time: 0.1441 data: 0.0580 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:19 lr: 0.000007 grad: 0.2086 (0.2106) loss: 0.7489 (0.7566) time: 0.1578 data: 0.0773 max mem: 9377 +Train: [85] [4600/6250] eta: 0:04:04 lr: 0.000007 grad: 0.2061 (0.2106) loss: 0.7571 (0.7566) time: 0.1861 data: 0.1060 max mem: 9377 +Train: [85] [4700/6250] eta: 0:03:50 lr: 0.000007 grad: 0.2051 (0.2106) loss: 0.7370 (0.7564) time: 0.1802 data: 0.0994 max mem: 9377 +Train: [85] [4800/6250] eta: 0:03:35 lr: 0.000007 grad: 0.2133 (0.2108) loss: 0.7464 (0.7563) time: 0.1015 data: 0.0003 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:20 lr: 0.000007 grad: 0.1990 (0.2107) loss: 0.7567 (0.7563) time: 0.1447 data: 0.0636 max mem: 9377 +Train: [85] [5000/6250] eta: 0:03:05 lr: 0.000007 grad: 0.2004 (0.2106) loss: 0.7524 (0.7562) time: 0.1623 data: 0.0810 max mem: 9377 +Train: [85] [5100/6250] eta: 0:02:50 lr: 0.000007 grad: 0.2037 (0.2106) loss: 0.7504 (0.7561) time: 0.1239 data: 0.0328 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:35 lr: 0.000007 grad: 0.2052 (0.2105) loss: 0.7677 (0.7561) time: 0.1546 data: 0.0777 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:20 lr: 0.000007 grad: 0.2035 (0.2104) loss: 0.7607 (0.7562) time: 0.1517 data: 0.0714 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:05 lr: 0.000007 grad: 0.2054 (0.2104) loss: 0.7493 (0.7561) time: 0.1509 data: 0.0638 max mem: 9377 +Train: [85] [5500/6250] eta: 0:01:51 lr: 0.000007 grad: 0.2020 (0.2103) loss: 0.7569 (0.7561) time: 0.2383 data: 0.1541 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:36 lr: 0.000007 grad: 0.2044 (0.2103) loss: 0.7569 (0.7561) time: 0.1163 data: 0.0244 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:21 lr: 0.000007 grad: 0.2043 (0.2102) loss: 0.7596 (0.7561) time: 0.1565 data: 0.0735 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:06 lr: 0.000007 grad: 0.2144 (0.2101) loss: 0.7563 (0.7561) time: 0.1371 data: 0.0484 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:51 lr: 0.000007 grad: 0.2049 (0.2101) loss: 0.7546 (0.7561) time: 0.1354 data: 0.0549 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:36 lr: 0.000007 grad: 0.2109 (0.2100) loss: 0.7540 (0.7562) time: 0.1303 data: 0.0499 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:22 lr: 0.000007 grad: 0.2091 (0.2100) loss: 0.7493 (0.7561) time: 0.1377 data: 0.0580 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:07 lr: 0.000007 grad: 0.2090 (0.2100) loss: 0.7458 (0.7561) time: 0.1396 data: 0.0544 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.2093 (0.2100) loss: 0.7446 (0.7561) time: 0.1211 data: 0.0380 max mem: 9377 +Train: [85] Total time: 0:15:27 (0.1483 s / it) +Averaged stats: lr: 0.000007 grad: 0.2093 (0.2100) loss: 0.7446 (0.7561) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:06:12 loss: 0.8135 (0.8135) time: 6.0020 data: 5.9711 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8046 (0.8041) time: 0.1276 data: 0.1027 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:13 (0.2198 s / it) +Averaged stats (hcp-train-subset): loss: 0.8046 (0.8041) +Eval (hcp-val): [85] [ 0/62] eta: 0:05:44 loss: 0.8588 (0.8588) time: 5.5576 data: 5.5272 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8532 (0.8553) time: 0.1247 data: 0.1001 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:13 (0.2176 s / it) +Averaged stats (hcp-val): loss: 0.8532 (0.8553) +Eval (nsd-val): [85] [ 0/62] eta: 0:05:00 loss: 0.8305 (0.8305) time: 4.8496 data: 4.8185 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8365 (0.8366) time: 0.1249 data: 0.0997 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (nsd-val): loss: 0.8365 (0.8366) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 11:35:49 lr: 0.000007 grad: 0.1714 (0.1714) loss: 0.8092 (0.8092) time: 6.6799 data: 6.5804 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:22:13 lr: 0.000007 grad: 0.2416 (0.2654) loss: 0.7500 (0.7598) time: 0.1710 data: 0.0728 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:18:06 lr: 0.000007 grad: 0.2149 (0.2472) loss: 0.7590 (0.7574) time: 0.1362 data: 0.0409 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:16:34 lr: 0.000007 grad: 0.2249 (0.2402) loss: 0.7421 (0.7543) time: 0.1396 data: 0.0454 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:15:39 lr: 0.000007 grad: 0.2095 (0.2352) loss: 0.7437 (0.7528) time: 0.1225 data: 0.0388 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:14:54 lr: 0.000007 grad: 0.2096 (0.2314) loss: 0.7543 (0.7524) time: 0.1156 data: 0.0242 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:14:16 lr: 0.000006 grad: 0.2004 (0.2287) loss: 0.7458 (0.7525) time: 0.1362 data: 0.0439 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:13:49 lr: 0.000006 grad: 0.2126 (0.2258) loss: 0.7502 (0.7529) time: 0.1550 data: 0.0669 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:13:42 lr: 0.000006 grad: 0.2069 (0.2240) loss: 0.7551 (0.7530) time: 0.1734 data: 0.0859 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:13:42 lr: 0.000006 grad: 0.2118 (0.2224) loss: 0.7490 (0.7525) time: 0.1881 data: 0.1025 max mem: 9377 +Train: [86] [1000/6250] eta: 0:13:38 lr: 0.000006 grad: 0.2025 (0.2210) loss: 0.7461 (0.7524) time: 0.1777 data: 0.0942 max mem: 9377 +Train: [86] [1100/6250] eta: 0:13:31 lr: 0.000006 grad: 0.2057 (0.2198) loss: 0.7510 (0.7522) time: 0.1804 data: 0.0979 max mem: 9377 +Train: [86] [1200/6250] eta: 0:13:18 lr: 0.000006 grad: 0.2146 (0.2187) loss: 0.7448 (0.7524) time: 0.1638 data: 0.0790 max mem: 9377 +Train: [86] [1300/6250] eta: 0:13:07 lr: 0.000006 grad: 0.2008 (0.2178) loss: 0.7560 (0.7524) time: 0.1695 data: 0.0852 max mem: 9377 +Train: [86] [1400/6250] eta: 0:12:51 lr: 0.000006 grad: 0.2039 (0.2169) loss: 0.7550 (0.7525) time: 0.1562 data: 0.0665 max mem: 9377 +Train: [86] [1500/6250] eta: 0:12:34 lr: 0.000006 grad: 0.1971 (0.2159) loss: 0.7590 (0.7530) time: 0.1476 data: 0.0639 max mem: 9377 +Train: [86] [1600/6250] eta: 0:12:18 lr: 0.000006 grad: 0.2111 (0.2154) loss: 0.7652 (0.7531) time: 0.1892 data: 0.1109 max mem: 9377 +Train: [86] [1700/6250] eta: 0:11:57 lr: 0.000006 grad: 0.2055 (0.2152) loss: 0.7485 (0.7529) time: 0.1500 data: 0.0657 max mem: 9377 +Train: [86] [1800/6250] eta: 0:11:37 lr: 0.000006 grad: 0.2052 (0.2148) loss: 0.7438 (0.7527) time: 0.1452 data: 0.0586 max mem: 9377 +Train: [86] [1900/6250] eta: 0:11:19 lr: 0.000006 grad: 0.2033 (0.2145) loss: 0.7523 (0.7527) time: 0.1446 data: 0.0581 max mem: 9377 +Train: [86] [2000/6250] eta: 0:11:02 lr: 0.000006 grad: 0.2107 (0.2142) loss: 0.7488 (0.7528) time: 0.1344 data: 0.0518 max mem: 9377 +Train: [86] [2100/6250] eta: 0:10:45 lr: 0.000006 grad: 0.2059 (0.2137) loss: 0.7525 (0.7530) time: 0.1390 data: 0.0576 max mem: 9377 +Train: [86] [2200/6250] eta: 0:10:28 lr: 0.000006 grad: 0.2042 (0.2134) loss: 0.7549 (0.7531) time: 0.1632 data: 0.0837 max mem: 9377 +Train: [86] [2300/6250] eta: 0:10:09 lr: 0.000006 grad: 0.1957 (0.2130) loss: 0.7668 (0.7533) time: 0.1277 data: 0.0484 max mem: 9377 +Train: [86] [2400/6250] eta: 0:09:52 lr: 0.000006 grad: 0.2021 (0.2126) loss: 0.7658 (0.7536) time: 0.1429 data: 0.0522 max mem: 9377 +Train: [86] [2500/6250] eta: 0:09:37 lr: 0.000006 grad: 0.1933 (0.2121) loss: 0.7704 (0.7540) time: 0.1553 data: 0.0689 max mem: 9377 +Train: [86] [2600/6250] eta: 0:09:20 lr: 0.000006 grad: 0.1978 (0.2117) loss: 0.7685 (0.7544) time: 0.1076 data: 0.0232 max mem: 9377 +Train: [86] [2700/6250] eta: 0:09:03 lr: 0.000006 grad: 0.2062 (0.2113) loss: 0.7528 (0.7547) time: 0.1100 data: 0.0214 max mem: 9377 +Train: [86] [2800/6250] eta: 0:08:45 lr: 0.000006 grad: 0.1967 (0.2111) loss: 0.7710 (0.7550) time: 0.1306 data: 0.0437 max mem: 9377 +Train: [86] [2900/6250] eta: 0:08:28 lr: 0.000006 grad: 0.2076 (0.2109) loss: 0.7574 (0.7552) time: 0.1267 data: 0.0446 max mem: 9377 +Train: [86] [3000/6250] eta: 0:08:11 lr: 0.000006 grad: 0.2032 (0.2108) loss: 0.7527 (0.7554) time: 0.1444 data: 0.0579 max mem: 9377 +Train: [86] [3100/6250] eta: 0:07:55 lr: 0.000006 grad: 0.2022 (0.2106) loss: 0.7632 (0.7556) time: 0.1583 data: 0.0717 max mem: 9377 +Train: [86] [3200/6250] eta: 0:07:39 lr: 0.000006 grad: 0.2024 (0.2104) loss: 0.7621 (0.7558) time: 0.1538 data: 0.0723 max mem: 9377 +Train: [86] [3300/6250] eta: 0:07:24 lr: 0.000006 grad: 0.1989 (0.2101) loss: 0.7592 (0.7560) time: 0.1489 data: 0.0686 max mem: 9377 +Train: [86] [3400/6250] eta: 0:07:09 lr: 0.000006 grad: 0.2055 (0.2100) loss: 0.7547 (0.7561) time: 0.1470 data: 0.0672 max mem: 9377 +Train: [86] [3500/6250] eta: 0:06:54 lr: 0.000006 grad: 0.2062 (0.2098) loss: 0.7587 (0.7562) time: 0.1505 data: 0.0816 max mem: 9377 +Train: [86] [3600/6250] eta: 0:06:38 lr: 0.000006 grad: 0.2107 (0.2098) loss: 0.7528 (0.7562) time: 0.1368 data: 0.0599 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:23 lr: 0.000006 grad: 0.1984 (0.2097) loss: 0.7625 (0.7562) time: 0.1558 data: 0.0819 max mem: 9377 +Train: [86] [3800/6250] eta: 0:06:07 lr: 0.000006 grad: 0.2028 (0.2095) loss: 0.7539 (0.7562) time: 0.1495 data: 0.0627 max mem: 9377 +Train: [86] [3900/6250] eta: 0:05:52 lr: 0.000006 grad: 0.1954 (0.2093) loss: 0.7572 (0.7563) time: 0.1533 data: 0.0689 max mem: 9377 +Train: [86] [4000/6250] eta: 0:05:37 lr: 0.000006 grad: 0.2074 (0.2093) loss: 0.7572 (0.7563) time: 0.1560 data: 0.0709 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:21 lr: 0.000006 grad: 0.2006 (0.2092) loss: 0.7598 (0.7563) time: 0.1457 data: 0.0656 max mem: 9377 +Train: [86] [4200/6250] eta: 0:05:05 lr: 0.000006 grad: 0.1985 (0.2090) loss: 0.7563 (0.7565) time: 0.1141 data: 0.0283 max mem: 9377 +Train: [86] [4300/6250] eta: 0:04:50 lr: 0.000006 grad: 0.2042 (0.2089) loss: 0.7582 (0.7565) time: 0.1388 data: 0.0531 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:35 lr: 0.000006 grad: 0.2002 (0.2088) loss: 0.7536 (0.7566) time: 0.1652 data: 0.0788 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:20 lr: 0.000006 grad: 0.2040 (0.2089) loss: 0.7641 (0.7566) time: 0.1283 data: 0.0435 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:05 lr: 0.000006 grad: 0.2097 (0.2088) loss: 0.7482 (0.7566) time: 0.1384 data: 0.0516 max mem: 9377 +Train: [86] [4700/6250] eta: 0:03:50 lr: 0.000006 grad: 0.2074 (0.2088) loss: 0.7484 (0.7566) time: 0.1449 data: 0.0660 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:35 lr: 0.000006 grad: 0.2031 (0.2088) loss: 0.7568 (0.7566) time: 0.1593 data: 0.0724 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:20 lr: 0.000006 grad: 0.1973 (0.2087) loss: 0.7520 (0.7567) time: 0.1420 data: 0.0581 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:05 lr: 0.000006 grad: 0.2087 (0.2088) loss: 0.7563 (0.7566) time: 0.1199 data: 0.0370 max mem: 9377 +Train: [86] [5100/6250] eta: 0:02:50 lr: 0.000006 grad: 0.2192 (0.2088) loss: 0.7438 (0.7565) time: 0.1545 data: 0.0674 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:35 lr: 0.000006 grad: 0.2098 (0.2089) loss: 0.7485 (0.7565) time: 0.1496 data: 0.0672 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:20 lr: 0.000006 grad: 0.2012 (0.2089) loss: 0.7530 (0.7565) time: 0.1116 data: 0.0312 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:05 lr: 0.000006 grad: 0.2068 (0.2088) loss: 0.7496 (0.7565) time: 0.1428 data: 0.0583 max mem: 9377 +Train: [86] [5500/6250] eta: 0:01:50 lr: 0.000006 grad: 0.2091 (0.2089) loss: 0.7517 (0.7564) time: 0.1356 data: 0.0533 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:36 lr: 0.000006 grad: 0.2106 (0.2089) loss: 0.7481 (0.7563) time: 0.1397 data: 0.0605 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:21 lr: 0.000006 grad: 0.2066 (0.2089) loss: 0.7608 (0.7563) time: 0.1419 data: 0.0561 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:06 lr: 0.000006 grad: 0.2059 (0.2090) loss: 0.7570 (0.7562) time: 0.1693 data: 0.0902 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:51 lr: 0.000006 grad: 0.2086 (0.2090) loss: 0.7488 (0.7562) time: 0.1492 data: 0.0718 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:36 lr: 0.000006 grad: 0.2148 (0.2090) loss: 0.7515 (0.7562) time: 0.1250 data: 0.0390 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:22 lr: 0.000006 grad: 0.2109 (0.2090) loss: 0.7534 (0.7562) time: 0.1399 data: 0.0542 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:07 lr: 0.000006 grad: 0.2064 (0.2091) loss: 0.7482 (0.7561) time: 0.1801 data: 0.1080 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1992 (0.2090) loss: 0.7622 (0.7561) time: 0.1459 data: 0.0663 max mem: 9377 +Train: [86] Total time: 0:15:25 (0.1480 s / it) +Averaged stats: lr: 0.000006 grad: 0.1992 (0.2090) loss: 0.7622 (0.7561) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:03:33 loss: 0.8121 (0.8121) time: 3.4375 data: 3.3479 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8041 (0.8040) time: 0.1192 data: 0.0925 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:13 (0.2137 s / it) +Averaged stats (hcp-train-subset): loss: 0.8041 (0.8040) +Eval (hcp-val): [86] [ 0/62] eta: 0:04:25 loss: 0.8632 (0.8632) time: 4.2823 data: 4.2252 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8541 (0.8547) time: 0.1287 data: 0.1039 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:13 (0.2182 s / it) +Averaged stats (hcp-val): loss: 0.8541 (0.8547) +Eval (nsd-val): [86] [ 0/62] eta: 0:04:27 loss: 0.8252 (0.8252) time: 4.3134 data: 4.2446 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8357 (0.8368) time: 0.1250 data: 0.0983 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:14 (0.2260 s / it) +Averaged stats (nsd-val): loss: 0.8357 (0.8368) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [87] [ 0/6250] eta: 11:10:11 lr: 0.000006 grad: 0.3501 (0.3501) loss: 0.6839 (0.6839) time: 6.4338 data: 6.2507 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:21:23 lr: 0.000006 grad: 0.2439 (0.2738) loss: 0.7417 (0.7455) time: 0.1788 data: 0.0607 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:18:17 lr: 0.000006 grad: 0.2003 (0.2467) loss: 0.7727 (0.7511) time: 0.1518 data: 0.0387 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:16:44 lr: 0.000006 grad: 0.1990 (0.2337) loss: 0.7691 (0.7547) time: 0.1171 data: 0.0157 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:15:43 lr: 0.000006 grad: 0.2089 (0.2271) loss: 0.7654 (0.7566) time: 0.1445 data: 0.0498 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:14:54 lr: 0.000006 grad: 0.2063 (0.2239) loss: 0.7649 (0.7575) time: 0.1183 data: 0.0273 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:14:21 lr: 0.000006 grad: 0.2045 (0.2211) loss: 0.7677 (0.7582) time: 0.1502 data: 0.0589 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:13:51 lr: 0.000006 grad: 0.2030 (0.2195) loss: 0.7693 (0.7585) time: 0.1446 data: 0.0574 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:13:26 lr: 0.000006 grad: 0.2013 (0.2181) loss: 0.7687 (0.7592) time: 0.1114 data: 0.0187 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:13:06 lr: 0.000006 grad: 0.2036 (0.2171) loss: 0.7580 (0.7597) time: 0.1396 data: 0.0441 max mem: 9377 +Train: [87] [1000/6250] eta: 0:12:48 lr: 0.000006 grad: 0.2155 (0.2163) loss: 0.7615 (0.7598) time: 0.1578 data: 0.0764 max mem: 9377 +Train: [87] [1100/6250] eta: 0:12:29 lr: 0.000006 grad: 0.2140 (0.2157) loss: 0.7485 (0.7598) time: 0.1447 data: 0.0592 max mem: 9377 +Train: [87] [1200/6250] eta: 0:12:06 lr: 0.000006 grad: 0.2002 (0.2150) loss: 0.7589 (0.7600) time: 0.1362 data: 0.0563 max mem: 9377 +Train: [87] [1300/6250] eta: 0:11:57 lr: 0.000006 grad: 0.2064 (0.2145) loss: 0.7603 (0.7601) time: 0.1647 data: 0.0750 max mem: 9377 +Train: [87] [1400/6250] eta: 0:11:42 lr: 0.000005 grad: 0.2041 (0.2141) loss: 0.7722 (0.7602) time: 0.1432 data: 0.0583 max mem: 9377 +Train: [87] [1500/6250] eta: 0:11:29 lr: 0.000005 grad: 0.2086 (0.2138) loss: 0.7571 (0.7600) time: 0.1597 data: 0.0717 max mem: 9377 +Train: [87] [1600/6250] eta: 0:11:11 lr: 0.000005 grad: 0.1975 (0.2133) loss: 0.7599 (0.7601) time: 0.1025 data: 0.0172 max mem: 9377 +Train: [87] [1700/6250] eta: 0:10:55 lr: 0.000005 grad: 0.2022 (0.2127) loss: 0.7657 (0.7604) time: 0.1164 data: 0.0320 max mem: 9377 +Train: [87] [1800/6250] eta: 0:10:40 lr: 0.000005 grad: 0.2086 (0.2122) loss: 0.7577 (0.7605) time: 0.1302 data: 0.0466 max mem: 9377 +Train: [87] [1900/6250] eta: 0:10:25 lr: 0.000005 grad: 0.2007 (0.2118) loss: 0.7585 (0.7607) time: 0.1319 data: 0.0445 max mem: 9377 +Train: [87] [2000/6250] eta: 0:10:11 lr: 0.000005 grad: 0.2075 (0.2115) loss: 0.7561 (0.7607) time: 0.1548 data: 0.0693 max mem: 9377 +Train: [87] [2100/6250] eta: 0:09:55 lr: 0.000005 grad: 0.2006 (0.2111) loss: 0.7669 (0.7608) time: 0.1381 data: 0.0535 max mem: 9377 +Train: [87] [2200/6250] eta: 0:09:42 lr: 0.000005 grad: 0.2008 (0.2108) loss: 0.7571 (0.7608) time: 0.1563 data: 0.0814 max mem: 9377 +Train: [87] [2300/6250] eta: 0:09:29 lr: 0.000005 grad: 0.1986 (0.2105) loss: 0.7625 (0.7608) time: 0.1675 data: 0.0769 max mem: 9377 +Train: [87] [2400/6250] eta: 0:09:14 lr: 0.000005 grad: 0.2008 (0.2102) loss: 0.7662 (0.7608) time: 0.1453 data: 0.0622 max mem: 9377 +Train: [87] [2500/6250] eta: 0:09:00 lr: 0.000005 grad: 0.2113 (0.2100) loss: 0.7588 (0.7608) time: 0.1210 data: 0.0392 max mem: 9377 +Train: [87] [2600/6250] eta: 0:08:47 lr: 0.000005 grad: 0.2080 (0.2100) loss: 0.7536 (0.7608) time: 0.1653 data: 0.0729 max mem: 9377 +Train: [87] [2700/6250] eta: 0:08:33 lr: 0.000005 grad: 0.2106 (0.2099) loss: 0.7593 (0.7605) time: 0.1716 data: 0.0761 max mem: 9377 +Train: [87] [2800/6250] eta: 0:08:22 lr: 0.000005 grad: 0.2047 (0.2098) loss: 0.7649 (0.7605) time: 0.1513 data: 0.0629 max mem: 9377 +Train: [87] [2900/6250] eta: 0:08:06 lr: 0.000005 grad: 0.2020 (0.2097) loss: 0.7659 (0.7604) time: 0.1337 data: 0.0480 max mem: 9377 +Train: [87] [3000/6250] eta: 0:07:51 lr: 0.000005 grad: 0.2000 (0.2096) loss: 0.7551 (0.7603) time: 0.1259 data: 0.0409 max mem: 9377 +Train: [87] [3100/6250] eta: 0:07:36 lr: 0.000005 grad: 0.2000 (0.2094) loss: 0.7624 (0.7603) time: 0.1372 data: 0.0477 max mem: 9377 +Train: [87] [3200/6250] eta: 0:07:21 lr: 0.000005 grad: 0.2054 (0.2094) loss: 0.7601 (0.7602) time: 0.1419 data: 0.0565 max mem: 9377 +Train: [87] [3300/6250] eta: 0:07:06 lr: 0.000005 grad: 0.2025 (0.2095) loss: 0.7560 (0.7601) time: 0.1357 data: 0.0549 max mem: 9377 +Train: [87] [3400/6250] eta: 0:06:55 lr: 0.000005 grad: 0.2078 (0.2095) loss: 0.7539 (0.7600) time: 0.0983 data: 0.0005 max mem: 9377 +Train: [87] [3500/6250] eta: 0:06:41 lr: 0.000005 grad: 0.2092 (0.2095) loss: 0.7495 (0.7599) time: 0.1639 data: 0.0805 max mem: 9377 +Train: [87] [3600/6250] eta: 0:06:26 lr: 0.000005 grad: 0.2119 (0.2093) loss: 0.7487 (0.7598) time: 0.1349 data: 0.0465 max mem: 9377 +Train: [87] [3700/6250] eta: 0:06:11 lr: 0.000005 grad: 0.2048 (0.2093) loss: 0.7626 (0.7598) time: 0.1578 data: 0.0675 max mem: 9377 +Train: [87] [3800/6250] eta: 0:05:57 lr: 0.000005 grad: 0.2101 (0.2092) loss: 0.7560 (0.7597) time: 0.1553 data: 0.0783 max mem: 9377 +Train: [87] [3900/6250] eta: 0:05:44 lr: 0.000005 grad: 0.1989 (0.2092) loss: 0.7627 (0.7596) time: 0.1489 data: 0.0578 max mem: 9377 +Train: [87] [4000/6250] eta: 0:05:30 lr: 0.000005 grad: 0.2073 (0.2093) loss: 0.7570 (0.7594) time: 0.1539 data: 0.0659 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:16 lr: 0.000005 grad: 0.2014 (0.2093) loss: 0.7583 (0.7593) time: 0.1425 data: 0.0559 max mem: 9377 +Train: [87] [4200/6250] eta: 0:05:01 lr: 0.000005 grad: 0.1977 (0.2092) loss: 0.7662 (0.7592) time: 0.1392 data: 0.0438 max mem: 9377 +Train: [87] [4300/6250] eta: 0:04:45 lr: 0.000005 grad: 0.2034 (0.2092) loss: 0.7604 (0.7591) time: 0.1459 data: 0.0542 max mem: 9377 +Train: [87] [4400/6250] eta: 0:04:30 lr: 0.000005 grad: 0.2042 (0.2092) loss: 0.7529 (0.7591) time: 0.1440 data: 0.0631 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:17 lr: 0.000005 grad: 0.2129 (0.2092) loss: 0.7470 (0.7590) time: 0.3617 data: 0.2886 max mem: 9377 +Train: [87] [4600/6250] eta: 0:04:05 lr: 0.000005 grad: 0.2088 (0.2092) loss: 0.7610 (0.7590) time: 0.1977 data: 0.0995 max mem: 9377 +Train: [87] [4700/6250] eta: 0:03:51 lr: 0.000005 grad: 0.2052 (0.2092) loss: 0.7573 (0.7589) time: 0.1170 data: 0.0224 max mem: 9377 +Train: [87] [4800/6250] eta: 0:03:36 lr: 0.000005 grad: 0.2028 (0.2091) loss: 0.7636 (0.7589) time: 0.1383 data: 0.0523 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:21 lr: 0.000005 grad: 0.2039 (0.2092) loss: 0.7574 (0.7588) time: 0.1540 data: 0.0707 max mem: 9377 +Train: [87] [5000/6250] eta: 0:03:06 lr: 0.000005 grad: 0.2155 (0.2092) loss: 0.7553 (0.7587) time: 0.1468 data: 0.0572 max mem: 9377 +Train: [87] [5100/6250] eta: 0:02:51 lr: 0.000005 grad: 0.2050 (0.2093) loss: 0.7498 (0.7586) time: 0.1736 data: 0.0915 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:36 lr: 0.000005 grad: 0.2099 (0.2094) loss: 0.7541 (0.7585) time: 0.1530 data: 0.0739 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:21 lr: 0.000005 grad: 0.2212 (0.2096) loss: 0.7508 (0.7583) time: 0.1367 data: 0.0502 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:06 lr: 0.000005 grad: 0.2174 (0.2096) loss: 0.7587 (0.7582) time: 0.1447 data: 0.0624 max mem: 9377 +Train: [87] [5500/6250] eta: 0:01:51 lr: 0.000005 grad: 0.2113 (0.2097) loss: 0.7481 (0.7581) time: 0.1213 data: 0.0393 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:36 lr: 0.000005 grad: 0.2192 (0.2098) loss: 0.7452 (0.7580) time: 0.1028 data: 0.0135 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:21 lr: 0.000005 grad: 0.2045 (0.2098) loss: 0.7573 (0.7579) time: 0.1233 data: 0.0353 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:06 lr: 0.000005 grad: 0.2105 (0.2099) loss: 0.7564 (0.7578) time: 0.1368 data: 0.0518 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:51 lr: 0.000005 grad: 0.2168 (0.2101) loss: 0.7551 (0.7577) time: 0.1353 data: 0.0518 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:36 lr: 0.000005 grad: 0.2057 (0.2101) loss: 0.7515 (0.7576) time: 0.1250 data: 0.0402 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:22 lr: 0.000005 grad: 0.2220 (0.2102) loss: 0.7504 (0.7574) time: 0.1288 data: 0.0458 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:07 lr: 0.000005 grad: 0.2155 (0.2103) loss: 0.7497 (0.7573) time: 0.1309 data: 0.0493 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.2183 (0.2103) loss: 0.7437 (0.7572) time: 0.1409 data: 0.0583 max mem: 9377 +Train: [87] Total time: 0:15:27 (0.1484 s / it) +Averaged stats: lr: 0.000005 grad: 0.2183 (0.2103) loss: 0.7437 (0.7572) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:03:46 loss: 0.8099 (0.8099) time: 3.6561 data: 3.6025 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8056 (0.8038) time: 0.1255 data: 0.1008 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (hcp-train-subset): loss: 0.8056 (0.8038) +Eval (hcp-val): [87] [ 0/62] eta: 0:04:44 loss: 0.8542 (0.8542) time: 4.5952 data: 4.5486 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8528 (0.8549) time: 0.1098 data: 0.0849 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:13 (0.2099 s / it) +Averaged stats (hcp-val): loss: 0.8528 (0.8549) +Eval (nsd-val): [87] [ 0/62] eta: 0:05:28 loss: 0.8276 (0.8276) time: 5.3014 data: 5.2712 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8338 (0.8353) time: 0.1441 data: 0.1169 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:13 (0.2119 s / it) +Averaged stats (nsd-val): loss: 0.8338 (0.8353) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 8:06:53 lr: 0.000005 grad: 0.1485 (0.1485) loss: 0.8488 (0.8488) time: 4.6742 data: 4.4004 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:20:54 lr: 0.000005 grad: 0.1998 (0.2382) loss: 0.7847 (0.7750) time: 0.1714 data: 0.0685 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:17:58 lr: 0.000005 grad: 0.2213 (0.2279) loss: 0.7764 (0.7740) time: 0.1720 data: 0.0697 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:16:28 lr: 0.000005 grad: 0.2369 (0.2253) loss: 0.7570 (0.7699) time: 0.1357 data: 0.0415 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:15:32 lr: 0.000005 grad: 0.2153 (0.2232) loss: 0.7640 (0.7672) time: 0.1356 data: 0.0424 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:14:49 lr: 0.000005 grad: 0.2217 (0.2218) loss: 0.7552 (0.7664) time: 0.1293 data: 0.0204 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:14:19 lr: 0.000005 grad: 0.2086 (0.2201) loss: 0.7654 (0.7655) time: 0.1315 data: 0.0424 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:13:51 lr: 0.000005 grad: 0.2138 (0.2188) loss: 0.7547 (0.7649) time: 0.1259 data: 0.0403 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:13:33 lr: 0.000005 grad: 0.2109 (0.2183) loss: 0.7675 (0.7645) time: 0.1643 data: 0.0787 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:13:16 lr: 0.000005 grad: 0.2077 (0.2175) loss: 0.7642 (0.7642) time: 0.1463 data: 0.0554 max mem: 9377 +Train: [88] [1000/6250] eta: 0:12:57 lr: 0.000005 grad: 0.2098 (0.2166) loss: 0.7595 (0.7638) time: 0.1561 data: 0.0669 max mem: 9377 +Train: [88] [1100/6250] eta: 0:12:54 lr: 0.000005 grad: 0.2120 (0.2158) loss: 0.7574 (0.7637) time: 0.2126 data: 0.1298 max mem: 9377 +Train: [88] [1200/6250] eta: 0:12:29 lr: 0.000005 grad: 0.2002 (0.2152) loss: 0.7542 (0.7635) time: 0.1248 data: 0.0344 max mem: 9377 +Train: [88] [1300/6250] eta: 0:12:13 lr: 0.000005 grad: 0.2053 (0.2146) loss: 0.7598 (0.7633) time: 0.1441 data: 0.0614 max mem: 9377 +Train: [88] [1400/6250] eta: 0:11:55 lr: 0.000005 grad: 0.2040 (0.2141) loss: 0.7626 (0.7632) time: 0.1317 data: 0.0441 max mem: 9377 +Train: [88] [1500/6250] eta: 0:11:39 lr: 0.000005 grad: 0.2108 (0.2137) loss: 0.7622 (0.7630) time: 0.1305 data: 0.0433 max mem: 9377 +Train: [88] [1600/6250] eta: 0:11:23 lr: 0.000005 grad: 0.2017 (0.2133) loss: 0.7659 (0.7630) time: 0.1434 data: 0.0577 max mem: 9377 +Train: [88] [1700/6250] eta: 0:11:06 lr: 0.000005 grad: 0.2065 (0.2128) loss: 0.7697 (0.7630) time: 0.1197 data: 0.0350 max mem: 9377 +Train: [88] [1800/6250] eta: 0:10:50 lr: 0.000005 grad: 0.2086 (0.2123) loss: 0.7554 (0.7630) time: 0.1439 data: 0.0608 max mem: 9377 +Train: [88] [1900/6250] eta: 0:10:33 lr: 0.000005 grad: 0.2112 (0.2121) loss: 0.7615 (0.7629) time: 0.1316 data: 0.0505 max mem: 9377 +Train: [88] [2000/6250] eta: 0:10:18 lr: 0.000005 grad: 0.1938 (0.2120) loss: 0.7585 (0.7626) time: 0.1365 data: 0.0540 max mem: 9377 +Train: [88] [2100/6250] eta: 0:10:02 lr: 0.000005 grad: 0.2032 (0.2117) loss: 0.7603 (0.7625) time: 0.1311 data: 0.0397 max mem: 9377 +Train: [88] [2200/6250] eta: 0:09:47 lr: 0.000005 grad: 0.1996 (0.2115) loss: 0.7647 (0.7624) time: 0.1536 data: 0.0694 max mem: 9377 +Train: [88] [2300/6250] eta: 0:09:32 lr: 0.000005 grad: 0.2034 (0.2114) loss: 0.7612 (0.7623) time: 0.1509 data: 0.0656 max mem: 9377 +Train: [88] [2400/6250] eta: 0:09:17 lr: 0.000005 grad: 0.2084 (0.2114) loss: 0.7471 (0.7620) time: 0.1606 data: 0.0732 max mem: 9377 +Train: [88] [2500/6250] eta: 0:09:03 lr: 0.000005 grad: 0.2026 (0.2113) loss: 0.7542 (0.7619) time: 0.1373 data: 0.0498 max mem: 9377 +Train: [88] [2600/6250] eta: 0:08:47 lr: 0.000005 grad: 0.2050 (0.2112) loss: 0.7581 (0.7617) time: 0.1342 data: 0.0560 max mem: 9377 +Train: [88] [2700/6250] eta: 0:08:33 lr: 0.000005 grad: 0.2103 (0.2110) loss: 0.7643 (0.7617) time: 0.1458 data: 0.0615 max mem: 9377 +Train: [88] [2800/6250] eta: 0:08:19 lr: 0.000005 grad: 0.2070 (0.2109) loss: 0.7558 (0.7615) time: 0.1504 data: 0.0590 max mem: 9377 +Train: [88] [2900/6250] eta: 0:08:05 lr: 0.000004 grad: 0.2055 (0.2109) loss: 0.7596 (0.7614) time: 0.1439 data: 0.0536 max mem: 9377 +Train: [88] [3000/6250] eta: 0:07:50 lr: 0.000004 grad: 0.1923 (0.2108) loss: 0.7690 (0.7613) time: 0.1468 data: 0.0696 max mem: 9377 +Train: [88] [3100/6250] eta: 0:07:34 lr: 0.000004 grad: 0.2037 (0.2107) loss: 0.7626 (0.7613) time: 0.1398 data: 0.0534 max mem: 9377 +Train: [88] [3200/6250] eta: 0:07:19 lr: 0.000004 grad: 0.1983 (0.2105) loss: 0.7714 (0.7613) time: 0.1281 data: 0.0450 max mem: 9377 +Train: [88] [3300/6250] eta: 0:07:03 lr: 0.000004 grad: 0.2052 (0.2103) loss: 0.7612 (0.7613) time: 0.1242 data: 0.0336 max mem: 9377 +Train: [88] [3400/6250] eta: 0:06:48 lr: 0.000004 grad: 0.2060 (0.2102) loss: 0.7543 (0.7612) time: 0.1680 data: 0.0882 max mem: 9377 +Train: [88] [3500/6250] eta: 0:06:32 lr: 0.000004 grad: 0.2065 (0.2101) loss: 0.7589 (0.7613) time: 0.1153 data: 0.0249 max mem: 9377 +Train: [88] [3600/6250] eta: 0:06:17 lr: 0.000004 grad: 0.2040 (0.2099) loss: 0.7594 (0.7612) time: 0.1334 data: 0.0468 max mem: 9377 +Train: [88] [3700/6250] eta: 0:06:02 lr: 0.000004 grad: 0.1982 (0.2099) loss: 0.7579 (0.7611) time: 0.1342 data: 0.0552 max mem: 9377 +Train: [88] [3800/6250] eta: 0:05:48 lr: 0.000004 grad: 0.2125 (0.2099) loss: 0.7571 (0.7610) time: 0.1602 data: 0.0809 max mem: 9377 +Train: [88] [3900/6250] eta: 0:05:33 lr: 0.000004 grad: 0.2041 (0.2098) loss: 0.7679 (0.7609) time: 0.1573 data: 0.0670 max mem: 9377 +Train: [88] [4000/6250] eta: 0:05:19 lr: 0.000004 grad: 0.1998 (0.2097) loss: 0.7625 (0.7610) time: 0.1384 data: 0.0520 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:06 lr: 0.000004 grad: 0.2053 (0.2097) loss: 0.7586 (0.7609) time: 0.1498 data: 0.0564 max mem: 9377 +Train: [88] [4200/6250] eta: 0:04:51 lr: 0.000004 grad: 0.2054 (0.2096) loss: 0.7544 (0.7608) time: 0.1528 data: 0.0742 max mem: 9377 +Train: [88] [4300/6250] eta: 0:04:37 lr: 0.000004 grad: 0.2039 (0.2095) loss: 0.7653 (0.7608) time: 0.1487 data: 0.0675 max mem: 9377 +Train: [88] [4400/6250] eta: 0:04:22 lr: 0.000004 grad: 0.2076 (0.2094) loss: 0.7544 (0.7607) time: 0.1144 data: 0.0205 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:07 lr: 0.000004 grad: 0.2011 (0.2094) loss: 0.7617 (0.7607) time: 0.1306 data: 0.0486 max mem: 9377 +Train: [88] [4600/6250] eta: 0:03:53 lr: 0.000004 grad: 0.1997 (0.2093) loss: 0.7577 (0.7607) time: 0.2223 data: 0.1369 max mem: 9377 +Train: [88] [4700/6250] eta: 0:03:38 lr: 0.000004 grad: 0.2084 (0.2093) loss: 0.7544 (0.7606) time: 0.1330 data: 0.0512 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:24 lr: 0.000004 grad: 0.2059 (0.2093) loss: 0.7485 (0.7605) time: 0.1439 data: 0.0599 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:10 lr: 0.000004 grad: 0.2048 (0.2093) loss: 0.7576 (0.7604) time: 0.1424 data: 0.0590 max mem: 9377 +Train: [88] [5000/6250] eta: 0:02:57 lr: 0.000004 grad: 0.2022 (0.2093) loss: 0.7555 (0.7604) time: 0.1798 data: 0.1031 max mem: 9377 +Train: [88] [5100/6250] eta: 0:02:43 lr: 0.000004 grad: 0.2119 (0.2093) loss: 0.7564 (0.7604) time: 0.1592 data: 0.0820 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:29 lr: 0.000004 grad: 0.2062 (0.2093) loss: 0.7497 (0.7603) time: 0.1683 data: 0.0954 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:15 lr: 0.000004 grad: 0.2041 (0.2092) loss: 0.7547 (0.7603) time: 0.1471 data: 0.0671 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:02 lr: 0.000004 grad: 0.2116 (0.2093) loss: 0.7591 (0.7602) time: 0.1469 data: 0.0652 max mem: 9377 +Train: [88] [5500/6250] eta: 0:01:47 lr: 0.000004 grad: 0.2103 (0.2093) loss: 0.7489 (0.7601) time: 0.1552 data: 0.0720 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:33 lr: 0.000004 grad: 0.2046 (0.2093) loss: 0.7452 (0.7600) time: 0.1412 data: 0.0550 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:19 lr: 0.000004 grad: 0.2068 (0.2092) loss: 0.7503 (0.7599) time: 0.1367 data: 0.0500 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:04 lr: 0.000004 grad: 0.2069 (0.2092) loss: 0.7567 (0.7598) time: 0.1611 data: 0.0844 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:50 lr: 0.000004 grad: 0.2081 (0.2092) loss: 0.7535 (0.7598) time: 0.1449 data: 0.0630 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:36 lr: 0.000004 grad: 0.2025 (0.2091) loss: 0.7457 (0.7597) time: 0.1338 data: 0.0513 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:21 lr: 0.000004 grad: 0.2008 (0.2090) loss: 0.7674 (0.7597) time: 0.1627 data: 0.0796 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:07 lr: 0.000004 grad: 0.1996 (0.2090) loss: 0.7630 (0.7596) time: 0.1564 data: 0.0751 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.2012 (0.2090) loss: 0.7584 (0.7596) time: 0.1661 data: 0.0863 max mem: 9377 +Train: [88] Total time: 0:15:07 (0.1452 s / it) +Averaged stats: lr: 0.000004 grad: 0.2012 (0.2090) loss: 0.7584 (0.7596) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:04:49 loss: 0.8071 (0.8071) time: 4.6683 data: 4.6318 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8030 (0.8030) time: 0.1314 data: 0.1065 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:12 (0.2093 s / it) +Averaged stats (hcp-train-subset): loss: 0.8030 (0.8030) +Eval (hcp-val): [88] [ 0/62] eta: 0:04:49 loss: 0.8542 (0.8542) time: 4.6613 data: 4.6300 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8561 (0.8546) time: 0.1274 data: 0.1024 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:13 (0.2160 s / it) +Averaged stats (hcp-val): loss: 0.8561 (0.8546) +Eval (nsd-val): [88] [ 0/62] eta: 0:04:53 loss: 0.8316 (0.8316) time: 4.7405 data: 4.7107 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8355 (0.8361) time: 0.1221 data: 0.0969 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:12 (0.2079 s / it) +Averaged stats (nsd-val): loss: 0.8355 (0.8361) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 10:45:25 lr: 0.000004 grad: 0.3364 (0.3364) loss: 0.6793 (0.6793) time: 6.1961 data: 6.0887 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:20:13 lr: 0.000004 grad: 0.2094 (0.2473) loss: 0.7737 (0.7743) time: 0.1510 data: 0.0494 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:17:30 lr: 0.000004 grad: 0.1970 (0.2388) loss: 0.7693 (0.7692) time: 0.1598 data: 0.0581 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:15:50 lr: 0.000004 grad: 0.2270 (0.2350) loss: 0.7552 (0.7657) time: 0.1368 data: 0.0351 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:15:05 lr: 0.000004 grad: 0.2193 (0.2316) loss: 0.7552 (0.7630) time: 0.1464 data: 0.0566 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:14:23 lr: 0.000004 grad: 0.2108 (0.2291) loss: 0.7471 (0.7608) time: 0.1337 data: 0.0374 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:14:02 lr: 0.000004 grad: 0.2110 (0.2278) loss: 0.7517 (0.7592) time: 0.1537 data: 0.0682 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:13:38 lr: 0.000004 grad: 0.2097 (0.2253) loss: 0.7599 (0.7593) time: 0.1367 data: 0.0418 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:13:27 lr: 0.000004 grad: 0.2136 (0.2235) loss: 0.7552 (0.7589) time: 0.1927 data: 0.1038 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:13:09 lr: 0.000004 grad: 0.2102 (0.2222) loss: 0.7589 (0.7586) time: 0.1565 data: 0.0710 max mem: 9377 +Train: [89] [1000/6250] eta: 0:12:59 lr: 0.000004 grad: 0.2112 (0.2214) loss: 0.7511 (0.7581) time: 0.1038 data: 0.0003 max mem: 9377 +Train: [89] [1100/6250] eta: 0:12:42 lr: 0.000004 grad: 0.2058 (0.2205) loss: 0.7493 (0.7581) time: 0.1519 data: 0.0692 max mem: 9377 +Train: [89] [1200/6250] eta: 0:12:25 lr: 0.000004 grad: 0.2110 (0.2198) loss: 0.7439 (0.7576) time: 0.1417 data: 0.0521 max mem: 9377 +Train: [89] [1300/6250] eta: 0:12:07 lr: 0.000004 grad: 0.2151 (0.2194) loss: 0.7585 (0.7572) time: 0.1191 data: 0.0286 max mem: 9377 +Train: [89] [1400/6250] eta: 0:11:51 lr: 0.000004 grad: 0.2156 (0.2190) loss: 0.7483 (0.7568) time: 0.1349 data: 0.0484 max mem: 9377 +Train: [89] [1500/6250] eta: 0:11:44 lr: 0.000004 grad: 0.2123 (0.2188) loss: 0.7444 (0.7564) time: 0.2285 data: 0.1275 max mem: 9377 +Train: [89] [1600/6250] eta: 0:11:24 lr: 0.000004 grad: 0.2145 (0.2183) loss: 0.7474 (0.7564) time: 0.1356 data: 0.0503 max mem: 9377 +Train: [89] [1700/6250] eta: 0:11:26 lr: 0.000004 grad: 0.2017 (0.2178) loss: 0.7562 (0.7562) time: 0.2506 data: 0.1531 max mem: 9377 +Train: [89] [1800/6250] eta: 0:11:11 lr: 0.000004 grad: 0.2116 (0.2175) loss: 0.7478 (0.7560) time: 0.0975 data: 0.0002 max mem: 9377 +Train: [89] [1900/6250] eta: 0:10:56 lr: 0.000004 grad: 0.2132 (0.2172) loss: 0.7389 (0.7557) time: 0.1420 data: 0.0539 max mem: 9377 +Train: [89] [2000/6250] eta: 0:10:40 lr: 0.000004 grad: 0.2103 (0.2170) loss: 0.7474 (0.7554) time: 0.1389 data: 0.0531 max mem: 9377 +Train: [89] [2100/6250] eta: 0:10:24 lr: 0.000004 grad: 0.2086 (0.2166) loss: 0.7568 (0.7554) time: 0.1308 data: 0.0400 max mem: 9377 +Train: [89] [2200/6250] eta: 0:10:08 lr: 0.000004 grad: 0.2108 (0.2165) loss: 0.7429 (0.7551) time: 0.1588 data: 0.0731 max mem: 9377 +Train: [89] [2300/6250] eta: 0:09:51 lr: 0.000004 grad: 0.2135 (0.2163) loss: 0.7528 (0.7549) time: 0.1340 data: 0.0471 max mem: 9377 +Train: [89] [2400/6250] eta: 0:09:34 lr: 0.000004 grad: 0.2107 (0.2160) loss: 0.7552 (0.7548) time: 0.1385 data: 0.0497 max mem: 9377 +Train: [89] [2500/6250] eta: 0:09:18 lr: 0.000004 grad: 0.2117 (0.2158) loss: 0.7519 (0.7547) time: 0.1259 data: 0.0372 max mem: 9377 +Train: [89] [2600/6250] eta: 0:09:02 lr: 0.000004 grad: 0.2074 (0.2155) loss: 0.7457 (0.7547) time: 0.1337 data: 0.0423 max mem: 9377 +Train: [89] [2700/6250] eta: 0:08:45 lr: 0.000004 grad: 0.2135 (0.2154) loss: 0.7475 (0.7545) time: 0.1228 data: 0.0363 max mem: 9377 +Train: [89] [2800/6250] eta: 0:08:34 lr: 0.000004 grad: 0.2150 (0.2152) loss: 0.7459 (0.7543) time: 0.0985 data: 0.0002 max mem: 9377 +Train: [89] [2900/6250] eta: 0:08:18 lr: 0.000004 grad: 0.1994 (0.2151) loss: 0.7613 (0.7543) time: 0.1384 data: 0.0609 max mem: 9377 +Train: [89] [3000/6250] eta: 0:08:03 lr: 0.000004 grad: 0.2093 (0.2149) loss: 0.7549 (0.7542) time: 0.1169 data: 0.0296 max mem: 9377 +Train: [89] [3100/6250] eta: 0:07:48 lr: 0.000004 grad: 0.2098 (0.2148) loss: 0.7597 (0.7543) time: 0.1423 data: 0.0491 max mem: 9377 +Train: [89] [3200/6250] eta: 0:07:33 lr: 0.000004 grad: 0.2061 (0.2146) loss: 0.7602 (0.7544) time: 0.1430 data: 0.0571 max mem: 9377 +Train: [89] [3300/6250] eta: 0:07:17 lr: 0.000004 grad: 0.2032 (0.2144) loss: 0.7573 (0.7544) time: 0.1323 data: 0.0442 max mem: 9377 +Train: [89] [3400/6250] eta: 0:07:02 lr: 0.000004 grad: 0.2094 (0.2143) loss: 0.7637 (0.7544) time: 0.1497 data: 0.0672 max mem: 9377 +Train: [89] [3500/6250] eta: 0:06:46 lr: 0.000004 grad: 0.2075 (0.2142) loss: 0.7456 (0.7544) time: 0.1305 data: 0.0385 max mem: 9377 +Train: [89] [3600/6250] eta: 0:06:30 lr: 0.000004 grad: 0.2096 (0.2142) loss: 0.7553 (0.7544) time: 0.1341 data: 0.0532 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:14 lr: 0.000004 grad: 0.2015 (0.2142) loss: 0.7550 (0.7544) time: 0.1269 data: 0.0365 max mem: 9377 +Train: [89] [3800/6250] eta: 0:05:59 lr: 0.000004 grad: 0.2043 (0.2141) loss: 0.7602 (0.7544) time: 0.1502 data: 0.0658 max mem: 9377 +Train: [89] [3900/6250] eta: 0:05:45 lr: 0.000004 grad: 0.2153 (0.2142) loss: 0.7524 (0.7544) time: 0.1801 data: 0.1017 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:30 lr: 0.000004 grad: 0.2108 (0.2142) loss: 0.7655 (0.7544) time: 0.1461 data: 0.0630 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:15 lr: 0.000004 grad: 0.2050 (0.2141) loss: 0.7612 (0.7544) time: 0.1454 data: 0.0655 max mem: 9377 +Train: [89] [4200/6250] eta: 0:05:01 lr: 0.000004 grad: 0.2108 (0.2140) loss: 0.7504 (0.7544) time: 0.1475 data: 0.0645 max mem: 9377 +Train: [89] [4300/6250] eta: 0:04:46 lr: 0.000004 grad: 0.2103 (0.2140) loss: 0.7578 (0.7544) time: 0.1323 data: 0.0462 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:31 lr: 0.000004 grad: 0.2070 (0.2139) loss: 0.7531 (0.7544) time: 0.1269 data: 0.0397 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:16 lr: 0.000004 grad: 0.2027 (0.2138) loss: 0.7609 (0.7544) time: 0.1238 data: 0.0299 max mem: 9377 +Train: [89] [4600/6250] eta: 0:04:00 lr: 0.000004 grad: 0.2031 (0.2137) loss: 0.7622 (0.7545) time: 0.1203 data: 0.0315 max mem: 9377 +Train: [89] [4700/6250] eta: 0:03:45 lr: 0.000004 grad: 0.2066 (0.2136) loss: 0.7436 (0.7545) time: 0.1271 data: 0.0445 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:30 lr: 0.000004 grad: 0.2114 (0.2135) loss: 0.7512 (0.7546) time: 0.1421 data: 0.0583 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:16 lr: 0.000004 grad: 0.1999 (0.2134) loss: 0.7616 (0.7547) time: 0.1232 data: 0.0398 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:01 lr: 0.000004 grad: 0.2112 (0.2132) loss: 0.7672 (0.7547) time: 0.1180 data: 0.0360 max mem: 9377 +Train: [89] [5100/6250] eta: 0:02:46 lr: 0.000004 grad: 0.2085 (0.2132) loss: 0.7542 (0.7549) time: 0.1183 data: 0.0418 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:32 lr: 0.000003 grad: 0.2075 (0.2131) loss: 0.7504 (0.7549) time: 0.1554 data: 0.0775 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:17 lr: 0.000003 grad: 0.1973 (0.2129) loss: 0.7691 (0.7550) time: 0.1578 data: 0.0767 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:03 lr: 0.000003 grad: 0.2130 (0.2127) loss: 0.7637 (0.7552) time: 0.1614 data: 0.0817 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:49 lr: 0.000003 grad: 0.2005 (0.2126) loss: 0.7641 (0.7552) time: 0.1635 data: 0.0854 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:35 lr: 0.000003 grad: 0.2085 (0.2125) loss: 0.7675 (0.7553) time: 0.1346 data: 0.0624 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:20 lr: 0.000003 grad: 0.1999 (0.2124) loss: 0.7568 (0.7554) time: 0.1319 data: 0.0539 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:06 lr: 0.000003 grad: 0.2045 (0.2123) loss: 0.7598 (0.7555) time: 0.2248 data: 0.1481 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:51 lr: 0.000003 grad: 0.2013 (0.2122) loss: 0.7670 (0.7555) time: 0.1545 data: 0.0779 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:36 lr: 0.000003 grad: 0.2044 (0.2121) loss: 0.7716 (0.7556) time: 0.1660 data: 0.0833 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.2072 (0.2121) loss: 0.7501 (0.7557) time: 0.1330 data: 0.0501 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.2052 (0.2120) loss: 0.7582 (0.7557) time: 0.1629 data: 0.0852 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2042 (0.2120) loss: 0.7539 (0.7557) time: 0.1106 data: 0.0260 max mem: 9377 +Train: [89] Total time: 0:15:23 (0.1478 s / it) +Averaged stats: lr: 0.000003 grad: 0.2042 (0.2120) loss: 0.7539 (0.7557) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:05:18 loss: 0.8120 (0.8120) time: 5.1448 data: 5.1153 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8021 (0.8024) time: 0.1132 data: 0.0882 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:12 (0.2090 s / it) +Averaged stats (hcp-train-subset): loss: 0.8021 (0.8024) +Making plots (hcp-train-subset): example=23 +Eval (hcp-val): [89] [ 0/62] eta: 0:05:23 loss: 0.8555 (0.8555) time: 5.2131 data: 5.1786 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8526 (0.8551) time: 0.1302 data: 0.1051 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-val): loss: 0.8526 (0.8551) +Making plots (hcp-val): example=5 +Eval (nsd-val): [89] [ 0/62] eta: 0:05:48 loss: 0.8245 (0.8245) time: 5.6200 data: 5.5547 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8329 (0.8359) time: 0.1006 data: 0.0758 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:13 (0.2154 s / it) +Averaged stats (nsd-val): loss: 0.8329 (0.8359) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 11:39:28 lr: 0.000003 grad: 0.2759 (0.2759) loss: 0.7372 (0.7372) time: 6.7149 data: 6.6189 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:20:48 lr: 0.000003 grad: 0.2082 (0.2422) loss: 0.7728 (0.7754) time: 0.1369 data: 0.0390 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:17:22 lr: 0.000003 grad: 0.2031 (0.2338) loss: 0.7707 (0.7703) time: 0.1188 data: 0.0068 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:15:55 lr: 0.000003 grad: 0.2084 (0.2272) loss: 0.7809 (0.7684) time: 0.1417 data: 0.0435 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:14:47 lr: 0.000003 grad: 0.2084 (0.2240) loss: 0.7537 (0.7658) time: 0.1315 data: 0.0497 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:14:19 lr: 0.000003 grad: 0.2078 (0.2213) loss: 0.7713 (0.7649) time: 0.1692 data: 0.0801 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:13:52 lr: 0.000003 grad: 0.2097 (0.2201) loss: 0.7678 (0.7638) time: 0.1367 data: 0.0516 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:13:35 lr: 0.000003 grad: 0.2128 (0.2194) loss: 0.7484 (0.7625) time: 0.1368 data: 0.0434 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:13:22 lr: 0.000003 grad: 0.2139 (0.2187) loss: 0.7514 (0.7618) time: 0.1457 data: 0.0572 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:13:09 lr: 0.000003 grad: 0.2084 (0.2179) loss: 0.7539 (0.7614) time: 0.1729 data: 0.0819 max mem: 9377 +Train: [90] [1000/6250] eta: 0:12:55 lr: 0.000003 grad: 0.2089 (0.2175) loss: 0.7473 (0.7609) time: 0.1337 data: 0.0506 max mem: 9377 +Train: [90] [1100/6250] eta: 0:12:37 lr: 0.000003 grad: 0.2153 (0.2170) loss: 0.7483 (0.7605) time: 0.1128 data: 0.0236 max mem: 9377 +Train: [90] [1200/6250] eta: 0:12:18 lr: 0.000003 grad: 0.2055 (0.2165) loss: 0.7456 (0.7602) time: 0.1371 data: 0.0539 max mem: 9377 +Train: [90] [1300/6250] eta: 0:12:04 lr: 0.000003 grad: 0.2168 (0.2163) loss: 0.7517 (0.7600) time: 0.1774 data: 0.0991 max mem: 9377 +Train: [90] [1400/6250] eta: 0:11:47 lr: 0.000003 grad: 0.2060 (0.2157) loss: 0.7594 (0.7601) time: 0.1371 data: 0.0521 max mem: 9377 +Train: [90] [1500/6250] eta: 0:11:31 lr: 0.000003 grad: 0.2090 (0.2156) loss: 0.7551 (0.7598) time: 0.1600 data: 0.0779 max mem: 9377 +Train: [90] [1600/6250] eta: 0:11:18 lr: 0.000003 grad: 0.2063 (0.2154) loss: 0.7589 (0.7596) time: 0.1910 data: 0.1135 max mem: 9377 +Train: [90] [1700/6250] eta: 0:11:01 lr: 0.000003 grad: 0.2080 (0.2152) loss: 0.7545 (0.7596) time: 0.1288 data: 0.0481 max mem: 9377 +Train: [90] [1800/6250] eta: 0:10:46 lr: 0.000003 grad: 0.1992 (0.2148) loss: 0.7692 (0.7595) time: 0.1375 data: 0.0581 max mem: 9377 +Train: [90] [1900/6250] eta: 0:10:31 lr: 0.000003 grad: 0.2085 (0.2146) loss: 0.7541 (0.7595) time: 0.1411 data: 0.0646 max mem: 9377 +Train: [90] [2000/6250] eta: 0:10:14 lr: 0.000003 grad: 0.2047 (0.2142) loss: 0.7543 (0.7595) time: 0.1316 data: 0.0493 max mem: 9377 +Train: [90] [2100/6250] eta: 0:09:59 lr: 0.000003 grad: 0.2078 (0.2138) loss: 0.7604 (0.7595) time: 0.1498 data: 0.0697 max mem: 9377 +Train: [90] [2200/6250] eta: 0:09:46 lr: 0.000003 grad: 0.2096 (0.2135) loss: 0.7604 (0.7597) time: 0.1180 data: 0.0239 max mem: 9377 +Train: [90] [2300/6250] eta: 0:09:32 lr: 0.000003 grad: 0.2121 (0.2134) loss: 0.7519 (0.7596) time: 0.1582 data: 0.0784 max mem: 9377 +Train: [90] [2400/6250] eta: 0:09:17 lr: 0.000003 grad: 0.2077 (0.2133) loss: 0.7518 (0.7594) time: 0.1369 data: 0.0526 max mem: 9377 +Train: [90] [2500/6250] eta: 0:09:02 lr: 0.000003 grad: 0.2130 (0.2132) loss: 0.7442 (0.7592) time: 0.1241 data: 0.0425 max mem: 9377 +Train: [90] [2600/6250] eta: 0:08:48 lr: 0.000003 grad: 0.2183 (0.2133) loss: 0.7455 (0.7589) time: 0.1302 data: 0.0520 max mem: 9377 +Train: [90] [2700/6250] eta: 0:08:33 lr: 0.000003 grad: 0.2032 (0.2132) loss: 0.7593 (0.7587) time: 0.1320 data: 0.0420 max mem: 9377 +Train: [90] [2800/6250] eta: 0:08:20 lr: 0.000003 grad: 0.2116 (0.2131) loss: 0.7575 (0.7585) time: 0.1281 data: 0.0462 max mem: 9377 +Train: [90] [2900/6250] eta: 0:08:04 lr: 0.000003 grad: 0.2155 (0.2132) loss: 0.7438 (0.7581) time: 0.1390 data: 0.0541 max mem: 9377 +Train: [90] [3000/6250] eta: 0:07:49 lr: 0.000003 grad: 0.2112 (0.2133) loss: 0.7407 (0.7579) time: 0.1209 data: 0.0397 max mem: 9377 +Train: [90] [3100/6250] eta: 0:07:35 lr: 0.000003 grad: 0.2127 (0.2134) loss: 0.7513 (0.7575) time: 0.1556 data: 0.0779 max mem: 9377 +Train: [90] [3200/6250] eta: 0:07:21 lr: 0.000003 grad: 0.2061 (0.2134) loss: 0.7514 (0.7574) time: 0.1400 data: 0.0575 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:06 lr: 0.000003 grad: 0.2011 (0.2135) loss: 0.7525 (0.7571) time: 0.1474 data: 0.0628 max mem: 9377 +Train: [90] [3400/6250] eta: 0:06:51 lr: 0.000003 grad: 0.2074 (0.2135) loss: 0.7527 (0.7569) time: 0.1353 data: 0.0509 max mem: 9377 +Train: [90] [3500/6250] eta: 0:06:36 lr: 0.000003 grad: 0.2078 (0.2135) loss: 0.7562 (0.7567) time: 0.1385 data: 0.0602 max mem: 9377 +Train: [90] [3600/6250] eta: 0:06:21 lr: 0.000003 grad: 0.2209 (0.2135) loss: 0.7389 (0.7565) time: 0.1517 data: 0.0673 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:06 lr: 0.000003 grad: 0.2072 (0.2136) loss: 0.7494 (0.7562) time: 0.1572 data: 0.0780 max mem: 9377 +Train: [90] [3800/6250] eta: 0:05:51 lr: 0.000003 grad: 0.2141 (0.2136) loss: 0.7472 (0.7560) time: 0.1244 data: 0.0412 max mem: 9377 +Train: [90] [3900/6250] eta: 0:05:37 lr: 0.000003 grad: 0.2143 (0.2136) loss: 0.7413 (0.7557) time: 0.1465 data: 0.0686 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:22 lr: 0.000003 grad: 0.2062 (0.2136) loss: 0.7464 (0.7555) time: 0.1332 data: 0.0513 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:08 lr: 0.000003 grad: 0.2126 (0.2136) loss: 0.7412 (0.7553) time: 0.1286 data: 0.0482 max mem: 9377 +Train: [90] [4200/6250] eta: 0:04:54 lr: 0.000003 grad: 0.2113 (0.2135) loss: 0.7434 (0.7550) time: 0.1262 data: 0.0397 max mem: 9377 +Train: [90] [4300/6250] eta: 0:04:40 lr: 0.000003 grad: 0.2116 (0.2135) loss: 0.7426 (0.7548) time: 0.1430 data: 0.0568 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:25 lr: 0.000003 grad: 0.2164 (0.2135) loss: 0.7399 (0.7547) time: 0.1184 data: 0.0333 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:10 lr: 0.000003 grad: 0.2132 (0.2136) loss: 0.7440 (0.7545) time: 0.1353 data: 0.0571 max mem: 9377 +Train: [90] [4600/6250] eta: 0:03:55 lr: 0.000003 grad: 0.2148 (0.2136) loss: 0.7551 (0.7543) time: 0.1159 data: 0.0289 max mem: 9377 +Train: [90] [4700/6250] eta: 0:03:41 lr: 0.000003 grad: 0.2125 (0.2136) loss: 0.7477 (0.7542) time: 0.1565 data: 0.0736 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:26 lr: 0.000003 grad: 0.2064 (0.2137) loss: 0.7524 (0.7541) time: 0.1532 data: 0.0731 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:13 lr: 0.000003 grad: 0.2153 (0.2138) loss: 0.7455 (0.7540) time: 0.2289 data: 0.1521 max mem: 9377 +Train: [90] [5000/6250] eta: 0:02:58 lr: 0.000003 grad: 0.2076 (0.2138) loss: 0.7423 (0.7538) time: 0.1443 data: 0.0631 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:44 lr: 0.000003 grad: 0.2095 (0.2137) loss: 0.7463 (0.7537) time: 0.1439 data: 0.0653 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:30 lr: 0.000003 grad: 0.2200 (0.2137) loss: 0.7405 (0.7536) time: 0.1467 data: 0.0612 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:16 lr: 0.000003 grad: 0.2128 (0.2137) loss: 0.7451 (0.7535) time: 0.1528 data: 0.0741 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:01 lr: 0.000003 grad: 0.2046 (0.2136) loss: 0.7530 (0.7535) time: 0.1448 data: 0.0652 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:47 lr: 0.000003 grad: 0.2024 (0.2135) loss: 0.7502 (0.7535) time: 0.1572 data: 0.0777 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:32 lr: 0.000003 grad: 0.2057 (0.2134) loss: 0.7550 (0.7535) time: 0.0952 data: 0.0090 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:18 lr: 0.000003 grad: 0.2015 (0.2133) loss: 0.7569 (0.7536) time: 0.1430 data: 0.0677 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:04 lr: 0.000003 grad: 0.2007 (0.2131) loss: 0.7643 (0.7537) time: 0.1422 data: 0.0680 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:49 lr: 0.000003 grad: 0.1967 (0.2129) loss: 0.7652 (0.7539) time: 0.1309 data: 0.0483 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:35 lr: 0.000003 grad: 0.2078 (0.2128) loss: 0.7602 (0.7540) time: 0.1970 data: 0.1164 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:21 lr: 0.000003 grad: 0.2028 (0.2127) loss: 0.7601 (0.7542) time: 0.1227 data: 0.0425 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.2060 (0.2127) loss: 0.7628 (0.7542) time: 0.1394 data: 0.0616 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2025 (0.2126) loss: 0.7536 (0.7543) time: 0.0967 data: 0.0116 max mem: 9377 +Train: [90] Total time: 0:14:57 (0.1436 s / it) +Averaged stats: lr: 0.000003 grad: 0.2025 (0.2126) loss: 0.7536 (0.7543) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:04:26 loss: 0.8083 (0.8083) time: 4.2939 data: 4.2201 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8039 (0.8022) time: 0.1297 data: 0.1047 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:12 (0.2093 s / it) +Averaged stats (hcp-train-subset): loss: 0.8039 (0.8022) +Eval (hcp-val): [90] [ 0/62] eta: 0:05:26 loss: 0.8539 (0.8539) time: 5.2636 data: 5.2339 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8540 (0.8550) time: 0.1355 data: 0.1075 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:13 (0.2131 s / it) +Averaged stats (hcp-val): loss: 0.8540 (0.8550) +Eval (nsd-val): [90] [ 0/62] eta: 0:05:46 loss: 0.8264 (0.8264) time: 5.5883 data: 5.5580 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8354 (0.8364) time: 0.1166 data: 0.0914 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:12 (0.2067 s / it) +Averaged stats (nsd-val): loss: 0.8354 (0.8364) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 10:18:44 lr: 0.000003 grad: 0.2738 (0.2738) loss: 0.7916 (0.7916) time: 5.9399 data: 5.8109 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:21:03 lr: 0.000003 grad: 0.2471 (0.2567) loss: 0.7691 (0.7696) time: 0.1575 data: 0.0398 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:18:01 lr: 0.000003 grad: 0.2233 (0.2436) loss: 0.7561 (0.7642) time: 0.1618 data: 0.0677 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:16:41 lr: 0.000003 grad: 0.2289 (0.2374) loss: 0.7524 (0.7593) time: 0.1266 data: 0.0230 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:15:41 lr: 0.000003 grad: 0.2172 (0.2322) loss: 0.7669 (0.7594) time: 0.1348 data: 0.0439 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:15:00 lr: 0.000003 grad: 0.2087 (0.2284) loss: 0.7609 (0.7601) time: 0.1303 data: 0.0478 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:14:49 lr: 0.000003 grad: 0.2161 (0.2265) loss: 0.7612 (0.7601) time: 0.2095 data: 0.1185 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:14:13 lr: 0.000003 grad: 0.2126 (0.2245) loss: 0.7581 (0.7602) time: 0.1444 data: 0.0597 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:13:54 lr: 0.000003 grad: 0.1985 (0.2227) loss: 0.7545 (0.7602) time: 0.1212 data: 0.0324 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:13:39 lr: 0.000003 grad: 0.2157 (0.2215) loss: 0.7490 (0.7597) time: 0.1642 data: 0.0765 max mem: 9377 +Train: [91] [1000/6250] eta: 0:13:18 lr: 0.000003 grad: 0.2050 (0.2206) loss: 0.7544 (0.7587) time: 0.1612 data: 0.0800 max mem: 9377 +Train: [91] [1100/6250] eta: 0:12:59 lr: 0.000003 grad: 0.2088 (0.2199) loss: 0.7593 (0.7580) time: 0.1432 data: 0.0585 max mem: 9377 +Train: [91] [1200/6250] eta: 0:12:41 lr: 0.000003 grad: 0.2122 (0.2193) loss: 0.7498 (0.7572) time: 0.1458 data: 0.0615 max mem: 9377 +Train: [91] [1300/6250] eta: 0:12:28 lr: 0.000003 grad: 0.2165 (0.2187) loss: 0.7392 (0.7566) time: 0.1039 data: 0.0003 max mem: 9377 +Train: [91] [1400/6250] eta: 0:12:07 lr: 0.000003 grad: 0.2106 (0.2181) loss: 0.7530 (0.7565) time: 0.1409 data: 0.0537 max mem: 9377 +Train: [91] [1500/6250] eta: 0:11:50 lr: 0.000003 grad: 0.2089 (0.2178) loss: 0.7551 (0.7562) time: 0.1678 data: 0.0879 max mem: 9377 +Train: [91] [1600/6250] eta: 0:11:35 lr: 0.000003 grad: 0.2050 (0.2173) loss: 0.7502 (0.7559) time: 0.2114 data: 0.1324 max mem: 9377 +Train: [91] [1700/6250] eta: 0:11:18 lr: 0.000003 grad: 0.2082 (0.2168) loss: 0.7553 (0.7560) time: 0.0958 data: 0.0003 max mem: 9377 +Train: [91] [1800/6250] eta: 0:11:02 lr: 0.000003 grad: 0.2072 (0.2163) loss: 0.7446 (0.7559) time: 0.1748 data: 0.0978 max mem: 9377 +Train: [91] [1900/6250] eta: 0:10:44 lr: 0.000003 grad: 0.2037 (0.2159) loss: 0.7663 (0.7559) time: 0.1494 data: 0.0657 max mem: 9377 +Train: [91] [2000/6250] eta: 0:10:27 lr: 0.000003 grad: 0.2089 (0.2155) loss: 0.7485 (0.7558) time: 0.1606 data: 0.0768 max mem: 9377 +Train: [91] [2100/6250] eta: 0:10:10 lr: 0.000003 grad: 0.2159 (0.2152) loss: 0.7423 (0.7557) time: 0.1363 data: 0.0603 max mem: 9377 +Train: [91] [2200/6250] eta: 0:09:54 lr: 0.000003 grad: 0.2100 (0.2150) loss: 0.7545 (0.7557) time: 0.1362 data: 0.0506 max mem: 9377 +Train: [91] [2300/6250] eta: 0:09:39 lr: 0.000003 grad: 0.2087 (0.2147) loss: 0.7502 (0.7557) time: 0.1545 data: 0.0720 max mem: 9377 +Train: [91] [2400/6250] eta: 0:09:23 lr: 0.000003 grad: 0.2055 (0.2143) loss: 0.7591 (0.7559) time: 0.1260 data: 0.0414 max mem: 9377 +Train: [91] [2500/6250] eta: 0:09:07 lr: 0.000003 grad: 0.2016 (0.2143) loss: 0.7549 (0.7561) time: 0.1535 data: 0.0740 max mem: 9377 +Train: [91] [2600/6250] eta: 0:08:52 lr: 0.000003 grad: 0.2031 (0.2140) loss: 0.7564 (0.7562) time: 0.1412 data: 0.0628 max mem: 9377 +Train: [91] [2700/6250] eta: 0:08:37 lr: 0.000002 grad: 0.2054 (0.2138) loss: 0.7535 (0.7561) time: 0.1410 data: 0.0516 max mem: 9377 +Train: [91] [2800/6250] eta: 0:08:22 lr: 0.000002 grad: 0.2077 (0.2137) loss: 0.7519 (0.7562) time: 0.1588 data: 0.0759 max mem: 9377 +Train: [91] [2900/6250] eta: 0:08:08 lr: 0.000002 grad: 0.2047 (0.2135) loss: 0.7640 (0.7563) time: 0.1757 data: 0.0933 max mem: 9377 +Train: [91] [3000/6250] eta: 0:07:53 lr: 0.000002 grad: 0.2013 (0.2133) loss: 0.7655 (0.7564) time: 0.1316 data: 0.0483 max mem: 9377 +Train: [91] [3100/6250] eta: 0:07:38 lr: 0.000002 grad: 0.2059 (0.2131) loss: 0.7593 (0.7564) time: 0.1350 data: 0.0535 max mem: 9377 +Train: [91] [3200/6250] eta: 0:07:23 lr: 0.000002 grad: 0.2053 (0.2130) loss: 0.7523 (0.7565) time: 0.1361 data: 0.0489 max mem: 9377 +Train: [91] [3300/6250] eta: 0:07:09 lr: 0.000002 grad: 0.1999 (0.2128) loss: 0.7621 (0.7566) time: 0.1418 data: 0.0543 max mem: 9377 +Train: [91] [3400/6250] eta: 0:06:54 lr: 0.000002 grad: 0.2073 (0.2126) loss: 0.7534 (0.7566) time: 0.1314 data: 0.0491 max mem: 9377 +Train: [91] [3500/6250] eta: 0:06:39 lr: 0.000002 grad: 0.2034 (0.2125) loss: 0.7630 (0.7567) time: 0.1172 data: 0.0309 max mem: 9377 +Train: [91] [3600/6250] eta: 0:06:23 lr: 0.000002 grad: 0.2131 (0.2124) loss: 0.7449 (0.7568) time: 0.1265 data: 0.0419 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:08 lr: 0.000002 grad: 0.1965 (0.2122) loss: 0.7540 (0.7569) time: 0.1328 data: 0.0457 max mem: 9377 +Train: [91] [3800/6250] eta: 0:05:52 lr: 0.000002 grad: 0.2041 (0.2121) loss: 0.7557 (0.7570) time: 0.1198 data: 0.0261 max mem: 9377 +Train: [91] [3900/6250] eta: 0:05:37 lr: 0.000002 grad: 0.2041 (0.2121) loss: 0.7597 (0.7571) time: 0.1332 data: 0.0510 max mem: 9377 +Train: [91] [4000/6250] eta: 0:05:23 lr: 0.000002 grad: 0.1935 (0.2118) loss: 0.7644 (0.7572) time: 0.1909 data: 0.1188 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:08 lr: 0.000002 grad: 0.1980 (0.2117) loss: 0.7633 (0.7573) time: 0.1209 data: 0.0456 max mem: 9377 +Train: [91] [4200/6250] eta: 0:04:54 lr: 0.000002 grad: 0.2055 (0.2115) loss: 0.7541 (0.7573) time: 0.1447 data: 0.0555 max mem: 9377 +Train: [91] [4300/6250] eta: 0:04:40 lr: 0.000002 grad: 0.2067 (0.2115) loss: 0.7664 (0.7573) time: 0.1845 data: 0.1084 max mem: 9377 +Train: [91] [4400/6250] eta: 0:04:26 lr: 0.000002 grad: 0.1985 (0.2114) loss: 0.7625 (0.7573) time: 0.1324 data: 0.0512 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:11 lr: 0.000002 grad: 0.1964 (0.2114) loss: 0.7640 (0.7572) time: 0.1298 data: 0.0468 max mem: 9377 +Train: [91] [4600/6250] eta: 0:03:56 lr: 0.000002 grad: 0.2079 (0.2113) loss: 0.7513 (0.7571) time: 0.1318 data: 0.0506 max mem: 9377 +Train: [91] [4700/6250] eta: 0:03:41 lr: 0.000002 grad: 0.2026 (0.2113) loss: 0.7659 (0.7571) time: 0.1188 data: 0.0356 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:27 lr: 0.000002 grad: 0.2093 (0.2113) loss: 0.7574 (0.7570) time: 0.1299 data: 0.0428 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:12 lr: 0.000002 grad: 0.2083 (0.2113) loss: 0.7546 (0.7569) time: 0.1477 data: 0.0716 max mem: 9377 +Train: [91] [5000/6250] eta: 0:02:58 lr: 0.000002 grad: 0.2119 (0.2113) loss: 0.7477 (0.7567) time: 0.1149 data: 0.0408 max mem: 9377 +Train: [91] [5100/6250] eta: 0:02:44 lr: 0.000002 grad: 0.2056 (0.2113) loss: 0.7599 (0.7567) time: 0.1272 data: 0.0454 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:30 lr: 0.000002 grad: 0.2066 (0.2113) loss: 0.7509 (0.7567) time: 0.1671 data: 0.0882 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:16 lr: 0.000002 grad: 0.2084 (0.2113) loss: 0.7507 (0.7566) time: 0.1732 data: 0.0931 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:01 lr: 0.000002 grad: 0.2075 (0.2113) loss: 0.7514 (0.7566) time: 0.1535 data: 0.0731 max mem: 9377 +Train: [91] [5500/6250] eta: 0:01:47 lr: 0.000002 grad: 0.2111 (0.2113) loss: 0.7588 (0.7566) time: 0.1267 data: 0.0468 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:33 lr: 0.000002 grad: 0.2108 (0.2114) loss: 0.7498 (0.7566) time: 0.1159 data: 0.0312 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:18 lr: 0.000002 grad: 0.2096 (0.2113) loss: 0.7496 (0.7565) time: 0.1350 data: 0.0507 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:04 lr: 0.000002 grad: 0.2156 (0.2114) loss: 0.7492 (0.7564) time: 0.1333 data: 0.0531 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:50 lr: 0.000002 grad: 0.2126 (0.2114) loss: 0.7577 (0.7564) time: 0.1474 data: 0.0669 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:35 lr: 0.000002 grad: 0.2127 (0.2115) loss: 0.7514 (0.7563) time: 0.1108 data: 0.0298 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.2097 (0.2115) loss: 0.7509 (0.7562) time: 0.1598 data: 0.0801 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.2160 (0.2115) loss: 0.7528 (0.7562) time: 0.1283 data: 0.0484 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2108 (0.2115) loss: 0.7491 (0.7562) time: 0.1556 data: 0.0769 max mem: 9377 +Train: [91] Total time: 0:14:55 (0.1433 s / it) +Averaged stats: lr: 0.000002 grad: 0.2108 (0.2115) loss: 0.7491 (0.7562) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:05:22 loss: 0.8109 (0.8109) time: 5.2024 data: 5.1721 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8043 (0.8021) time: 0.1240 data: 0.0975 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:12 (0.2059 s / it) +Averaged stats (hcp-train-subset): loss: 0.8043 (0.8021) +Eval (hcp-val): [91] [ 0/62] eta: 0:04:41 loss: 0.8599 (0.8599) time: 4.5343 data: 4.4919 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8524 (0.8552) time: 0.1013 data: 0.0763 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:12 (0.2090 s / it) +Averaged stats (hcp-val): loss: 0.8524 (0.8552) +Eval (nsd-val): [91] [ 0/62] eta: 0:04:53 loss: 0.8258 (0.8258) time: 4.7396 data: 4.7088 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8357 (0.8372) time: 0.1127 data: 0.0874 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (nsd-val): loss: 0.8357 (0.8372) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 10:26:06 lr: 0.000002 grad: 0.1278 (0.1278) loss: 0.8656 (0.8656) time: 6.0107 data: 5.8906 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:20:51 lr: 0.000002 grad: 0.2252 (0.2442) loss: 0.7561 (0.7605) time: 0.1369 data: 0.0307 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:17:44 lr: 0.000002 grad: 0.2290 (0.2420) loss: 0.7483 (0.7540) time: 0.1387 data: 0.0393 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:16:13 lr: 0.000002 grad: 0.2301 (0.2392) loss: 0.7609 (0.7526) time: 0.1394 data: 0.0529 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:15:22 lr: 0.000002 grad: 0.2248 (0.2363) loss: 0.7710 (0.7531) time: 0.1261 data: 0.0265 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:14:45 lr: 0.000002 grad: 0.2150 (0.2326) loss: 0.7645 (0.7545) time: 0.1319 data: 0.0324 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:14:15 lr: 0.000002 grad: 0.2278 (0.2317) loss: 0.7372 (0.7535) time: 0.1481 data: 0.0556 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:13:54 lr: 0.000002 grad: 0.2219 (0.2308) loss: 0.7583 (0.7530) time: 0.1658 data: 0.0777 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:13:30 lr: 0.000002 grad: 0.2155 (0.2290) loss: 0.7554 (0.7532) time: 0.1447 data: 0.0616 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:13:17 lr: 0.000002 grad: 0.2141 (0.2275) loss: 0.7545 (0.7535) time: 0.1419 data: 0.0537 max mem: 9377 +Train: [92] [1000/6250] eta: 0:12:59 lr: 0.000002 grad: 0.2077 (0.2260) loss: 0.7515 (0.7537) time: 0.1319 data: 0.0397 max mem: 9377 +Train: [92] [1100/6250] eta: 0:12:36 lr: 0.000002 grad: 0.2155 (0.2246) loss: 0.7479 (0.7539) time: 0.1384 data: 0.0543 max mem: 9377 +Train: [92] [1200/6250] eta: 0:12:19 lr: 0.000002 grad: 0.2067 (0.2236) loss: 0.7609 (0.7538) time: 0.1664 data: 0.0825 max mem: 9377 +Train: [92] [1300/6250] eta: 0:12:04 lr: 0.000002 grad: 0.2081 (0.2227) loss: 0.7593 (0.7539) time: 0.1398 data: 0.0549 max mem: 9377 +Train: [92] [1400/6250] eta: 0:11:45 lr: 0.000002 grad: 0.2122 (0.2219) loss: 0.7452 (0.7539) time: 0.1118 data: 0.0158 max mem: 9377 +Train: [92] [1500/6250] eta: 0:11:30 lr: 0.000002 grad: 0.2097 (0.2212) loss: 0.7478 (0.7539) time: 0.0973 data: 0.0074 max mem: 9377 +Train: [92] [1600/6250] eta: 0:11:12 lr: 0.000002 grad: 0.2082 (0.2207) loss: 0.7579 (0.7537) time: 0.1266 data: 0.0401 max mem: 9377 +Train: [92] [1700/6250] eta: 0:10:57 lr: 0.000002 grad: 0.2054 (0.2201) loss: 0.7603 (0.7538) time: 0.1419 data: 0.0567 max mem: 9377 +Train: [92] [1800/6250] eta: 0:10:40 lr: 0.000002 grad: 0.2070 (0.2194) loss: 0.7495 (0.7542) time: 0.1344 data: 0.0474 max mem: 9377 +Train: [92] [1900/6250] eta: 0:10:25 lr: 0.000002 grad: 0.2126 (0.2189) loss: 0.7431 (0.7542) time: 0.1512 data: 0.0731 max mem: 9377 +Train: [92] [2000/6250] eta: 0:10:09 lr: 0.000002 grad: 0.2079 (0.2186) loss: 0.7506 (0.7541) time: 0.1294 data: 0.0463 max mem: 9377 +Train: [92] [2100/6250] eta: 0:09:56 lr: 0.000002 grad: 0.2105 (0.2181) loss: 0.7605 (0.7542) time: 0.1578 data: 0.0775 max mem: 9377 +Train: [92] [2200/6250] eta: 0:09:39 lr: 0.000002 grad: 0.2070 (0.2176) loss: 0.7608 (0.7543) time: 0.1221 data: 0.0360 max mem: 9377 +Train: [92] [2300/6250] eta: 0:09:24 lr: 0.000002 grad: 0.2022 (0.2171) loss: 0.7564 (0.7545) time: 0.1640 data: 0.0793 max mem: 9377 +Train: [92] [2400/6250] eta: 0:09:09 lr: 0.000002 grad: 0.2121 (0.2167) loss: 0.7512 (0.7546) time: 0.1526 data: 0.0688 max mem: 9377 +Train: [92] [2500/6250] eta: 0:08:54 lr: 0.000002 grad: 0.2038 (0.2164) loss: 0.7551 (0.7547) time: 0.1370 data: 0.0554 max mem: 9377 +Train: [92] [2600/6250] eta: 0:08:40 lr: 0.000002 grad: 0.2075 (0.2160) loss: 0.7535 (0.7549) time: 0.1483 data: 0.0644 max mem: 9377 +Train: [92] [2700/6250] eta: 0:08:24 lr: 0.000002 grad: 0.2090 (0.2157) loss: 0.7519 (0.7551) time: 0.1377 data: 0.0586 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:10 lr: 0.000002 grad: 0.2105 (0.2154) loss: 0.7552 (0.7553) time: 0.1453 data: 0.0590 max mem: 9377 +Train: [92] [2900/6250] eta: 0:07:56 lr: 0.000002 grad: 0.1952 (0.2151) loss: 0.7598 (0.7555) time: 0.1500 data: 0.0642 max mem: 9377 +Train: [92] [3000/6250] eta: 0:07:42 lr: 0.000002 grad: 0.2114 (0.2149) loss: 0.7458 (0.7557) time: 0.1557 data: 0.0769 max mem: 9377 +Train: [92] [3100/6250] eta: 0:07:29 lr: 0.000002 grad: 0.2111 (0.2147) loss: 0.7565 (0.7558) time: 0.1552 data: 0.0737 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:15 lr: 0.000002 grad: 0.2104 (0.2145) loss: 0.7521 (0.7560) time: 0.1629 data: 0.0815 max mem: 9377 +Train: [92] [3300/6250] eta: 0:07:01 lr: 0.000002 grad: 0.1932 (0.2143) loss: 0.7690 (0.7560) time: 0.1444 data: 0.0704 max mem: 9377 +Train: [92] [3400/6250] eta: 0:06:47 lr: 0.000002 grad: 0.2086 (0.2141) loss: 0.7594 (0.7562) time: 0.1462 data: 0.0614 max mem: 9377 +Train: [92] [3500/6250] eta: 0:06:34 lr: 0.000002 grad: 0.2038 (0.2139) loss: 0.7708 (0.7564) time: 0.1689 data: 0.0806 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:19 lr: 0.000002 grad: 0.2134 (0.2139) loss: 0.7535 (0.7565) time: 0.1355 data: 0.0500 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:04 lr: 0.000002 grad: 0.2058 (0.2137) loss: 0.7611 (0.7567) time: 0.1164 data: 0.0291 max mem: 9377 +Train: [92] [3800/6250] eta: 0:05:50 lr: 0.000002 grad: 0.2125 (0.2136) loss: 0.7539 (0.7569) time: 0.1617 data: 0.0822 max mem: 9377 +Train: [92] [3900/6250] eta: 0:05:34 lr: 0.000002 grad: 0.2023 (0.2134) loss: 0.7604 (0.7570) time: 0.1329 data: 0.0505 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:20 lr: 0.000002 grad: 0.2149 (0.2132) loss: 0.7469 (0.7570) time: 0.1415 data: 0.0597 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:05 lr: 0.000002 grad: 0.2079 (0.2131) loss: 0.7567 (0.7571) time: 0.1286 data: 0.0437 max mem: 9377 +Train: [92] [4200/6250] eta: 0:04:51 lr: 0.000002 grad: 0.2127 (0.2130) loss: 0.7473 (0.7571) time: 0.1242 data: 0.0400 max mem: 9377 +Train: [92] [4300/6250] eta: 0:04:37 lr: 0.000002 grad: 0.2019 (0.2129) loss: 0.7626 (0.7571) time: 0.1413 data: 0.0556 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:23 lr: 0.000002 grad: 0.2070 (0.2128) loss: 0.7622 (0.7572) time: 0.1432 data: 0.0586 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:09 lr: 0.000002 grad: 0.2057 (0.2128) loss: 0.7645 (0.7573) time: 0.1343 data: 0.0560 max mem: 9377 +Train: [92] [4600/6250] eta: 0:03:54 lr: 0.000002 grad: 0.2065 (0.2127) loss: 0.7585 (0.7573) time: 0.1466 data: 0.0679 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:40 lr: 0.000002 grad: 0.1988 (0.2125) loss: 0.7603 (0.7574) time: 0.1503 data: 0.0578 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:25 lr: 0.000002 grad: 0.2111 (0.2124) loss: 0.7637 (0.7575) time: 0.1312 data: 0.0479 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:10 lr: 0.000002 grad: 0.2088 (0.2123) loss: 0.7563 (0.7576) time: 0.1004 data: 0.0013 max mem: 9377 +Train: [92] [5000/6250] eta: 0:02:57 lr: 0.000002 grad: 0.2040 (0.2122) loss: 0.7593 (0.7576) time: 0.1455 data: 0.0724 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:43 lr: 0.000002 grad: 0.1990 (0.2120) loss: 0.7556 (0.7576) time: 0.1410 data: 0.0617 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:29 lr: 0.000002 grad: 0.1985 (0.2119) loss: 0.7677 (0.7577) time: 0.1490 data: 0.0645 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:15 lr: 0.000002 grad: 0.1960 (0.2118) loss: 0.7657 (0.7578) time: 0.1649 data: 0.0836 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:01 lr: 0.000002 grad: 0.2042 (0.2116) loss: 0.7504 (0.7579) time: 0.1620 data: 0.0871 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:47 lr: 0.000002 grad: 0.2042 (0.2115) loss: 0.7558 (0.7579) time: 0.1154 data: 0.0295 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:32 lr: 0.000002 grad: 0.2107 (0.2115) loss: 0.7546 (0.7579) time: 0.1365 data: 0.0553 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:18 lr: 0.000002 grad: 0.2069 (0.2114) loss: 0.7576 (0.7578) time: 0.1384 data: 0.0551 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:04 lr: 0.000002 grad: 0.2068 (0.2113) loss: 0.7576 (0.7578) time: 0.1442 data: 0.0577 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:49 lr: 0.000002 grad: 0.2104 (0.2112) loss: 0.7545 (0.7578) time: 0.1354 data: 0.0511 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:35 lr: 0.000002 grad: 0.2041 (0.2112) loss: 0.7616 (0.7578) time: 0.1628 data: 0.0794 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:21 lr: 0.000002 grad: 0.1995 (0.2111) loss: 0.7680 (0.7579) time: 0.1259 data: 0.0410 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.2044 (0.2109) loss: 0.7624 (0.7580) time: 0.1379 data: 0.0605 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2115 (0.2109) loss: 0.7616 (0.7580) time: 0.1453 data: 0.0674 max mem: 9377 +Train: [92] Total time: 0:14:55 (0.1433 s / it) +Averaged stats: lr: 0.000002 grad: 0.2115 (0.2109) loss: 0.7616 (0.7580) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:04:55 loss: 0.8124 (0.8124) time: 4.7728 data: 4.7427 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8047 (0.8025) time: 0.1120 data: 0.0869 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:12 (0.2081 s / it) +Averaged stats (hcp-train-subset): loss: 0.8047 (0.8025) +Eval (hcp-val): [92] [ 0/62] eta: 0:03:31 loss: 0.8536 (0.8536) time: 3.4126 data: 3.3469 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8534 (0.8555) time: 0.1214 data: 0.0967 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:13 (0.2101 s / it) +Averaged stats (hcp-val): loss: 0.8534 (0.8555) +Eval (nsd-val): [92] [ 0/62] eta: 0:04:35 loss: 0.8287 (0.8287) time: 4.4514 data: 4.3978 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8340 (0.8364) time: 0.1156 data: 0.0888 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:12 (0.2077 s / it) +Averaged stats (nsd-val): loss: 0.8340 (0.8364) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 9:30:52 lr: 0.000002 grad: 0.4003 (0.4003) loss: 0.6514 (0.6514) time: 5.4804 data: 5.3798 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:20:21 lr: 0.000002 grad: 0.2218 (0.2277) loss: 0.7706 (0.7750) time: 0.1223 data: 0.0149 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:17:54 lr: 0.000002 grad: 0.2048 (0.2294) loss: 0.7596 (0.7652) time: 0.1528 data: 0.0532 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:16:36 lr: 0.000002 grad: 0.2142 (0.2274) loss: 0.7603 (0.7626) time: 0.1528 data: 0.0563 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:15:27 lr: 0.000002 grad: 0.2162 (0.2250) loss: 0.7597 (0.7610) time: 0.1354 data: 0.0465 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:14:42 lr: 0.000002 grad: 0.2076 (0.2232) loss: 0.7617 (0.7603) time: 0.1316 data: 0.0473 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:14:05 lr: 0.000002 grad: 0.2116 (0.2221) loss: 0.7625 (0.7599) time: 0.1019 data: 0.0070 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:13:42 lr: 0.000002 grad: 0.2120 (0.2211) loss: 0.7577 (0.7595) time: 0.1510 data: 0.0612 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:13:14 lr: 0.000002 grad: 0.2009 (0.2206) loss: 0.7598 (0.7590) time: 0.1413 data: 0.0495 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:12:53 lr: 0.000002 grad: 0.2162 (0.2202) loss: 0.7580 (0.7588) time: 0.1281 data: 0.0377 max mem: 9377 +Train: [93] [1000/6250] eta: 0:12:35 lr: 0.000002 grad: 0.2122 (0.2192) loss: 0.7697 (0.7588) time: 0.1261 data: 0.0425 max mem: 9377 +Train: [93] [1100/6250] eta: 0:12:14 lr: 0.000002 grad: 0.2053 (0.2186) loss: 0.7609 (0.7585) time: 0.1272 data: 0.0442 max mem: 9377 +Train: [93] [1200/6250] eta: 0:11:56 lr: 0.000002 grad: 0.2061 (0.2178) loss: 0.7591 (0.7584) time: 0.1200 data: 0.0301 max mem: 9377 +Train: [93] [1300/6250] eta: 0:11:40 lr: 0.000002 grad: 0.1991 (0.2169) loss: 0.7627 (0.7583) time: 0.1324 data: 0.0526 max mem: 9377 +Train: [93] [1400/6250] eta: 0:11:27 lr: 0.000002 grad: 0.2051 (0.2164) loss: 0.7618 (0.7582) time: 0.1608 data: 0.0764 max mem: 9377 +Train: [93] [1500/6250] eta: 0:11:10 lr: 0.000002 grad: 0.2030 (0.2160) loss: 0.7682 (0.7581) time: 0.1546 data: 0.0771 max mem: 9377 +Train: [93] [1600/6250] eta: 0:10:55 lr: 0.000002 grad: 0.2094 (0.2156) loss: 0.7636 (0.7581) time: 0.1355 data: 0.0624 max mem: 9377 +Train: [93] [1700/6250] eta: 0:10:40 lr: 0.000002 grad: 0.2023 (0.2152) loss: 0.7700 (0.7580) time: 0.1447 data: 0.0681 max mem: 9377 +Train: [93] [1800/6250] eta: 0:10:27 lr: 0.000002 grad: 0.2016 (0.2146) loss: 0.7553 (0.7580) time: 0.1510 data: 0.0660 max mem: 9377 +Train: [93] [1900/6250] eta: 0:10:11 lr: 0.000002 grad: 0.2055 (0.2142) loss: 0.7575 (0.7578) time: 0.1223 data: 0.0391 max mem: 9377 +Train: [93] [2000/6250] eta: 0:09:56 lr: 0.000002 grad: 0.2093 (0.2139) loss: 0.7544 (0.7578) time: 0.1188 data: 0.0355 max mem: 9377 +Train: [93] [2100/6250] eta: 0:09:44 lr: 0.000002 grad: 0.2078 (0.2137) loss: 0.7573 (0.7578) time: 0.1681 data: 0.0923 max mem: 9377 +Train: [93] [2200/6250] eta: 0:09:28 lr: 0.000002 grad: 0.2005 (0.2135) loss: 0.7567 (0.7577) time: 0.1415 data: 0.0582 max mem: 9377 +Train: [93] [2300/6250] eta: 0:09:14 lr: 0.000001 grad: 0.2060 (0.2132) loss: 0.7678 (0.7576) time: 0.1657 data: 0.0813 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:00 lr: 0.000001 grad: 0.2034 (0.2130) loss: 0.7602 (0.7576) time: 0.1177 data: 0.0312 max mem: 9377 +Train: [93] [2500/6250] eta: 0:08:45 lr: 0.000001 grad: 0.2141 (0.2129) loss: 0.7522 (0.7575) time: 0.1401 data: 0.0576 max mem: 9377 +Train: [93] [2600/6250] eta: 0:08:31 lr: 0.000001 grad: 0.2101 (0.2129) loss: 0.7477 (0.7573) time: 0.1316 data: 0.0495 max mem: 9377 +Train: [93] [2700/6250] eta: 0:08:17 lr: 0.000001 grad: 0.2079 (0.2128) loss: 0.7587 (0.7572) time: 0.1468 data: 0.0606 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:04 lr: 0.000001 grad: 0.2052 (0.2129) loss: 0.7542 (0.7571) time: 0.1443 data: 0.0691 max mem: 9377 +Train: [93] [2900/6250] eta: 0:07:49 lr: 0.000001 grad: 0.2122 (0.2130) loss: 0.7596 (0.7569) time: 0.1275 data: 0.0446 max mem: 9377 +Train: [93] [3000/6250] eta: 0:07:35 lr: 0.000001 grad: 0.2177 (0.2129) loss: 0.7559 (0.7568) time: 0.1161 data: 0.0370 max mem: 9377 +Train: [93] [3100/6250] eta: 0:07:22 lr: 0.000001 grad: 0.2124 (0.2130) loss: 0.7492 (0.7566) time: 0.1622 data: 0.0834 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:08 lr: 0.000001 grad: 0.2117 (0.2132) loss: 0.7490 (0.7565) time: 0.1417 data: 0.0615 max mem: 9377 +Train: [93] [3300/6250] eta: 0:06:54 lr: 0.000001 grad: 0.2123 (0.2133) loss: 0.7619 (0.7563) time: 0.1344 data: 0.0596 max mem: 9377 +Train: [93] [3400/6250] eta: 0:06:40 lr: 0.000001 grad: 0.2205 (0.2134) loss: 0.7475 (0.7561) time: 0.1414 data: 0.0643 max mem: 9377 +Train: [93] [3500/6250] eta: 0:06:26 lr: 0.000001 grad: 0.2136 (0.2135) loss: 0.7491 (0.7560) time: 0.1371 data: 0.0518 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:13 lr: 0.000001 grad: 0.2192 (0.2136) loss: 0.7502 (0.7560) time: 0.1580 data: 0.0777 max mem: 9377 +Train: [93] [3700/6250] eta: 0:05:59 lr: 0.000001 grad: 0.2152 (0.2137) loss: 0.7450 (0.7559) time: 0.1530 data: 0.0657 max mem: 9377 +Train: [93] [3800/6250] eta: 0:05:46 lr: 0.000001 grad: 0.2122 (0.2138) loss: 0.7458 (0.7558) time: 0.1332 data: 0.0527 max mem: 9377 +Train: [93] [3900/6250] eta: 0:05:31 lr: 0.000001 grad: 0.2155 (0.2139) loss: 0.7520 (0.7558) time: 0.1170 data: 0.0309 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:17 lr: 0.000001 grad: 0.2077 (0.2140) loss: 0.7587 (0.7557) time: 0.1240 data: 0.0427 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:02 lr: 0.000001 grad: 0.2133 (0.2140) loss: 0.7536 (0.7557) time: 0.1245 data: 0.0453 max mem: 9377 +Train: [93] [4200/6250] eta: 0:04:48 lr: 0.000001 grad: 0.2199 (0.2141) loss: 0.7534 (0.7557) time: 0.1122 data: 0.0352 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:34 lr: 0.000001 grad: 0.2164 (0.2142) loss: 0.7535 (0.7555) time: 0.1406 data: 0.0629 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:20 lr: 0.000001 grad: 0.2178 (0.2142) loss: 0.7530 (0.7555) time: 0.1440 data: 0.0585 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:06 lr: 0.000001 grad: 0.2168 (0.2142) loss: 0.7567 (0.7555) time: 0.1706 data: 0.0820 max mem: 9377 +Train: [93] [4600/6250] eta: 0:03:52 lr: 0.000001 grad: 0.2123 (0.2142) loss: 0.7626 (0.7555) time: 0.1341 data: 0.0474 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:38 lr: 0.000001 grad: 0.2109 (0.2142) loss: 0.7482 (0.7555) time: 0.1267 data: 0.0349 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:23 lr: 0.000001 grad: 0.2125 (0.2142) loss: 0.7569 (0.7555) time: 0.1451 data: 0.0646 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:09 lr: 0.000001 grad: 0.2148 (0.2142) loss: 0.7507 (0.7555) time: 0.1312 data: 0.0487 max mem: 9377 +Train: [93] [5000/6250] eta: 0:02:55 lr: 0.000001 grad: 0.2138 (0.2142) loss: 0.7489 (0.7554) time: 0.1289 data: 0.0423 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:40 lr: 0.000001 grad: 0.2131 (0.2141) loss: 0.7512 (0.7554) time: 0.1040 data: 0.0154 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:27 lr: 0.000001 grad: 0.2067 (0.2141) loss: 0.7507 (0.7553) time: 0.1309 data: 0.0578 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:13 lr: 0.000001 grad: 0.2124 (0.2141) loss: 0.7606 (0.7553) time: 0.1068 data: 0.0249 max mem: 9377 +Train: [93] [5400/6250] eta: 0:01:59 lr: 0.000001 grad: 0.2156 (0.2141) loss: 0.7548 (0.7553) time: 0.1524 data: 0.0717 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:45 lr: 0.000001 grad: 0.2099 (0.2140) loss: 0.7600 (0.7553) time: 0.1536 data: 0.0696 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:31 lr: 0.000001 grad: 0.2101 (0.2140) loss: 0.7374 (0.7553) time: 0.1595 data: 0.0781 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:17 lr: 0.000001 grad: 0.2158 (0.2140) loss: 0.7468 (0.7552) time: 0.1568 data: 0.0764 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:03 lr: 0.000001 grad: 0.2185 (0.2141) loss: 0.7376 (0.7551) time: 0.1315 data: 0.0535 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:49 lr: 0.000001 grad: 0.2064 (0.2141) loss: 0.7562 (0.7551) time: 0.1616 data: 0.0777 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:35 lr: 0.000001 grad: 0.2119 (0.2141) loss: 0.7498 (0.7550) time: 0.1742 data: 0.0889 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:21 lr: 0.000001 grad: 0.2192 (0.2141) loss: 0.7483 (0.7550) time: 0.1216 data: 0.0359 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2121 (0.2141) loss: 0.7479 (0.7550) time: 0.1408 data: 0.0618 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2088 (0.2140) loss: 0.7505 (0.7550) time: 0.1339 data: 0.0450 max mem: 9377 +Train: [93] Total time: 0:14:44 (0.1415 s / it) +Averaged stats: lr: 0.000001 grad: 0.2088 (0.2140) loss: 0.7505 (0.7550) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:03:55 loss: 0.8108 (0.8108) time: 3.7943 data: 3.7235 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8031 (0.8014) time: 0.1145 data: 0.0882 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:12 (0.2087 s / it) +Averaged stats (hcp-train-subset): loss: 0.8031 (0.8014) +Eval (hcp-val): [93] [ 0/62] eta: 0:05:22 loss: 0.8578 (0.8578) time: 5.2096 data: 5.1771 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8519 (0.8543) time: 0.1131 data: 0.0884 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:12 (0.2056 s / it) +Averaged stats (hcp-val): loss: 0.8519 (0.8543) +Eval (nsd-val): [93] [ 0/62] eta: 0:03:41 loss: 0.8253 (0.8253) time: 3.5676 data: 3.4932 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8325 (0.8351) time: 0.1211 data: 0.0961 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:12 (0.2010 s / it) +Averaged stats (nsd-val): loss: 0.8325 (0.8351) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 9:49:15 lr: 0.000001 grad: 0.2478 (0.2478) loss: 0.7032 (0.7032) time: 5.6568 data: 5.5085 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:20:40 lr: 0.000001 grad: 0.2101 (0.2585) loss: 0.7679 (0.7614) time: 0.1386 data: 0.0331 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:17:35 lr: 0.000001 grad: 0.2164 (0.2413) loss: 0.7680 (0.7653) time: 0.1278 data: 0.0136 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:16:25 lr: 0.000001 grad: 0.2000 (0.2344) loss: 0.7742 (0.7656) time: 0.1238 data: 0.0230 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:15:24 lr: 0.000001 grad: 0.2188 (0.2300) loss: 0.7546 (0.7647) time: 0.1427 data: 0.0546 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:14:40 lr: 0.000001 grad: 0.1995 (0.2260) loss: 0.7718 (0.7643) time: 0.1254 data: 0.0308 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:14:04 lr: 0.000001 grad: 0.2095 (0.2233) loss: 0.7547 (0.7635) time: 0.1473 data: 0.0518 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:13:40 lr: 0.000001 grad: 0.2050 (0.2212) loss: 0.7674 (0.7629) time: 0.1410 data: 0.0428 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:13:18 lr: 0.000001 grad: 0.2120 (0.2197) loss: 0.7614 (0.7623) time: 0.1291 data: 0.0347 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:12:58 lr: 0.000001 grad: 0.2186 (0.2186) loss: 0.7643 (0.7617) time: 0.1497 data: 0.0664 max mem: 9377 +Train: [94] [1000/6250] eta: 0:12:36 lr: 0.000001 grad: 0.2090 (0.2177) loss: 0.7580 (0.7612) time: 0.1361 data: 0.0514 max mem: 9377 +Train: [94] [1100/6250] eta: 0:12:18 lr: 0.000001 grad: 0.2006 (0.2169) loss: 0.7631 (0.7609) time: 0.1413 data: 0.0567 max mem: 9377 +Train: [94] [1200/6250] eta: 0:11:59 lr: 0.000001 grad: 0.2143 (0.2162) loss: 0.7538 (0.7606) time: 0.1299 data: 0.0424 max mem: 9377 +Train: [94] [1300/6250] eta: 0:11:45 lr: 0.000001 grad: 0.2147 (0.2160) loss: 0.7480 (0.7600) time: 0.1417 data: 0.0534 max mem: 9377 +Train: [94] [1400/6250] eta: 0:11:30 lr: 0.000001 grad: 0.2084 (0.2159) loss: 0.7482 (0.7593) time: 0.1421 data: 0.0659 max mem: 9377 +Train: [94] [1500/6250] eta: 0:11:13 lr: 0.000001 grad: 0.2027 (0.2156) loss: 0.7593 (0.7589) time: 0.1301 data: 0.0488 max mem: 9377 +Train: [94] [1600/6250] eta: 0:10:58 lr: 0.000001 grad: 0.2148 (0.2152) loss: 0.7524 (0.7587) time: 0.0988 data: 0.0109 max mem: 9377 +Train: [94] [1700/6250] eta: 0:10:43 lr: 0.000001 grad: 0.2151 (0.2150) loss: 0.7521 (0.7585) time: 0.1432 data: 0.0583 max mem: 9377 +Train: [94] [1800/6250] eta: 0:10:30 lr: 0.000001 grad: 0.2136 (0.2149) loss: 0.7452 (0.7581) time: 0.1612 data: 0.0806 max mem: 9377 +Train: [94] [1900/6250] eta: 0:10:14 lr: 0.000001 grad: 0.2131 (0.2149) loss: 0.7480 (0.7578) time: 0.1262 data: 0.0419 max mem: 9377 +Train: [94] [2000/6250] eta: 0:10:00 lr: 0.000001 grad: 0.2124 (0.2148) loss: 0.7543 (0.7576) time: 0.1560 data: 0.0759 max mem: 9377 +Train: [94] [2100/6250] eta: 0:09:45 lr: 0.000001 grad: 0.2211 (0.2149) loss: 0.7385 (0.7573) time: 0.1447 data: 0.0604 max mem: 9377 +Train: [94] [2200/6250] eta: 0:09:30 lr: 0.000001 grad: 0.2126 (0.2152) loss: 0.7441 (0.7568) time: 0.1348 data: 0.0494 max mem: 9377 +Train: [94] [2300/6250] eta: 0:09:16 lr: 0.000001 grad: 0.2213 (0.2155) loss: 0.7440 (0.7562) time: 0.1476 data: 0.0657 max mem: 9377 +Train: [94] [2400/6250] eta: 0:09:01 lr: 0.000001 grad: 0.2071 (0.2156) loss: 0.7542 (0.7559) time: 0.1231 data: 0.0432 max mem: 9377 +Train: [94] [2500/6250] eta: 0:08:47 lr: 0.000001 grad: 0.2067 (0.2156) loss: 0.7528 (0.7555) time: 0.1371 data: 0.0485 max mem: 9377 +Train: [94] [2600/6250] eta: 0:08:33 lr: 0.000001 grad: 0.2133 (0.2155) loss: 0.7483 (0.7555) time: 0.1150 data: 0.0332 max mem: 9377 +Train: [94] [2700/6250] eta: 0:08:19 lr: 0.000001 grad: 0.1975 (0.2153) loss: 0.7714 (0.7556) time: 0.1331 data: 0.0468 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:04 lr: 0.000001 grad: 0.2066 (0.2152) loss: 0.7584 (0.7555) time: 0.1349 data: 0.0471 max mem: 9377 +Train: [94] [2900/6250] eta: 0:07:50 lr: 0.000001 grad: 0.2064 (0.2149) loss: 0.7481 (0.7556) time: 0.1513 data: 0.0678 max mem: 9377 +Train: [94] [3000/6250] eta: 0:07:37 lr: 0.000001 grad: 0.2062 (0.2147) loss: 0.7572 (0.7557) time: 0.1616 data: 0.0783 max mem: 9377 +Train: [94] [3100/6250] eta: 0:07:22 lr: 0.000001 grad: 0.2001 (0.2144) loss: 0.7541 (0.7559) time: 0.1375 data: 0.0572 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:08 lr: 0.000001 grad: 0.2056 (0.2142) loss: 0.7598 (0.7559) time: 0.1318 data: 0.0487 max mem: 9377 +Train: [94] [3300/6250] eta: 0:06:54 lr: 0.000001 grad: 0.2126 (0.2141) loss: 0.7543 (0.7561) time: 0.1342 data: 0.0476 max mem: 9377 +Train: [94] [3400/6250] eta: 0:06:40 lr: 0.000001 grad: 0.2049 (0.2139) loss: 0.7563 (0.7561) time: 0.1287 data: 0.0467 max mem: 9377 +Train: [94] [3500/6250] eta: 0:06:26 lr: 0.000001 grad: 0.2028 (0.2136) loss: 0.7615 (0.7562) time: 0.1158 data: 0.0366 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:12 lr: 0.000001 grad: 0.2008 (0.2135) loss: 0.7630 (0.7564) time: 0.1513 data: 0.0759 max mem: 9377 +Train: [94] [3700/6250] eta: 0:05:58 lr: 0.000001 grad: 0.2038 (0.2134) loss: 0.7650 (0.7564) time: 0.1666 data: 0.0850 max mem: 9377 +Train: [94] [3800/6250] eta: 0:05:45 lr: 0.000001 grad: 0.2067 (0.2133) loss: 0.7492 (0.7564) time: 0.1340 data: 0.0456 max mem: 9377 +Train: [94] [3900/6250] eta: 0:05:31 lr: 0.000001 grad: 0.2093 (0.2132) loss: 0.7592 (0.7564) time: 0.1414 data: 0.0577 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:17 lr: 0.000001 grad: 0.2059 (0.2131) loss: 0.7554 (0.7564) time: 0.1385 data: 0.0552 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:02 lr: 0.000001 grad: 0.2209 (0.2131) loss: 0.7528 (0.7564) time: 0.1207 data: 0.0360 max mem: 9377 +Train: [94] [4200/6250] eta: 0:04:48 lr: 0.000001 grad: 0.2131 (0.2130) loss: 0.7524 (0.7563) time: 0.1812 data: 0.0945 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:34 lr: 0.000001 grad: 0.2145 (0.2129) loss: 0.7532 (0.7563) time: 0.1382 data: 0.0614 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:20 lr: 0.000001 grad: 0.2108 (0.2128) loss: 0.7611 (0.7564) time: 0.1381 data: 0.0526 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:06 lr: 0.000001 grad: 0.2147 (0.2128) loss: 0.7475 (0.7563) time: 0.1309 data: 0.0548 max mem: 9377 +Train: [94] [4600/6250] eta: 0:03:52 lr: 0.000001 grad: 0.1970 (0.2128) loss: 0.7627 (0.7563) time: 0.1635 data: 0.0804 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:39 lr: 0.000001 grad: 0.1993 (0.2128) loss: 0.7603 (0.7563) time: 0.1409 data: 0.0569 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:24 lr: 0.000001 grad: 0.2090 (0.2128) loss: 0.7573 (0.7563) time: 0.1208 data: 0.0347 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:10 lr: 0.000001 grad: 0.2076 (0.2128) loss: 0.7630 (0.7563) time: 0.1369 data: 0.0544 max mem: 9377 +Train: [94] [5000/6250] eta: 0:02:55 lr: 0.000001 grad: 0.2068 (0.2128) loss: 0.7519 (0.7562) time: 0.1313 data: 0.0518 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:41 lr: 0.000001 grad: 0.2114 (0.2127) loss: 0.7526 (0.7561) time: 0.1302 data: 0.0441 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:27 lr: 0.000001 grad: 0.2060 (0.2128) loss: 0.7619 (0.7561) time: 0.1268 data: 0.0413 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:12 lr: 0.000001 grad: 0.2250 (0.2128) loss: 0.7559 (0.7560) time: 0.1440 data: 0.0605 max mem: 9377 +Train: [94] [5400/6250] eta: 0:01:59 lr: 0.000001 grad: 0.2094 (0.2128) loss: 0.7646 (0.7560) time: 0.2516 data: 0.1693 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:45 lr: 0.000001 grad: 0.2071 (0.2127) loss: 0.7578 (0.7561) time: 0.1656 data: 0.0700 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:31 lr: 0.000001 grad: 0.2215 (0.2127) loss: 0.7399 (0.7560) time: 0.1124 data: 0.0298 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:17 lr: 0.000001 grad: 0.2102 (0.2128) loss: 0.7468 (0.7559) time: 0.1421 data: 0.0610 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:03 lr: 0.000001 grad: 0.2129 (0.2128) loss: 0.7524 (0.7558) time: 0.1586 data: 0.0743 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:49 lr: 0.000001 grad: 0.2108 (0.2128) loss: 0.7498 (0.7558) time: 0.1430 data: 0.0651 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:35 lr: 0.000001 grad: 0.2092 (0.2129) loss: 0.7482 (0.7558) time: 0.1790 data: 0.0980 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:21 lr: 0.000001 grad: 0.2162 (0.2129) loss: 0.7505 (0.7557) time: 0.1340 data: 0.0545 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2104 (0.2129) loss: 0.7496 (0.7557) time: 0.1612 data: 0.0782 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2149 (0.2129) loss: 0.7530 (0.7557) time: 0.1536 data: 0.0690 max mem: 9377 +Train: [94] Total time: 0:14:47 (0.1420 s / it) +Averaged stats: lr: 0.000001 grad: 0.2149 (0.2129) loss: 0.7530 (0.7557) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:03:49 loss: 0.8088 (0.8088) time: 3.7043 data: 3.6290 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8020 (0.8009) time: 0.1238 data: 0.0988 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (hcp-train-subset): loss: 0.8020 (0.8009) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [94] [ 0/62] eta: 0:05:05 loss: 0.8547 (0.8547) time: 4.9239 data: 4.8943 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8557 (0.8557) time: 0.1267 data: 0.1002 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:13 (0.2107 s / it) +Averaged stats (hcp-val): loss: 0.8557 (0.8557) +Making plots (hcp-val): example=43 +Eval (nsd-val): [94] [ 0/62] eta: 0:05:33 loss: 0.8292 (0.8292) time: 5.3868 data: 5.3561 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8356 (0.8360) time: 0.1157 data: 0.0910 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (nsd-val): loss: 0.8356 (0.8360) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 11:17:41 lr: 0.000001 grad: 0.2182 (0.2182) loss: 0.8066 (0.8066) time: 6.5058 data: 6.3910 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:20:36 lr: 0.000001 grad: 0.2247 (0.2327) loss: 0.7590 (0.7723) time: 0.1480 data: 0.0509 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:17:17 lr: 0.000001 grad: 0.2192 (0.2315) loss: 0.7571 (0.7629) time: 0.1244 data: 0.0277 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:15:48 lr: 0.000001 grad: 0.2132 (0.2289) loss: 0.7528 (0.7609) time: 0.1428 data: 0.0487 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:14:52 lr: 0.000001 grad: 0.2189 (0.2281) loss: 0.7531 (0.7598) time: 0.1327 data: 0.0492 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:14:08 lr: 0.000001 grad: 0.2142 (0.2268) loss: 0.7563 (0.7585) time: 0.1283 data: 0.0371 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:13:38 lr: 0.000001 grad: 0.2178 (0.2256) loss: 0.7605 (0.7575) time: 0.1330 data: 0.0363 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:13:19 lr: 0.000001 grad: 0.2113 (0.2241) loss: 0.7495 (0.7571) time: 0.1341 data: 0.0515 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:13:08 lr: 0.000001 grad: 0.2178 (0.2235) loss: 0.7439 (0.7567) time: 0.1421 data: 0.0467 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:12:51 lr: 0.000001 grad: 0.2160 (0.2224) loss: 0.7445 (0.7565) time: 0.1569 data: 0.0667 max mem: 9377 +Train: [95] [1000/6250] eta: 0:12:35 lr: 0.000001 grad: 0.2111 (0.2218) loss: 0.7624 (0.7564) time: 0.1397 data: 0.0601 max mem: 9377 +Train: [95] [1100/6250] eta: 0:12:14 lr: 0.000001 grad: 0.2191 (0.2210) loss: 0.7538 (0.7563) time: 0.1245 data: 0.0373 max mem: 9377 +Train: [95] [1200/6250] eta: 0:11:57 lr: 0.000001 grad: 0.2125 (0.2203) loss: 0.7667 (0.7565) time: 0.1473 data: 0.0662 max mem: 9377 +Train: [95] [1300/6250] eta: 0:11:43 lr: 0.000001 grad: 0.2139 (0.2201) loss: 0.7541 (0.7562) time: 0.1498 data: 0.0674 max mem: 9377 +Train: [95] [1400/6250] eta: 0:11:24 lr: 0.000001 grad: 0.2091 (0.2197) loss: 0.7569 (0.7563) time: 0.1410 data: 0.0660 max mem: 9377 +Train: [95] [1500/6250] eta: 0:11:08 lr: 0.000001 grad: 0.2058 (0.2191) loss: 0.7591 (0.7563) time: 0.1361 data: 0.0537 max mem: 9377 +Train: [95] [1600/6250] eta: 0:10:53 lr: 0.000001 grad: 0.2124 (0.2187) loss: 0.7582 (0.7563) time: 0.1194 data: 0.0321 max mem: 9377 +Train: [95] [1700/6250] eta: 0:10:39 lr: 0.000001 grad: 0.2091 (0.2183) loss: 0.7552 (0.7563) time: 0.1556 data: 0.0716 max mem: 9377 +Train: [95] [1800/6250] eta: 0:10:24 lr: 0.000001 grad: 0.2058 (0.2179) loss: 0.7632 (0.7564) time: 0.1387 data: 0.0601 max mem: 9377 +Train: [95] [1900/6250] eta: 0:10:09 lr: 0.000001 grad: 0.2067 (0.2176) loss: 0.7614 (0.7564) time: 0.1384 data: 0.0481 max mem: 9377 +Train: [95] [2000/6250] eta: 0:09:54 lr: 0.000001 grad: 0.2178 (0.2173) loss: 0.7528 (0.7565) time: 0.1347 data: 0.0523 max mem: 9377 +Train: [95] [2100/6250] eta: 0:09:40 lr: 0.000001 grad: 0.1983 (0.2170) loss: 0.7542 (0.7565) time: 0.1553 data: 0.0698 max mem: 9377 +Train: [95] [2200/6250] eta: 0:09:26 lr: 0.000001 grad: 0.2028 (0.2166) loss: 0.7668 (0.7566) time: 0.1385 data: 0.0581 max mem: 9377 +Train: [95] [2300/6250] eta: 0:09:11 lr: 0.000001 grad: 0.2081 (0.2164) loss: 0.7565 (0.7567) time: 0.1317 data: 0.0466 max mem: 9377 +Train: [95] [2400/6250] eta: 0:08:57 lr: 0.000001 grad: 0.2069 (0.2162) loss: 0.7599 (0.7567) time: 0.1281 data: 0.0455 max mem: 9377 +Train: [95] [2500/6250] eta: 0:08:44 lr: 0.000001 grad: 0.2077 (0.2160) loss: 0.7548 (0.7568) time: 0.1115 data: 0.0220 max mem: 9377 +Train: [95] [2600/6250] eta: 0:08:30 lr: 0.000001 grad: 0.2027 (0.2157) loss: 0.7684 (0.7571) time: 0.1542 data: 0.0695 max mem: 9377 +Train: [95] [2700/6250] eta: 0:08:15 lr: 0.000001 grad: 0.2063 (0.2155) loss: 0.7699 (0.7574) time: 0.1415 data: 0.0609 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:01 lr: 0.000001 grad: 0.2108 (0.2153) loss: 0.7579 (0.7575) time: 0.1476 data: 0.0665 max mem: 9377 +Train: [95] [2900/6250] eta: 0:07:46 lr: 0.000001 grad: 0.2052 (0.2151) loss: 0.7538 (0.7577) time: 0.1403 data: 0.0575 max mem: 9377 +Train: [95] [3000/6250] eta: 0:07:33 lr: 0.000001 grad: 0.2040 (0.2150) loss: 0.7618 (0.7578) time: 0.1503 data: 0.0651 max mem: 9377 +Train: [95] [3100/6250] eta: 0:07:18 lr: 0.000001 grad: 0.2083 (0.2148) loss: 0.7534 (0.7579) time: 0.1519 data: 0.0711 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:05 lr: 0.000001 grad: 0.2005 (0.2146) loss: 0.7576 (0.7580) time: 0.1372 data: 0.0533 max mem: 9377 +Train: [95] [3300/6250] eta: 0:06:51 lr: 0.000001 grad: 0.2031 (0.2145) loss: 0.7595 (0.7580) time: 0.1480 data: 0.0635 max mem: 9377 +Train: [95] [3400/6250] eta: 0:06:37 lr: 0.000001 grad: 0.2123 (0.2144) loss: 0.7621 (0.7581) time: 0.1322 data: 0.0477 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:24 lr: 0.000001 grad: 0.2141 (0.2144) loss: 0.7506 (0.7580) time: 0.1670 data: 0.0887 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:10 lr: 0.000001 grad: 0.2089 (0.2144) loss: 0.7596 (0.7579) time: 0.1262 data: 0.0553 max mem: 9377 +Train: [95] [3700/6250] eta: 0:05:56 lr: 0.000001 grad: 0.2106 (0.2144) loss: 0.7611 (0.7579) time: 0.1295 data: 0.0529 max mem: 9377 +Train: [95] [3800/6250] eta: 0:05:42 lr: 0.000001 grad: 0.2125 (0.2144) loss: 0.7542 (0.7578) time: 0.1517 data: 0.0661 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:29 lr: 0.000001 grad: 0.2137 (0.2144) loss: 0.7576 (0.7577) time: 0.1505 data: 0.0678 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:15 lr: 0.000001 grad: 0.2092 (0.2144) loss: 0.7547 (0.7575) time: 0.1555 data: 0.0657 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:01 lr: 0.000001 grad: 0.2120 (0.2142) loss: 0.7507 (0.7574) time: 0.1437 data: 0.0562 max mem: 9377 +Train: [95] [4200/6250] eta: 0:04:47 lr: 0.000001 grad: 0.2075 (0.2142) loss: 0.7512 (0.7574) time: 0.1418 data: 0.0581 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:33 lr: 0.000001 grad: 0.2165 (0.2142) loss: 0.7511 (0.7573) time: 0.1268 data: 0.0466 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:19 lr: 0.000001 grad: 0.2037 (0.2141) loss: 0.7539 (0.7572) time: 0.1457 data: 0.0651 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:05 lr: 0.000001 grad: 0.2168 (0.2142) loss: 0.7414 (0.7571) time: 0.1578 data: 0.0668 max mem: 9377 +Train: [95] [4600/6250] eta: 0:03:51 lr: 0.000001 grad: 0.2160 (0.2143) loss: 0.7452 (0.7570) time: 0.1229 data: 0.0338 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:37 lr: 0.000001 grad: 0.2021 (0.2143) loss: 0.7552 (0.7569) time: 0.1497 data: 0.0614 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:23 lr: 0.000001 grad: 0.2080 (0.2143) loss: 0.7576 (0.7568) time: 0.1348 data: 0.0533 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:09 lr: 0.000001 grad: 0.2069 (0.2143) loss: 0.7644 (0.7568) time: 0.1456 data: 0.0587 max mem: 9377 +Train: [95] [5000/6250] eta: 0:02:54 lr: 0.000001 grad: 0.2088 (0.2142) loss: 0.7546 (0.7568) time: 0.1159 data: 0.0268 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:40 lr: 0.000001 grad: 0.2127 (0.2142) loss: 0.7492 (0.7568) time: 0.1250 data: 0.0350 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:26 lr: 0.000001 grad: 0.2056 (0.2142) loss: 0.7583 (0.7568) time: 0.1329 data: 0.0462 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:12 lr: 0.000001 grad: 0.2107 (0.2141) loss: 0.7573 (0.7567) time: 0.1252 data: 0.0391 max mem: 9377 +Train: [95] [5400/6250] eta: 0:01:58 lr: 0.000001 grad: 0.2091 (0.2141) loss: 0.7502 (0.7567) time: 0.1742 data: 0.1009 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:44 lr: 0.000001 grad: 0.2086 (0.2141) loss: 0.7517 (0.7566) time: 0.1629 data: 0.0857 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:30 lr: 0.000001 grad: 0.2150 (0.2140) loss: 0.7485 (0.7565) time: 0.1415 data: 0.0618 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:16 lr: 0.000001 grad: 0.2066 (0.2141) loss: 0.7514 (0.7565) time: 0.1888 data: 0.1029 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:02 lr: 0.000001 grad: 0.2146 (0.2140) loss: 0.7534 (0.7564) time: 0.1225 data: 0.0416 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:48 lr: 0.000001 grad: 0.2110 (0.2140) loss: 0.7465 (0.7563) time: 0.1440 data: 0.0669 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:34 lr: 0.000001 grad: 0.2159 (0.2141) loss: 0.7421 (0.7562) time: 0.1412 data: 0.0593 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:20 lr: 0.000001 grad: 0.2156 (0.2141) loss: 0.7391 (0.7561) time: 0.1593 data: 0.0816 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2126 (0.2141) loss: 0.7486 (0.7560) time: 0.1576 data: 0.0789 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2108 (0.2142) loss: 0.7496 (0.7559) time: 0.1509 data: 0.0670 max mem: 9377 +Train: [95] Total time: 0:14:41 (0.1410 s / it) +Averaged stats: lr: 0.000001 grad: 0.2108 (0.2142) loss: 0.7496 (0.7559) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:04:05 loss: 0.8059 (0.8059) time: 3.9581 data: 3.8708 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8030 (0.8008) time: 0.1113 data: 0.0849 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:12 (0.2066 s / it) +Averaged stats (hcp-train-subset): loss: 0.8030 (0.8008) +Eval (hcp-val): [95] [ 0/62] eta: 0:05:05 loss: 0.8528 (0.8528) time: 4.9237 data: 4.8884 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8516 (0.8545) time: 0.1094 data: 0.0828 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:12 (0.2082 s / it) +Averaged stats (hcp-val): loss: 0.8516 (0.8545) +Eval (nsd-val): [95] [ 0/62] eta: 0:04:04 loss: 0.8322 (0.8322) time: 3.9393 data: 3.8683 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8329 (0.8359) time: 0.1264 data: 0.1014 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:13 (0.2143 s / it) +Averaged stats (nsd-val): loss: 0.8329 (0.8359) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 8:34:02 lr: 0.000001 grad: 0.1750 (0.1750) loss: 0.8575 (0.8575) time: 4.9348 data: 4.6720 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:20:26 lr: 0.000001 grad: 0.2216 (0.2269) loss: 0.7645 (0.7777) time: 0.1468 data: 0.0436 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:17:35 lr: 0.000001 grad: 0.2283 (0.2305) loss: 0.7504 (0.7673) time: 0.1687 data: 0.0813 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:15:59 lr: 0.000001 grad: 0.2252 (0.2294) loss: 0.7571 (0.7641) time: 0.1321 data: 0.0419 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:14:57 lr: 0.000001 grad: 0.2207 (0.2275) loss: 0.7600 (0.7629) time: 0.1395 data: 0.0549 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:14:08 lr: 0.000001 grad: 0.2087 (0.2247) loss: 0.7635 (0.7633) time: 0.1305 data: 0.0412 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:13:44 lr: 0.000001 grad: 0.2092 (0.2232) loss: 0.7602 (0.7631) time: 0.1579 data: 0.0767 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:13:22 lr: 0.000001 grad: 0.2115 (0.2221) loss: 0.7646 (0.7631) time: 0.1418 data: 0.0480 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:13:11 lr: 0.000001 grad: 0.2088 (0.2206) loss: 0.7531 (0.7634) time: 0.1422 data: 0.0537 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:12:59 lr: 0.000001 grad: 0.2022 (0.2190) loss: 0.7627 (0.7638) time: 0.1500 data: 0.0618 max mem: 9377 +Train: [96] [1000/6250] eta: 0:12:46 lr: 0.000001 grad: 0.2035 (0.2173) loss: 0.7685 (0.7642) time: 0.1253 data: 0.0448 max mem: 9377 +Train: [96] [1100/6250] eta: 0:12:33 lr: 0.000000 grad: 0.2102 (0.2161) loss: 0.7621 (0.7643) time: 0.1584 data: 0.0783 max mem: 9377 +Train: [96] [1200/6250] eta: 0:12:19 lr: 0.000000 grad: 0.2080 (0.2155) loss: 0.7573 (0.7639) time: 0.1736 data: 0.0945 max mem: 9377 +Train: [96] [1300/6250] eta: 0:12:03 lr: 0.000000 grad: 0.2063 (0.2148) loss: 0.7675 (0.7636) time: 0.1514 data: 0.0746 max mem: 9377 +Train: [96] [1400/6250] eta: 0:11:48 lr: 0.000000 grad: 0.2034 (0.2141) loss: 0.7590 (0.7634) time: 0.1412 data: 0.0546 max mem: 9377 +Train: [96] [1500/6250] eta: 0:11:35 lr: 0.000000 grad: 0.2075 (0.2137) loss: 0.7552 (0.7631) time: 0.1510 data: 0.0675 max mem: 9377 +Train: [96] [1600/6250] eta: 0:11:21 lr: 0.000000 grad: 0.2004 (0.2133) loss: 0.7648 (0.7629) time: 0.1571 data: 0.0787 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:05 lr: 0.000000 grad: 0.2097 (0.2132) loss: 0.7474 (0.7625) time: 0.1164 data: 0.0363 max mem: 9377 +Train: [96] [1800/6250] eta: 0:10:52 lr: 0.000000 grad: 0.2031 (0.2128) loss: 0.7599 (0.7623) time: 0.1944 data: 0.1176 max mem: 9377 +Train: [96] [1900/6250] eta: 0:10:35 lr: 0.000000 grad: 0.2147 (0.2126) loss: 0.7444 (0.7620) time: 0.1693 data: 0.0897 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:19 lr: 0.000000 grad: 0.2077 (0.2123) loss: 0.7657 (0.7620) time: 0.1461 data: 0.0582 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:05 lr: 0.000000 grad: 0.2030 (0.2120) loss: 0.7596 (0.7619) time: 0.1499 data: 0.0739 max mem: 9377 +Train: [96] [2200/6250] eta: 0:09:49 lr: 0.000000 grad: 0.2001 (0.2119) loss: 0.7554 (0.7618) time: 0.1245 data: 0.0440 max mem: 9377 +Train: [96] [2300/6250] eta: 0:09:35 lr: 0.000000 grad: 0.2070 (0.2117) loss: 0.7633 (0.7617) time: 0.1583 data: 0.0808 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:21 lr: 0.000000 grad: 0.2056 (0.2116) loss: 0.7619 (0.7616) time: 0.1344 data: 0.0534 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:05 lr: 0.000000 grad: 0.1989 (0.2115) loss: 0.7605 (0.7614) time: 0.1497 data: 0.0646 max mem: 9377 +Train: [96] [2600/6250] eta: 0:08:51 lr: 0.000000 grad: 0.2087 (0.2114) loss: 0.7524 (0.7613) time: 0.1503 data: 0.0743 max mem: 9377 +Train: [96] [2700/6250] eta: 0:08:36 lr: 0.000000 grad: 0.2100 (0.2114) loss: 0.7542 (0.7611) time: 0.1548 data: 0.0735 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:22 lr: 0.000000 grad: 0.2059 (0.2114) loss: 0.7607 (0.7609) time: 0.1356 data: 0.0488 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:07 lr: 0.000000 grad: 0.2072 (0.2114) loss: 0.7593 (0.7607) time: 0.1646 data: 0.0864 max mem: 9377 +Train: [96] [3000/6250] eta: 0:07:52 lr: 0.000000 grad: 0.2026 (0.2113) loss: 0.7584 (0.7606) time: 0.1262 data: 0.0484 max mem: 9377 +Train: [96] [3100/6250] eta: 0:07:38 lr: 0.000000 grad: 0.2052 (0.2112) loss: 0.7569 (0.7607) time: 0.1545 data: 0.0762 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:24 lr: 0.000000 grad: 0.2038 (0.2111) loss: 0.7653 (0.7606) time: 0.1572 data: 0.0844 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:09 lr: 0.000000 grad: 0.2069 (0.2110) loss: 0.7580 (0.7607) time: 0.1550 data: 0.0780 max mem: 9377 +Train: [96] [3400/6250] eta: 0:06:55 lr: 0.000000 grad: 0.2019 (0.2109) loss: 0.7578 (0.7607) time: 0.1002 data: 0.0196 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:39 lr: 0.000000 grad: 0.2031 (0.2107) loss: 0.7628 (0.7609) time: 0.1529 data: 0.0723 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:24 lr: 0.000000 grad: 0.2011 (0.2106) loss: 0.7712 (0.7610) time: 0.1402 data: 0.0612 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:09 lr: 0.000000 grad: 0.2049 (0.2106) loss: 0.7652 (0.7610) time: 0.1250 data: 0.0484 max mem: 9377 +Train: [96] [3800/6250] eta: 0:05:54 lr: 0.000000 grad: 0.2093 (0.2105) loss: 0.7588 (0.7611) time: 0.1367 data: 0.0607 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:39 lr: 0.000000 grad: 0.2134 (0.2105) loss: 0.7554 (0.7610) time: 0.1156 data: 0.0357 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:24 lr: 0.000000 grad: 0.2088 (0.2104) loss: 0.7598 (0.7611) time: 0.1344 data: 0.0583 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:09 lr: 0.000000 grad: 0.2037 (0.2103) loss: 0.7703 (0.7610) time: 0.1491 data: 0.0741 max mem: 9377 +Train: [96] [4200/6250] eta: 0:04:54 lr: 0.000000 grad: 0.2078 (0.2103) loss: 0.7582 (0.7610) time: 0.1401 data: 0.0689 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:39 lr: 0.000000 grad: 0.1976 (0.2102) loss: 0.7600 (0.7610) time: 0.1317 data: 0.0572 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:24 lr: 0.000000 grad: 0.1998 (0.2101) loss: 0.7705 (0.7611) time: 0.1428 data: 0.0673 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:09 lr: 0.000000 grad: 0.2059 (0.2100) loss: 0.7663 (0.7611) time: 0.1366 data: 0.0596 max mem: 9377 +Train: [96] [4600/6250] eta: 0:03:54 lr: 0.000000 grad: 0.2072 (0.2100) loss: 0.7589 (0.7612) time: 0.1182 data: 0.0450 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:39 lr: 0.000000 grad: 0.2003 (0.2099) loss: 0.7657 (0.7612) time: 0.1217 data: 0.0477 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:24 lr: 0.000000 grad: 0.2028 (0.2097) loss: 0.7662 (0.7613) time: 0.1175 data: 0.0428 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:10 lr: 0.000000 grad: 0.2050 (0.2096) loss: 0.7612 (0.7614) time: 0.1024 data: 0.0296 max mem: 9377 +Train: [96] [5000/6250] eta: 0:02:55 lr: 0.000000 grad: 0.1998 (0.2095) loss: 0.7695 (0.7615) time: 0.1011 data: 0.0283 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:41 lr: 0.000000 grad: 0.2085 (0.2094) loss: 0.7515 (0.7615) time: 0.1240 data: 0.0513 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:27 lr: 0.000000 grad: 0.2113 (0.2094) loss: 0.7561 (0.7615) time: 0.1313 data: 0.0512 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:12 lr: 0.000000 grad: 0.2093 (0.2093) loss: 0.7649 (0.7615) time: 0.1286 data: 0.0494 max mem: 9377 +Train: [96] [5400/6250] eta: 0:01:58 lr: 0.000000 grad: 0.2097 (0.2093) loss: 0.7613 (0.7615) time: 0.1201 data: 0.0420 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:44 lr: 0.000000 grad: 0.2168 (0.2093) loss: 0.7594 (0.7615) time: 0.1046 data: 0.0295 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:30 lr: 0.000000 grad: 0.2080 (0.2094) loss: 0.7504 (0.7614) time: 0.1158 data: 0.0445 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:16 lr: 0.000000 grad: 0.2132 (0.2094) loss: 0.7499 (0.7614) time: 0.1425 data: 0.0718 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:02 lr: 0.000000 grad: 0.2096 (0.2095) loss: 0.7608 (0.7614) time: 0.1295 data: 0.0534 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:48 lr: 0.000000 grad: 0.2181 (0.2096) loss: 0.7444 (0.7612) time: 0.1208 data: 0.0474 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:34 lr: 0.000000 grad: 0.2154 (0.2097) loss: 0.7556 (0.7611) time: 0.1197 data: 0.0400 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:20 lr: 0.000000 grad: 0.2103 (0.2098) loss: 0.7612 (0.7610) time: 0.1238 data: 0.0494 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.2159 (0.2098) loss: 0.7561 (0.7610) time: 0.1072 data: 0.0403 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2144 (0.2099) loss: 0.7521 (0.7610) time: 0.1383 data: 0.0641 max mem: 9377 +Train: [96] Total time: 0:14:22 (0.1380 s / it) +Averaged stats: lr: 0.000000 grad: 0.2144 (0.2099) loss: 0.7521 (0.7610) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:05:04 loss: 0.8080 (0.8080) time: 4.9175 data: 4.8886 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8019 (0.8010) time: 0.1195 data: 0.0949 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:12 (0.1939 s / it) +Averaged stats (hcp-train-subset): loss: 0.8019 (0.8010) +Eval (hcp-val): [96] [ 0/62] eta: 0:04:27 loss: 0.8553 (0.8553) time: 4.3072 data: 4.2668 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8541 (0.8545) time: 0.1173 data: 0.0911 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:11 (0.1933 s / it) +Averaged stats (hcp-val): loss: 0.8541 (0.8545) +Eval (nsd-val): [96] [ 0/62] eta: 0:04:27 loss: 0.8196 (0.8196) time: 4.3168 data: 4.2881 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8345 (0.8355) time: 0.1053 data: 0.0809 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:11 (0.1854 s / it) +Averaged stats (nsd-val): loss: 0.8345 (0.8355) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 7:27:57 lr: 0.000000 grad: 0.2104 (0.2104) loss: 0.8415 (0.8415) time: 4.3004 data: 4.0584 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:17:01 lr: 0.000000 grad: 0.2193 (0.2196) loss: 0.7528 (0.7837) time: 0.1152 data: 0.0310 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:14:49 lr: 0.000000 grad: 0.2181 (0.2203) loss: 0.7723 (0.7771) time: 0.1322 data: 0.0456 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:14:05 lr: 0.000000 grad: 0.2214 (0.2196) loss: 0.7713 (0.7743) time: 0.1178 data: 0.0234 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:13:31 lr: 0.000000 grad: 0.2058 (0.2179) loss: 0.7776 (0.7733) time: 0.1269 data: 0.0481 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:12:56 lr: 0.000000 grad: 0.2086 (0.2175) loss: 0.7627 (0.7719) time: 0.1110 data: 0.0270 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:12:31 lr: 0.000000 grad: 0.2101 (0.2172) loss: 0.7672 (0.7710) time: 0.1191 data: 0.0377 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:12:02 lr: 0.000000 grad: 0.1992 (0.2166) loss: 0.7579 (0.7699) time: 0.0999 data: 0.0212 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:11:47 lr: 0.000000 grad: 0.2026 (0.2163) loss: 0.7663 (0.7690) time: 0.1046 data: 0.0199 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:11:34 lr: 0.000000 grad: 0.2178 (0.2161) loss: 0.7524 (0.7676) time: 0.1163 data: 0.0335 max mem: 9377 +Train: [97] [1000/6250] eta: 0:11:18 lr: 0.000000 grad: 0.2124 (0.2158) loss: 0.7551 (0.7669) time: 0.1361 data: 0.0610 max mem: 9377 +Train: [97] [1100/6250] eta: 0:10:58 lr: 0.000000 grad: 0.2144 (0.2157) loss: 0.7629 (0.7661) time: 0.1068 data: 0.0270 max mem: 9377 +Train: [97] [1200/6250] eta: 0:10:39 lr: 0.000000 grad: 0.2145 (0.2156) loss: 0.7476 (0.7653) time: 0.0916 data: 0.0180 max mem: 9377 +Train: [97] [1300/6250] eta: 0:10:23 lr: 0.000000 grad: 0.2193 (0.2155) loss: 0.7561 (0.7647) time: 0.1340 data: 0.0519 max mem: 9377 +Train: [97] [1400/6250] eta: 0:10:07 lr: 0.000000 grad: 0.2085 (0.2153) loss: 0.7686 (0.7644) time: 0.1121 data: 0.0367 max mem: 9377 +Train: [97] [1500/6250] eta: 0:09:53 lr: 0.000000 grad: 0.2063 (0.2150) loss: 0.7540 (0.7641) time: 0.1339 data: 0.0571 max mem: 9377 +Train: [97] [1600/6250] eta: 0:09:40 lr: 0.000000 grad: 0.2159 (0.2150) loss: 0.7596 (0.7637) time: 0.1214 data: 0.0450 max mem: 9377 +Train: [97] [1700/6250] eta: 0:09:26 lr: 0.000000 grad: 0.2133 (0.2151) loss: 0.7501 (0.7633) time: 0.1208 data: 0.0441 max mem: 9377 +Train: [97] [1800/6250] eta: 0:09:11 lr: 0.000000 grad: 0.2156 (0.2150) loss: 0.7552 (0.7629) time: 0.1047 data: 0.0290 max mem: 9377 +Train: [97] [1900/6250] eta: 0:08:55 lr: 0.000000 grad: 0.2110 (0.2149) loss: 0.7529 (0.7627) time: 0.1107 data: 0.0212 max mem: 9377 +Train: [97] [2000/6250] eta: 0:08:39 lr: 0.000000 grad: 0.2157 (0.2149) loss: 0.7619 (0.7624) time: 0.1057 data: 0.0339 max mem: 9377 +Train: [97] [2100/6250] eta: 0:08:27 lr: 0.000000 grad: 0.2051 (0.2149) loss: 0.7671 (0.7621) time: 0.1185 data: 0.0370 max mem: 9377 +Train: [97] [2200/6250] eta: 0:08:14 lr: 0.000000 grad: 0.2083 (0.2148) loss: 0.7641 (0.7619) time: 0.1204 data: 0.0445 max mem: 9377 +Train: [97] [2300/6250] eta: 0:08:02 lr: 0.000000 grad: 0.2142 (0.2147) loss: 0.7536 (0.7616) time: 0.1248 data: 0.0501 max mem: 9377 +Train: [97] [2400/6250] eta: 0:07:48 lr: 0.000000 grad: 0.2202 (0.2147) loss: 0.7505 (0.7612) time: 0.1051 data: 0.0293 max mem: 9377 +Train: [97] [2500/6250] eta: 0:07:35 lr: 0.000000 grad: 0.2193 (0.2146) loss: 0.7587 (0.7608) time: 0.1098 data: 0.0373 max mem: 9377 +Train: [97] [2600/6250] eta: 0:07:21 lr: 0.000000 grad: 0.2099 (0.2146) loss: 0.7603 (0.7606) time: 0.1302 data: 0.0572 max mem: 9377 +Train: [97] [2700/6250] eta: 0:07:09 lr: 0.000000 grad: 0.2127 (0.2145) loss: 0.7426 (0.7604) time: 0.1214 data: 0.0420 max mem: 9377 +Train: [97] [2800/6250] eta: 0:06:56 lr: 0.000000 grad: 0.2111 (0.2143) loss: 0.7539 (0.7603) time: 0.1025 data: 0.0227 max mem: 9377 +Train: [97] [2900/6250] eta: 0:06:44 lr: 0.000000 grad: 0.2093 (0.2142) loss: 0.7459 (0.7601) time: 0.1235 data: 0.0496 max mem: 9377 +Train: [97] [3000/6250] eta: 0:06:32 lr: 0.000000 grad: 0.2072 (0.2141) loss: 0.7600 (0.7599) time: 0.1214 data: 0.0476 max mem: 9377 +Train: [97] [3100/6250] eta: 0:06:19 lr: 0.000000 grad: 0.2124 (0.2139) loss: 0.7601 (0.7599) time: 0.1109 data: 0.0350 max mem: 9377 +Train: [97] [3200/6250] eta: 0:06:07 lr: 0.000000 grad: 0.2128 (0.2138) loss: 0.7534 (0.7598) time: 0.1200 data: 0.0450 max mem: 9377 +Train: [97] [3300/6250] eta: 0:05:55 lr: 0.000000 grad: 0.2028 (0.2137) loss: 0.7644 (0.7598) time: 0.1123 data: 0.0311 max mem: 9377 +Train: [97] [3400/6250] eta: 0:05:43 lr: 0.000000 grad: 0.2089 (0.2136) loss: 0.7571 (0.7598) time: 0.1272 data: 0.0426 max mem: 9377 +Train: [97] [3500/6250] eta: 0:05:31 lr: 0.000000 grad: 0.2137 (0.2136) loss: 0.7517 (0.7596) time: 0.1044 data: 0.0280 max mem: 9377 +Train: [97] [3600/6250] eta: 0:05:19 lr: 0.000000 grad: 0.2103 (0.2137) loss: 0.7600 (0.7595) time: 0.1082 data: 0.0332 max mem: 9377 +Train: [97] [3700/6250] eta: 0:05:07 lr: 0.000000 grad: 0.2118 (0.2138) loss: 0.7595 (0.7594) time: 0.1121 data: 0.0340 max mem: 9377 +Train: [97] [3800/6250] eta: 0:04:54 lr: 0.000000 grad: 0.2082 (0.2137) loss: 0.7621 (0.7594) time: 0.1233 data: 0.0450 max mem: 9377 +Train: [97] [3900/6250] eta: 0:04:42 lr: 0.000000 grad: 0.2116 (0.2138) loss: 0.7548 (0.7593) time: 0.1276 data: 0.0471 max mem: 9377 +Train: [97] [4000/6250] eta: 0:04:31 lr: 0.000000 grad: 0.2157 (0.2138) loss: 0.7505 (0.7593) time: 0.1215 data: 0.0410 max mem: 9377 +Train: [97] [4100/6250] eta: 0:04:19 lr: 0.000000 grad: 0.2077 (0.2138) loss: 0.7551 (0.7591) time: 0.1340 data: 0.0595 max mem: 9377 +Train: [97] [4200/6250] eta: 0:04:06 lr: 0.000000 grad: 0.2092 (0.2138) loss: 0.7621 (0.7590) time: 0.1102 data: 0.0367 max mem: 9377 +Train: [97] [4300/6250] eta: 0:03:54 lr: 0.000000 grad: 0.2181 (0.2138) loss: 0.7571 (0.7590) time: 0.1240 data: 0.0473 max mem: 9377 +Train: [97] [4400/6250] eta: 0:03:42 lr: 0.000000 grad: 0.2125 (0.2137) loss: 0.7574 (0.7590) time: 0.1236 data: 0.0511 max mem: 9377 +Train: [97] [4500/6250] eta: 0:03:30 lr: 0.000000 grad: 0.2096 (0.2137) loss: 0.7611 (0.7589) time: 0.1251 data: 0.0431 max mem: 9377 +Train: [97] [4600/6250] eta: 0:03:18 lr: 0.000000 grad: 0.2105 (0.2137) loss: 0.7604 (0.7589) time: 0.1177 data: 0.0410 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:06 lr: 0.000000 grad: 0.2175 (0.2137) loss: 0.7564 (0.7588) time: 0.1177 data: 0.0385 max mem: 9377 +Train: [97] [4800/6250] eta: 0:02:54 lr: 0.000000 grad: 0.2103 (0.2138) loss: 0.7546 (0.7587) time: 0.1173 data: 0.0372 max mem: 9377 +Train: [97] [4900/6250] eta: 0:02:42 lr: 0.000000 grad: 0.2059 (0.2138) loss: 0.7660 (0.7587) time: 0.1129 data: 0.0426 max mem: 9377 +Train: [97] [5000/6250] eta: 0:02:30 lr: 0.000000 grad: 0.2032 (0.2137) loss: 0.7647 (0.7587) time: 0.1144 data: 0.0339 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:18 lr: 0.000000 grad: 0.2159 (0.2137) loss: 0.7577 (0.7587) time: 0.1290 data: 0.0538 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:06 lr: 0.000000 grad: 0.2100 (0.2136) loss: 0.7520 (0.7587) time: 0.1326 data: 0.0507 max mem: 9377 +Train: [97] [5300/6250] eta: 0:01:54 lr: 0.000000 grad: 0.2011 (0.2136) loss: 0.7714 (0.7587) time: 0.1271 data: 0.0445 max mem: 9377 +Train: [97] [5400/6250] eta: 0:01:42 lr: 0.000000 grad: 0.2080 (0.2135) loss: 0.7604 (0.7588) time: 0.1302 data: 0.0542 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:30 lr: 0.000000 grad: 0.2109 (0.2135) loss: 0.7479 (0.7588) time: 0.1124 data: 0.0354 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:18 lr: 0.000000 grad: 0.2131 (0.2135) loss: 0.7555 (0.7587) time: 0.1398 data: 0.0631 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:06 lr: 0.000000 grad: 0.2141 (0.2135) loss: 0.7539 (0.7587) time: 0.1393 data: 0.0599 max mem: 9377 +Train: [97] [5800/6250] eta: 0:00:54 lr: 0.000000 grad: 0.2155 (0.2135) loss: 0.7523 (0.7586) time: 0.1243 data: 0.0445 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:42 lr: 0.000000 grad: 0.2131 (0.2135) loss: 0.7444 (0.7585) time: 0.1031 data: 0.0252 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:30 lr: 0.000000 grad: 0.2088 (0.2135) loss: 0.7587 (0.7586) time: 0.1220 data: 0.0454 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:18 lr: 0.000000 grad: 0.2089 (0.2135) loss: 0.7548 (0.7585) time: 0.1050 data: 0.0259 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.2165 (0.2135) loss: 0.7525 (0.7585) time: 0.1314 data: 0.0521 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2109 (0.2135) loss: 0.7598 (0.7585) time: 0.1203 data: 0.0397 max mem: 9377 +Train: [97] Total time: 0:12:40 (0.1217 s / it) +Averaged stats: lr: 0.000000 grad: 0.2109 (0.2135) loss: 0.7598 (0.7585) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:05:08 loss: 0.8118 (0.8118) time: 4.9799 data: 4.9496 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8031 (0.8011) time: 0.1039 data: 0.0794 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:12 (0.1974 s / it) +Averaged stats (hcp-train-subset): loss: 0.8031 (0.8011) +Eval (hcp-val): [97] [ 0/62] eta: 0:04:03 loss: 0.8534 (0.8534) time: 3.9290 data: 3.8576 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8531 (0.8542) time: 0.1037 data: 0.0792 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:12 (0.2027 s / it) +Averaged stats (hcp-val): loss: 0.8531 (0.8542) +Eval (nsd-val): [97] [ 0/62] eta: 0:03:50 loss: 0.8206 (0.8206) time: 3.7170 data: 3.6544 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8342 (0.8357) time: 0.1169 data: 0.0897 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:12 (0.1970 s / it) +Averaged stats (nsd-val): loss: 0.8342 (0.8357) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 7:16:06 lr: 0.000000 grad: 0.4303 (0.4303) loss: 0.7139 (0.7139) time: 4.1866 data: 3.9224 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:18:38 lr: 0.000000 grad: 0.2354 (0.2435) loss: 0.7436 (0.7601) time: 0.1377 data: 0.0436 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:16:02 lr: 0.000000 grad: 0.2213 (0.2350) loss: 0.7569 (0.7583) time: 0.1286 data: 0.0374 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:14:59 lr: 0.000000 grad: 0.2150 (0.2292) loss: 0.7440 (0.7585) time: 0.1210 data: 0.0284 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:14:24 lr: 0.000000 grad: 0.2201 (0.2264) loss: 0.7439 (0.7573) time: 0.1332 data: 0.0376 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:13:50 lr: 0.000000 grad: 0.2053 (0.2242) loss: 0.7586 (0.7568) time: 0.1344 data: 0.0500 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:13:23 lr: 0.000000 grad: 0.2250 (0.2232) loss: 0.7404 (0.7563) time: 0.1297 data: 0.0503 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:12:54 lr: 0.000000 grad: 0.2043 (0.2219) loss: 0.7587 (0.7557) time: 0.1250 data: 0.0439 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:12:36 lr: 0.000000 grad: 0.2160 (0.2212) loss: 0.7467 (0.7552) time: 0.1337 data: 0.0456 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:12:15 lr: 0.000000 grad: 0.2146 (0.2208) loss: 0.7444 (0.7547) time: 0.1026 data: 0.0163 max mem: 9377 +Train: [98] [1000/6250] eta: 0:11:54 lr: 0.000000 grad: 0.2190 (0.2205) loss: 0.7431 (0.7540) time: 0.1225 data: 0.0437 max mem: 9377 +Train: [98] [1100/6250] eta: 0:11:31 lr: 0.000000 grad: 0.2153 (0.2203) loss: 0.7467 (0.7537) time: 0.1206 data: 0.0306 max mem: 9377 +Train: [98] [1200/6250] eta: 0:11:09 lr: 0.000000 grad: 0.2115 (0.2201) loss: 0.7550 (0.7535) time: 0.1161 data: 0.0408 max mem: 9377 +Train: [98] [1300/6250] eta: 0:10:51 lr: 0.000000 grad: 0.2213 (0.2201) loss: 0.7518 (0.7530) time: 0.1245 data: 0.0447 max mem: 9377 +Train: [98] [1400/6250] eta: 0:10:34 lr: 0.000000 grad: 0.2128 (0.2200) loss: 0.7520 (0.7527) time: 0.1247 data: 0.0396 max mem: 9377 +Train: [98] [1500/6250] eta: 0:10:18 lr: 0.000000 grad: 0.2153 (0.2199) loss: 0.7436 (0.7522) time: 0.1397 data: 0.0638 max mem: 9377 +Train: [98] [1600/6250] eta: 0:10:00 lr: 0.000000 grad: 0.2192 (0.2199) loss: 0.7470 (0.7518) time: 0.1128 data: 0.0319 max mem: 9377 +Train: [98] [1700/6250] eta: 0:09:45 lr: 0.000000 grad: 0.2201 (0.2199) loss: 0.7430 (0.7516) time: 0.1291 data: 0.0568 max mem: 9377 +Train: [98] [1800/6250] eta: 0:09:28 lr: 0.000000 grad: 0.2170 (0.2196) loss: 0.7484 (0.7516) time: 0.1203 data: 0.0398 max mem: 9377 +Train: [98] [1900/6250] eta: 0:09:13 lr: 0.000000 grad: 0.2207 (0.2195) loss: 0.7439 (0.7517) time: 0.1312 data: 0.0527 max mem: 9377 +Train: [98] [2000/6250] eta: 0:08:59 lr: 0.000000 grad: 0.2113 (0.2194) loss: 0.7585 (0.7518) time: 0.1119 data: 0.0273 max mem: 9377 +Train: [98] [2100/6250] eta: 0:08:46 lr: 0.000000 grad: 0.2156 (0.2191) loss: 0.7523 (0.7518) time: 0.1217 data: 0.0450 max mem: 9377 +Train: [98] [2200/6250] eta: 0:08:33 lr: 0.000000 grad: 0.2073 (0.2189) loss: 0.7638 (0.7521) time: 0.1129 data: 0.0327 max mem: 9377 +Train: [98] [2300/6250] eta: 0:08:19 lr: 0.000000 grad: 0.2132 (0.2185) loss: 0.7613 (0.7524) time: 0.1184 data: 0.0404 max mem: 9377 +Train: [98] [2400/6250] eta: 0:08:05 lr: 0.000000 grad: 0.2059 (0.2182) loss: 0.7556 (0.7525) time: 0.1223 data: 0.0483 max mem: 9377 +Train: [98] [2500/6250] eta: 0:07:53 lr: 0.000000 grad: 0.2107 (0.2179) loss: 0.7499 (0.7526) time: 0.1382 data: 0.0602 max mem: 9377 +Train: [98] [2600/6250] eta: 0:07:40 lr: 0.000000 grad: 0.2065 (0.2176) loss: 0.7600 (0.7528) time: 0.1164 data: 0.0375 max mem: 9377 +Train: [98] [2700/6250] eta: 0:07:28 lr: 0.000000 grad: 0.2173 (0.2173) loss: 0.7455 (0.7529) time: 0.1359 data: 0.0530 max mem: 9377 +Train: [98] [2800/6250] eta: 0:07:15 lr: 0.000000 grad: 0.2126 (0.2171) loss: 0.7495 (0.7531) time: 0.1244 data: 0.0460 max mem: 9377 +Train: [98] [2900/6250] eta: 0:07:01 lr: 0.000000 grad: 0.2138 (0.2169) loss: 0.7496 (0.7534) time: 0.1194 data: 0.0394 max mem: 9377 +Train: [98] [3000/6250] eta: 0:06:49 lr: 0.000000 grad: 0.2124 (0.2168) loss: 0.7477 (0.7533) time: 0.1309 data: 0.0468 max mem: 9377 +Train: [98] [3100/6250] eta: 0:06:36 lr: 0.000000 grad: 0.2134 (0.2167) loss: 0.7542 (0.7535) time: 0.1114 data: 0.0312 max mem: 9377 +Train: [98] [3200/6250] eta: 0:06:23 lr: 0.000000 grad: 0.2111 (0.2165) loss: 0.7571 (0.7536) time: 0.1130 data: 0.0268 max mem: 9377 +Train: [98] [3300/6250] eta: 0:06:11 lr: 0.000000 grad: 0.2030 (0.2164) loss: 0.7616 (0.7537) time: 0.1631 data: 0.0845 max mem: 9377 +Train: [98] [3400/6250] eta: 0:05:57 lr: 0.000000 grad: 0.2042 (0.2162) loss: 0.7614 (0.7537) time: 0.1095 data: 0.0305 max mem: 9377 +Train: [98] [3500/6250] eta: 0:05:45 lr: 0.000000 grad: 0.2131 (0.2162) loss: 0.7572 (0.7537) time: 0.1222 data: 0.0375 max mem: 9377 +Train: [98] [3600/6250] eta: 0:05:32 lr: 0.000000 grad: 0.2097 (0.2162) loss: 0.7572 (0.7536) time: 0.1245 data: 0.0489 max mem: 9377 +Train: [98] [3700/6250] eta: 0:05:19 lr: 0.000000 grad: 0.2187 (0.2161) loss: 0.7437 (0.7536) time: 0.1222 data: 0.0415 max mem: 9377 +Train: [98] [3800/6250] eta: 0:05:06 lr: 0.000000 grad: 0.2123 (0.2160) loss: 0.7570 (0.7536) time: 0.1223 data: 0.0468 max mem: 9377 +Train: [98] [3900/6250] eta: 0:04:54 lr: 0.000000 grad: 0.2099 (0.2159) loss: 0.7556 (0.7538) time: 0.1235 data: 0.0480 max mem: 9377 +Train: [98] [4000/6250] eta: 0:04:41 lr: 0.000000 grad: 0.2200 (0.2158) loss: 0.7450 (0.7538) time: 0.1199 data: 0.0372 max mem: 9377 +Train: [98] [4100/6250] eta: 0:04:28 lr: 0.000000 grad: 0.2088 (0.2157) loss: 0.7619 (0.7539) time: 0.1180 data: 0.0414 max mem: 9377 +Train: [98] [4200/6250] eta: 0:04:16 lr: 0.000000 grad: 0.2091 (0.2157) loss: 0.7607 (0.7539) time: 0.1364 data: 0.0609 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:03 lr: 0.000000 grad: 0.2141 (0.2157) loss: 0.7623 (0.7539) time: 0.1229 data: 0.0466 max mem: 9377 +Train: [98] [4400/6250] eta: 0:03:51 lr: 0.000000 grad: 0.2102 (0.2157) loss: 0.7607 (0.7540) time: 0.1311 data: 0.0552 max mem: 9377 +Train: [98] [4500/6250] eta: 0:03:38 lr: 0.000000 grad: 0.2176 (0.2157) loss: 0.7496 (0.7540) time: 0.1288 data: 0.0535 max mem: 9377 +Train: [98] [4600/6250] eta: 0:03:26 lr: 0.000000 grad: 0.2055 (0.2156) loss: 0.7640 (0.7542) time: 0.1219 data: 0.0418 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:13 lr: 0.000000 grad: 0.2067 (0.2155) loss: 0.7592 (0.7543) time: 0.1382 data: 0.0619 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:01 lr: 0.000000 grad: 0.2108 (0.2154) loss: 0.7642 (0.7544) time: 0.1190 data: 0.0418 max mem: 9377 +Train: [98] [4900/6250] eta: 0:02:48 lr: 0.000000 grad: 0.2143 (0.2154) loss: 0.7508 (0.7545) time: 0.1114 data: 0.0334 max mem: 9377 +Train: [98] [5000/6250] eta: 0:02:36 lr: 0.000000 grad: 0.2090 (0.2153) loss: 0.7607 (0.7547) time: 0.1165 data: 0.0302 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:23 lr: 0.000000 grad: 0.2066 (0.2152) loss: 0.7659 (0.7549) time: 0.1372 data: 0.0590 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:11 lr: 0.000000 grad: 0.2143 (0.2151) loss: 0.7623 (0.7550) time: 0.1212 data: 0.0372 max mem: 9377 +Train: [98] [5300/6250] eta: 0:01:58 lr: 0.000000 grad: 0.2144 (0.2150) loss: 0.7467 (0.7551) time: 0.1114 data: 0.0281 max mem: 9377 +Train: [98] [5400/6250] eta: 0:01:46 lr: 0.000000 grad: 0.2106 (0.2148) loss: 0.7599 (0.7552) time: 0.1388 data: 0.0560 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:33 lr: 0.000000 grad: 0.2144 (0.2148) loss: 0.7648 (0.7552) time: 0.1415 data: 0.0678 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:20 lr: 0.000000 grad: 0.2118 (0.2147) loss: 0.7586 (0.7553) time: 0.1191 data: 0.0386 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:08 lr: 0.000000 grad: 0.2054 (0.2145) loss: 0.7671 (0.7555) time: 0.1082 data: 0.0278 max mem: 9377 +Train: [98] [5800/6250] eta: 0:00:55 lr: 0.000000 grad: 0.2140 (0.2145) loss: 0.7582 (0.7555) time: 0.1132 data: 0.0345 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:43 lr: 0.000000 grad: 0.1984 (0.2143) loss: 0.7582 (0.7557) time: 0.1107 data: 0.0324 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:31 lr: 0.000000 grad: 0.2076 (0.2142) loss: 0.7510 (0.7558) time: 0.1295 data: 0.0512 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:18 lr: 0.000000 grad: 0.2074 (0.2142) loss: 0.7552 (0.7558) time: 0.1586 data: 0.0360 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.2114 (0.2141) loss: 0.7616 (0.7558) time: 0.1282 data: 0.0514 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2126 (0.2141) loss: 0.7649 (0.7559) time: 0.1299 data: 0.0467 max mem: 9377 +Train: [98] Total time: 0:13:01 (0.1251 s / it) +Averaged stats: lr: 0.000000 grad: 0.2126 (0.2141) loss: 0.7649 (0.7559) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:05:49 loss: 0.8084 (0.8084) time: 5.6411 data: 5.6112 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8032 (0.8011) time: 0.1180 data: 0.0934 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:12 (0.2024 s / it) +Averaged stats (hcp-train-subset): loss: 0.8032 (0.8011) +Eval (hcp-val): [98] [ 0/62] eta: 0:05:38 loss: 0.8553 (0.8553) time: 5.4576 data: 5.4284 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8526 (0.8545) time: 0.1114 data: 0.0852 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:12 (0.2004 s / it) +Averaged stats (hcp-val): loss: 0.8526 (0.8545) +Eval (nsd-val): [98] [ 0/62] eta: 0:03:52 loss: 0.8314 (0.8314) time: 3.7580 data: 3.6913 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8352 (0.8360) time: 0.1192 data: 0.0946 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:12 (0.1982 s / it) +Averaged stats (nsd-val): loss: 0.8352 (0.8360) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 8:56:26 lr: 0.000000 grad: 0.2660 (0.2660) loss: 0.7344 (0.7344) time: 5.1498 data: 5.0227 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:18:41 lr: 0.000000 grad: 0.2409 (0.2351) loss: 0.7467 (0.7644) time: 0.1499 data: 0.0612 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:15:49 lr: 0.000000 grad: 0.2219 (0.2344) loss: 0.7643 (0.7582) time: 0.1105 data: 0.0181 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:14:58 lr: 0.000000 grad: 0.2286 (0.2319) loss: 0.7477 (0.7577) time: 0.1523 data: 0.0679 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:14:13 lr: 0.000000 grad: 0.2167 (0.2299) loss: 0.7403 (0.7581) time: 0.1422 data: 0.0543 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:13:33 lr: 0.000000 grad: 0.2137 (0.2285) loss: 0.7557 (0.7571) time: 0.1303 data: 0.0440 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:13:09 lr: 0.000000 grad: 0.2121 (0.2266) loss: 0.7504 (0.7569) time: 0.1284 data: 0.0439 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:12:42 lr: 0.000000 grad: 0.2158 (0.2253) loss: 0.7423 (0.7565) time: 0.1110 data: 0.0250 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:12:17 lr: 0.000000 grad: 0.2159 (0.2243) loss: 0.7584 (0.7565) time: 0.1078 data: 0.0213 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:11:56 lr: 0.000000 grad: 0.2119 (0.2232) loss: 0.7570 (0.7568) time: 0.1053 data: 0.0185 max mem: 9377 +Train: [99] [1000/6250] eta: 0:11:39 lr: 0.000000 grad: 0.2085 (0.2220) loss: 0.7595 (0.7573) time: 0.1419 data: 0.0589 max mem: 9377 +Train: [99] [1100/6250] eta: 0:11:20 lr: 0.000000 grad: 0.2071 (0.2212) loss: 0.7646 (0.7575) time: 0.1205 data: 0.0375 max mem: 9377 +Train: [99] [1200/6250] eta: 0:11:03 lr: 0.000000 grad: 0.2182 (0.2211) loss: 0.7502 (0.7570) time: 0.1384 data: 0.0565 max mem: 9377 +Train: [99] [1300/6250] eta: 0:10:47 lr: 0.000000 grad: 0.2178 (0.2210) loss: 0.7527 (0.7568) time: 0.1281 data: 0.0426 max mem: 9377 +Train: [99] [1400/6250] eta: 0:10:31 lr: 0.000000 grad: 0.2195 (0.2206) loss: 0.7450 (0.7565) time: 0.1287 data: 0.0471 max mem: 9377 +Train: [99] [1500/6250] eta: 0:10:16 lr: 0.000000 grad: 0.2156 (0.2203) loss: 0.7518 (0.7562) time: 0.1096 data: 0.0317 max mem: 9377 +Train: [99] [1600/6250] eta: 0:10:00 lr: 0.000000 grad: 0.2108 (0.2199) loss: 0.7537 (0.7559) time: 0.1335 data: 0.0539 max mem: 9377 +Train: [99] [1700/6250] eta: 0:09:42 lr: 0.000000 grad: 0.2149 (0.2200) loss: 0.7563 (0.7558) time: 0.0992 data: 0.0188 max mem: 9377 +Train: [99] [1800/6250] eta: 0:09:26 lr: 0.000000 grad: 0.2083 (0.2197) loss: 0.7576 (0.7558) time: 0.1163 data: 0.0361 max mem: 9377 +Train: [99] [1900/6250] eta: 0:09:15 lr: 0.000000 grad: 0.2061 (0.2193) loss: 0.7578 (0.7558) time: 0.1322 data: 0.0580 max mem: 9377 +Train: [99] [2000/6250] eta: 0:09:01 lr: 0.000000 grad: 0.2052 (0.2189) loss: 0.7496 (0.7558) time: 0.0980 data: 0.0172 max mem: 9377 +Train: [99] [2100/6250] eta: 0:08:47 lr: 0.000000 grad: 0.1998 (0.2185) loss: 0.7614 (0.7558) time: 0.1158 data: 0.0350 max mem: 9377 +Train: [99] [2200/6250] eta: 0:08:31 lr: 0.000000 grad: 0.2158 (0.2181) loss: 0.7473 (0.7558) time: 0.1166 data: 0.0378 max mem: 9377 +Train: [99] [2300/6250] eta: 0:08:17 lr: 0.000000 grad: 0.2122 (0.2178) loss: 0.7532 (0.7557) time: 0.1094 data: 0.0305 max mem: 9377 +Train: [99] [2400/6250] eta: 0:08:02 lr: 0.000000 grad: 0.2031 (0.2175) loss: 0.7609 (0.7558) time: 0.1170 data: 0.0339 max mem: 9377 +Train: [99] [2500/6250] eta: 0:07:49 lr: 0.000000 grad: 0.2064 (0.2171) loss: 0.7691 (0.7560) time: 0.1117 data: 0.0285 max mem: 9377 +Train: [99] [2600/6250] eta: 0:07:36 lr: 0.000000 grad: 0.2105 (0.2168) loss: 0.7594 (0.7562) time: 0.1275 data: 0.0475 max mem: 9377 +Train: [99] [2700/6250] eta: 0:07:24 lr: 0.000000 grad: 0.2001 (0.2164) loss: 0.7669 (0.7563) time: 0.1267 data: 0.0462 max mem: 9377 +Train: [99] [2800/6250] eta: 0:07:11 lr: 0.000000 grad: 0.2107 (0.2161) loss: 0.7590 (0.7566) time: 0.1007 data: 0.0166 max mem: 9377 +Train: [99] [2900/6250] eta: 0:06:59 lr: 0.000000 grad: 0.2036 (0.2158) loss: 0.7623 (0.7565) time: 0.1297 data: 0.0513 max mem: 9377 +Train: [99] [3000/6250] eta: 0:06:45 lr: 0.000000 grad: 0.2000 (0.2155) loss: 0.7600 (0.7565) time: 0.1198 data: 0.0409 max mem: 9377 +Train: [99] [3100/6250] eta: 0:06:32 lr: 0.000000 grad: 0.2091 (0.2153) loss: 0.7601 (0.7565) time: 0.1295 data: 0.0513 max mem: 9377 +Train: [99] [3200/6250] eta: 0:06:21 lr: 0.000000 grad: 0.2007 (0.2151) loss: 0.7557 (0.7565) time: 0.1390 data: 0.0598 max mem: 9377 +Train: [99] [3300/6250] eta: 0:06:08 lr: 0.000000 grad: 0.2174 (0.2150) loss: 0.7483 (0.7564) time: 0.1221 data: 0.0384 max mem: 9377 +Train: [99] [3400/6250] eta: 0:05:55 lr: 0.000000 grad: 0.2111 (0.2148) loss: 0.7538 (0.7564) time: 0.1160 data: 0.0331 max mem: 9377 +Train: [99] [3500/6250] eta: 0:05:42 lr: 0.000000 grad: 0.2005 (0.2147) loss: 0.7619 (0.7564) time: 0.1018 data: 0.0117 max mem: 9377 +Train: [99] [3600/6250] eta: 0:05:29 lr: 0.000000 grad: 0.2075 (0.2145) loss: 0.7597 (0.7565) time: 0.1200 data: 0.0434 max mem: 9377 +Train: [99] [3700/6250] eta: 0:05:17 lr: 0.000000 grad: 0.2084 (0.2144) loss: 0.7580 (0.7564) time: 0.1387 data: 0.0575 max mem: 9377 +Train: [99] [3800/6250] eta: 0:05:04 lr: 0.000000 grad: 0.2081 (0.2144) loss: 0.7609 (0.7565) time: 0.1136 data: 0.0270 max mem: 9377 +Train: [99] [3900/6250] eta: 0:04:52 lr: 0.000000 grad: 0.2088 (0.2144) loss: 0.7564 (0.7565) time: 0.1370 data: 0.0575 max mem: 9377 +Train: [99] [4000/6250] eta: 0:04:40 lr: 0.000000 grad: 0.2101 (0.2144) loss: 0.7484 (0.7564) time: 0.1302 data: 0.0559 max mem: 9377 +Train: [99] [4100/6250] eta: 0:04:27 lr: 0.000000 grad: 0.2141 (0.2145) loss: 0.7554 (0.7562) time: 0.1165 data: 0.0324 max mem: 9377 +Train: [99] [4200/6250] eta: 0:04:15 lr: 0.000000 grad: 0.2178 (0.2145) loss: 0.7433 (0.7561) time: 0.1238 data: 0.0501 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:02 lr: 0.000000 grad: 0.2049 (0.2145) loss: 0.7565 (0.7560) time: 0.1238 data: 0.0398 max mem: 9377 +Train: [99] [4400/6250] eta: 0:03:50 lr: 0.000000 grad: 0.2177 (0.2145) loss: 0.7429 (0.7560) time: 0.1171 data: 0.0303 max mem: 9377 +Train: [99] [4500/6250] eta: 0:03:37 lr: 0.000000 grad: 0.2088 (0.2144) loss: 0.7504 (0.7559) time: 0.1195 data: 0.0384 max mem: 9377 +Train: [99] [4600/6250] eta: 0:03:24 lr: 0.000000 grad: 0.2104 (0.2144) loss: 0.7625 (0.7559) time: 0.1113 data: 0.0333 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:11 lr: 0.000000 grad: 0.2064 (0.2143) loss: 0.7667 (0.7560) time: 0.1236 data: 0.0384 max mem: 9377 +Train: [99] [4800/6250] eta: 0:02:59 lr: 0.000000 grad: 0.1992 (0.2142) loss: 0.7657 (0.7560) time: 0.1118 data: 0.0306 max mem: 9377 +Train: [99] [4900/6250] eta: 0:02:46 lr: 0.000000 grad: 0.2079 (0.2141) loss: 0.7578 (0.7560) time: 0.1178 data: 0.0372 max mem: 9377 +Train: [99] [5000/6250] eta: 0:02:34 lr: 0.000000 grad: 0.2096 (0.2140) loss: 0.7545 (0.7560) time: 0.1199 data: 0.0428 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:22 lr: 0.000000 grad: 0.2122 (0.2139) loss: 0.7477 (0.7560) time: 0.1044 data: 0.0207 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:09 lr: 0.000000 grad: 0.2125 (0.2138) loss: 0.7493 (0.7560) time: 0.1073 data: 0.0257 max mem: 9377 +Train: [99] [5300/6250] eta: 0:01:57 lr: 0.000000 grad: 0.2139 (0.2138) loss: 0.7439 (0.7559) time: 0.1387 data: 0.0613 max mem: 9377 +Train: [99] [5400/6250] eta: 0:01:44 lr: 0.000000 grad: 0.2090 (0.2137) loss: 0.7561 (0.7560) time: 0.1204 data: 0.0445 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:32 lr: 0.000000 grad: 0.2038 (0.2137) loss: 0.7619 (0.7560) time: 0.1170 data: 0.0359 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:20 lr: 0.000000 grad: 0.2113 (0.2137) loss: 0.7525 (0.7560) time: 0.1131 data: 0.0310 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:07 lr: 0.000000 grad: 0.2078 (0.2137) loss: 0.7592 (0.7560) time: 0.1270 data: 0.0473 max mem: 9377 +Train: [99] [5800/6250] eta: 0:00:55 lr: 0.000000 grad: 0.2119 (0.2137) loss: 0.7646 (0.7561) time: 0.1215 data: 0.0422 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:43 lr: 0.000000 grad: 0.2108 (0.2137) loss: 0.7534 (0.7561) time: 0.1145 data: 0.0339 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:30 lr: 0.000000 grad: 0.2064 (0.2137) loss: 0.7670 (0.7562) time: 0.1341 data: 0.0507 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:18 lr: 0.000000 grad: 0.2125 (0.2137) loss: 0.7546 (0.7562) time: 0.1257 data: 0.0461 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.2084 (0.2136) loss: 0.7624 (0.7563) time: 0.1302 data: 0.0457 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2050 (0.2136) loss: 0.7600 (0.7563) time: 0.1234 data: 0.0431 max mem: 9377 +Train: [99] Total time: 0:12:54 (0.1239 s / it) +Averaged stats: lr: 0.000000 grad: 0.2050 (0.2136) loss: 0.7600 (0.7563) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:05:40 loss: 0.8110 (0.8110) time: 5.4964 data: 5.4668 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8027 (0.8009) time: 0.1149 data: 0.0903 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:12 (0.2048 s / it) +Averaged stats (hcp-train-subset): loss: 0.8027 (0.8009) +Making plots (hcp-train-subset): example=30 +Eval (hcp-val): [99] [ 0/62] eta: 0:04:48 loss: 0.8571 (0.8571) time: 4.6480 data: 4.6180 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8540 (0.8545) time: 0.1026 data: 0.0781 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:11 (0.1919 s / it) +Averaged stats (hcp-val): loss: 0.8540 (0.8545) +Making plots (hcp-val): example=3 +Eval (nsd-val): [99] [ 0/62] eta: 0:03:31 loss: 0.8276 (0.8276) time: 3.4088 data: 3.3220 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8356 (0.8362) time: 0.0962 data: 0.0716 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:12 (0.2024 s / it) +Averaged stats (nsd-val): loss: 0.8356 (0.8362) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_1/pretrain/checkpoint-00099.pth +done! training time: 1 day, 5:03:07 diff --git a/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..473d5c8272a179f8af5dd2f10584effddab0d685 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..ece0085bc06a83fb81e3289f667b836a30286aee --- /dev/null +++ b/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.3593813663804626,train,0.9566929133858267,0.00949342816112054,0.9571943398020756,0.009427788930897692,0.9567581978139164,0.009494037053785235 +flat_mae,patch,logistic,aabc_age,,0.3593813663804626,test,0.38461538461538464,0.06130272154349311,0.3714093701996928,0.059836414270216756,0.3740842490842491,0.060597956035797935 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,train,0.7933070866141733,0.016928119361655343,0.791457724900084,0.017329911624968466,0.7944069830778938,0.0169371506879299 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,test,0.4807692307692308,0.059953186865959136,0.47651515151515156,0.05705472433065422,0.47435897435897434,0.059393927208925425 +flat_mae,patch,logistic,aabc_age,2,9.999999999999999e-05,train,0.468503937007874,0.02002362359541858,0.4466473618419726,0.020123419191410828,0.4669817065569085,0.019896462336377985 +flat_mae,patch,logistic,aabc_age,2,9.999999999999999e-05,test,0.4807692307692308,0.06023694584696683,0.44235039385901453,0.058418970139196995,0.47275641025641024,0.059084301067142034 +flat_mae,patch,logistic,aabc_age,3,0.3593813663804626,train,0.952755905511811,0.009033224988801465,0.9530635424294199,0.009010049880146342,0.9540913789895836,0.00881374408732175 +flat_mae,patch,logistic,aabc_age,3,0.3593813663804626,test,0.5192307692307693,0.06824662681045794,0.5180732369547236,0.06906053508281755,0.5176282051282052,0.06843792032442782 +flat_mae,patch,logistic,aabc_age,4,0.046415888336127774,train,0.7755905511811023,0.018302073227691153,0.7741861817603495,0.018544961402014358,0.7759442805659498,0.018329646810808346 +flat_mae,patch,logistic,aabc_age,4,0.046415888336127774,test,0.5384615384615384,0.06548782578039819,0.5331868131868132,0.06571607061088218,0.5441849816849816,0.06596188505989026 +flat_mae,patch,logistic,aabc_age,5,9.999999999999999e-05,train,0.48031496062992124,0.01983904286187323,0.4531300382633243,0.019516011988787545,0.4767528419280038,0.01964083450750862 +flat_mae,patch,logistic,aabc_age,5,9.999999999999999e-05,test,0.3269230769230769,0.05513644989033004,0.29749589490968803,0.04542095741117823,0.3241758241758242,0.054346275432297295 +flat_mae,patch,logistic,aabc_age,6,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,6,166.81005372000556,test,0.5384615384615384,0.06959077338013259,0.5390322580645162,0.0707704746832954,0.543040293040293,0.06970496674576424 +flat_mae,patch,logistic,aabc_age,7,0.005994842503189409,train,0.6417322834645669,0.020891128999614685,0.64019128277383,0.021007631178693444,0.643219362126934,0.02086570943065405 +flat_mae,patch,logistic,aabc_age,7,0.005994842503189409,test,0.46153846153846156,0.060376959338268366,0.4212257143618787,0.0600992923599756,0.4562728937728938,0.059599187048762374 +flat_mae,patch,logistic,aabc_age,8,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,8,166.81005372000556,test,0.40384615384615385,0.06508586727042358,0.4090961308703245,0.0644777273244411,0.40934065934065933,0.06550161457266795 +flat_mae,patch,logistic,aabc_age,9,0.3593813663804626,train,0.9566929133858267,0.008983257310008643,0.9570054460778838,0.00894126762663542,0.9575209200807843,0.008857206538775534 +flat_mae,patch,logistic,aabc_age,9,0.3593813663804626,test,0.5384615384615384,0.0648116410575141,0.5345792365529207,0.06409863724271497,0.5322802197802198,0.06462549365761275 +flat_mae,patch,logistic,aabc_age,10,0.046415888336127774,train,0.7874015748031497,0.017871314704514016,0.7862782453622149,0.01814427409678465,0.7880234335592043,0.017880673664263266 +flat_mae,patch,logistic,aabc_age,10,0.046415888336127774,test,0.5,0.06208234415372297,0.4993131868131868,0.06025942984972219,0.4993131868131868,0.062008670709441875 +flat_mae,patch,logistic,aabc_age,11,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,11,1291.5496650148827,test,0.4807692307692308,0.07088201714842479,0.48152519893899204,0.07051031135897697,0.4848901098901099,0.07128343076998815 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,train,0.6535433070866141,0.020289260356360257,0.6502226810679836,0.020594279101867348,0.6547310405474608,0.02025836467721277 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,test,0.36538461538461536,0.06222305505101511,0.36606696651674164,0.06020957318693568,0.3630952380952381,0.06183373194043046 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,train,0.8070866141732284,0.016195288051612716,0.8057822412262227,0.016497776636683528,0.8077495881194271,0.016102394505500668 +flat_mae,patch,logistic,aabc_age,13,0.046415888336127774,test,0.5192307692307693,0.06599375754950583,0.5196438299886575,0.06644231103467628,0.5203754578754579,0.06628792505372394 +flat_mae,patch,logistic,aabc_age,14,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,14,166.81005372000556,test,0.4807692307692308,0.051589799344925774,0.4533383283383283,0.05517701821392876,0.48443223443223443,0.05214847699990942 +flat_mae,patch,logistic,aabc_age,15,0.3593813663804626,train,0.952755905511811,0.009283244172014682,0.9529371201496026,0.009252025508234675,0.9533387020056042,0.009168484337778816 +flat_mae,patch,logistic,aabc_age,15,0.3593813663804626,test,0.4807692307692308,0.064590379777203,0.4774862637362637,0.0646464617068074,0.4773351648351648,0.06444665111970746 +flat_mae,patch,logistic,aabc_age,16,0.046415888336127774,train,0.7854330708661418,0.018107556717216734,0.7846937906575141,0.018307593418502632,0.7876302530356413,0.01799838546540911 +flat_mae,patch,logistic,aabc_age,16,0.046415888336127774,test,0.40384615384615385,0.063464323948193,0.4019318181818182,0.0632592093261322,0.4004120879120879,0.06322606721683001 +flat_mae,patch,logistic,aabc_age,17,0.005994842503189409,train,0.639763779527559,0.02063931797475132,0.6378439132677086,0.021122936520039885,0.6413208276354123,0.02064597750190951 +flat_mae,patch,logistic,aabc_age,17,0.005994842503189409,test,0.4230769230769231,0.05671452179335131,0.3924366426412462,0.05992122294343163,0.42216117216117216,0.056490478508073726 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,train,0.65748031496063,0.020376408623603355,0.6554352368722258,0.02071452758615753,0.6589132586226407,0.0203831672388253 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,test,0.5,0.06885808878071294,0.4933816183816184,0.06965441649591016,0.49954212454212454,0.06888471520354766 +flat_mae,patch,logistic,aabc_age,19,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,19,166.81005372000556,test,0.4807692307692308,0.06902931158564575,0.4853376882987078,0.068508019299768,0.4791666666666667,0.0692722429833365 +flat_mae,patch,logistic,aabc_age,20,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,20,2.782559402207126,test,0.5384615384615384,0.05389737947983091,0.4963738368910783,0.05315218612761995,0.5331959706959707,0.05286737015455687 +flat_mae,patch,logistic,aabc_age,21,0.005994842503189409,train,0.6377952755905512,0.01990270836433073,0.6349401195041926,0.020084689370904813,0.6381168858568171,0.019900520072528804 +flat_mae,patch,logistic,aabc_age,21,0.005994842503189409,test,0.5,0.058922775656992436,0.4679742835349242,0.0639864427792282,0.5006868131868132,0.059432204793786386 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,train,0.7795275590551181,0.018411258524307385,0.7784402354642159,0.01860178607815896,0.7802440931818662,0.018414602065884456 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,test,0.4423076923076923,0.06303810954048249,0.44434573578595316,0.06365347285172979,0.44184981684981683,0.0632239788820719 +flat_mae,patch,logistic,aabc_age,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,23,166.81005372000556,test,0.4807692307692308,0.060298505481592234,0.4755244755244755,0.06144817101431734,0.48168498168498164,0.060620795787737505 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,train,0.6476377952755905,0.022107640489105483,0.6439250064886638,0.022466440332220818,0.6486326667804653,0.022128482280987728 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,test,0.4230769230769231,0.06491068055184863,0.4016239316239316,0.06121653819348925,0.4150641025641026,0.06383515965263555 +flat_mae,patch,logistic,aabc_age,25,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,25,166.81005372000556,test,0.36538461538461536,0.06338894449368651,0.372439381270903,0.0632883890767822,0.3649267399267399,0.06354587649201841 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,train,0.6515748031496063,0.02076528739012107,0.6473007301387176,0.021201044832156315,0.6526149381747601,0.020802404267668488 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,test,0.4807692307692308,0.06494781701420951,0.4751719576719577,0.06519341957562673,0.4832875457875458,0.06511619510869207 +flat_mae,patch,logistic,aabc_age,27,0.005994842503189409,train,0.639763779527559,0.021851045957491804,0.6382984788292299,0.022087758024809834,0.6407181106620968,0.021876697054417398 +flat_mae,patch,logistic,aabc_age,27,0.005994842503189409,test,0.40384615384615385,0.061063297743906265,0.37646889400921657,0.06289571927703985,0.39880952380952384,0.06058638340010907 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,train,0.6377952755905512,0.020224752410875638,0.6339285471016961,0.0207961634892802,0.6391047519222691,0.020141806420992326 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,test,0.4807692307692308,0.06814972204921026,0.4796165125197383,0.06954384342878313,0.4832875457875458,0.06828706927335301 +flat_mae,patch,logistic,aabc_age,29,0.3593813663804626,train,0.9645669291338582,0.008425237324209231,0.9648454945931719,0.008381235498651841,0.9652002871176799,0.008298662216215339 +flat_mae,patch,logistic,aabc_age,29,0.3593813663804626,test,0.46153846153846156,0.06322889759535794,0.467111013986014,0.06348048200687322,0.46108058608058605,0.06325583153109411 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,train,0.7775590551181102,0.01724205215676851,0.7765697241935561,0.01744700407133098,0.7789306544633661,0.017148023333131243 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,test,0.4807692307692308,0.06685950782784934,0.474341290893015,0.06639947954925271,0.4816849816849817,0.06691408209204057 +flat_mae,patch,logistic,aabc_age,31,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,31,21.54434690031882,test,0.5192307692307693,0.06538772617034339,0.5039819376026272,0.0658285531765651,0.5157967032967032,0.06519806347588643 +flat_mae,patch,logistic,aabc_age,32,0.046415888336127774,train,0.7775590551181102,0.019295717002728602,0.7771831429426328,0.01942359209875354,0.7782779508198293,0.019233230809517447 +flat_mae,patch,logistic,aabc_age,32,0.046415888336127774,test,0.4423076923076923,0.0636477338455081,0.43320105820105814,0.06387300782824339,0.4416208791208791,0.0637619087206716 +flat_mae,patch,logistic,aabc_age,33,0.3593813663804626,train,0.9704724409448819,0.007476139530889258,0.97083715214484,0.007388764085668309,0.9711310796737176,0.007328257815518675 +flat_mae,patch,logistic,aabc_age,33,0.3593813663804626,test,0.4230769230769231,0.06606455288525757,0.428030303030303,0.06499617832829038,0.4285714285714286,0.06654423400363303 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,train,0.6673228346456693,0.02087070370573418,0.6646301782486302,0.02118185643133726,0.6674709286156786,0.020807346112851705 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,test,0.36538461538461536,0.06538202483555884,0.37139502441292727,0.06666951177385846,0.3649267399267399,0.06563182174377166 +flat_mae,patch,logistic,aabc_age,35,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,35,2.782559402207126,test,0.3269230769230769,0.058278610567721524,0.2994891443167305,0.04928389132930685,0.32142857142857145,0.057070688524925764 +flat_mae,patch,logistic,aabc_age,36,0.000774263682681127,train,0.5433070866141733,0.021172273721316498,0.5374875378674087,0.021358159086926863,0.5434388959107308,0.02109922777263989 +flat_mae,patch,logistic,aabc_age,36,0.000774263682681127,test,0.5,0.05329957480652367,0.45081487140310667,0.058868764340723634,0.4990842490842491,0.05305184275053426 +flat_mae,patch,logistic,aabc_age,37,0.005994842503189409,train,0.6338582677165354,0.021646707582445374,0.6328069006591691,0.021897261138813916,0.6367777945065969,0.021612496625028982 +flat_mae,patch,logistic,aabc_age,37,0.005994842503189409,test,0.4807692307692308,0.06519288631465478,0.46904761904761905,0.06746946057903312,0.4789377289377289,0.06532715544973278 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,train,0.6259842519685039,0.020689991883795607,0.6214776859965985,0.021043727755412224,0.6268580177180569,0.020736399588374914 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,test,0.5192307692307693,0.06373589877690129,0.5173001949317738,0.06652132473196247,0.5187728937728937,0.06388722641163906 +flat_mae,patch,logistic,aabc_age,39,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,39,21.54434690031882,test,0.46153846153846156,0.06363245050598358,0.46,0.06372957106782623,0.45833333333333337,0.06388609327801296 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,train,0.7854330708661418,0.017234610702409628,0.7846415593101077,0.01731187317001615,0.786174885737904,0.017124551150732054 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,test,0.46153846153846156,0.06359189409101478,0.45067432567432564,0.060512598321717015,0.4592490842490842,0.06327538328504151 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,train,0.7913385826771654,0.017288351505397957,0.7910231955317072,0.01735719324838374,0.7918881104127629,0.017243962821152932 +flat_mae,patch,logistic,aabc_age,41,0.046415888336127774,test,0.5,0.061623738989942764,0.4853982099859161,0.06330795151974199,0.4965659340659341,0.06148105045862764 +flat_mae,patch,logistic,aabc_age,42,0.046415888336127774,train,0.7854330708661418,0.01808045994453858,0.7851709070438998,0.01823138252561433,0.7869775493921045,0.018007593863853033 +flat_mae,patch,logistic,aabc_age,42,0.046415888336127774,test,0.5576923076923077,0.061653006376544785,0.5504329004329004,0.0636322667466866,0.565018315018315,0.06189374146004702 +flat_mae,patch,logistic,aabc_age,43,0.005994842503189409,train,0.6456692913385826,0.020052977141656166,0.6441292227195947,0.020346987514453012,0.6480042971754295,0.020000763869708112 +flat_mae,patch,logistic,aabc_age,43,0.005994842503189409,test,0.5576923076923077,0.06919995040529056,0.5596273291925467,0.06827783428714097,0.5547161172161172,0.06935822412591867 +flat_mae,patch,logistic,aabc_age,44,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,44,2.782559402207126,test,0.34615384615384615,0.06485190229106029,0.35014492753623194,0.06307519299302408,0.34386446886446886,0.06454253395866412 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,train,0.6240157480314961,0.020976864161682023,0.6205957481287383,0.021519810462877053,0.6256945390102208,0.02102038260613567 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,test,0.4230769230769231,0.062612029772156,0.42401709401709403,0.06115426286822241,0.42536630036630035,0.06294066131629113 +flat_mae,patch,logistic,aabc_age,46,0.005994842503189409,train,0.6259842519685039,0.021752321584763964,0.6233720773354919,0.021982080095593496,0.6267904098475419,0.021701101363107198 +flat_mae,patch,logistic,aabc_age,46,0.005994842503189409,test,0.5576923076923077,0.06310759104368727,0.5430409356725145,0.0676097961279081,0.5558608058608059,0.06323323916227001 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,train,0.6417322834645669,0.020395220811015115,0.6387410826432943,0.02059898981804601,0.6426342663539124,0.02029803804975376 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,test,0.5192307692307693,0.06656912365993765,0.5212215320910972,0.06842678957223709,0.5203754578754579,0.06651234334999881 +flat_mae,patch,logistic,aabc_age,48,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,48,2.782559402207126,test,0.38461538461538464,0.06559080610995605,0.3759259259259259,0.06413656454709912,0.3823260073260073,0.06507744112926717 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,train,0.7775590551181102,0.017463015487418555,0.7769098307093875,0.01766047042063073,0.7784955187010083,0.017393420180089852 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,test,0.4230769230769231,0.06929552527028861,0.4331723027375201,0.06743483240245234,0.42422161172161177,0.06966703059443007 +flat_mae,patch,logistic,aabc_age,50,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,50,166.81005372000556,test,0.38461538461538464,0.06675109422235456,0.38183912861332214,0.06792145493953006,0.385989010989011,0.06678609156416902 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,train,0.6377952755905512,0.020518954957673283,0.6350174904514004,0.020866866943484774,0.6389371707113115,0.020469118392530544 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,test,0.4807692307692308,0.06762386181216311,0.4768629116455203,0.06767176713771243,0.4789377289377289,0.06749653454359575 +flat_mae,patch,logistic,aabc_age,52,0.046415888336127774,train,0.7913385826771654,0.01831132115957135,0.790422311398083,0.01859721540417474,0.7931435310296151,0.018180166714709442 +flat_mae,patch,logistic,aabc_age,52,0.046415888336127774,test,0.46153846153846156,0.06711951717495306,0.46011904761904765,0.0673836031462255,0.4626831501831502,0.06729813389775156 +flat_mae,patch,logistic,aabc_age,53,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,53,166.81005372000556,test,0.6538461538461539,0.06233835902489863,0.6524175824175824,0.06352777980441303,0.6540750915750917,0.06241770228732751 +flat_mae,patch,logistic,aabc_age,54,0.3593813663804626,train,0.9586614173228346,0.008765158963534171,0.9589359029943165,0.008752024773277706,0.9594870624428209,0.008628177393706261 +flat_mae,patch,logistic,aabc_age,54,0.3593813663804626,test,0.38461538461538464,0.06281619425239823,0.373762157382847,0.06287485328670042,0.3898809523809524,0.06377799387024706 +flat_mae,patch,logistic,aabc_age,55,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,55,2.782559402207126,test,0.5961538461538461,0.06760707046109424,0.5949872286079183,0.0678786464589304,0.6021062271062272,0.06778604110310682 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,train,0.6594488188976378,0.020538308259060983,0.6565046769123378,0.020861035885459765,0.6601767106709193,0.02053853064867584 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,test,0.4230769230769231,0.060714434886197335,0.40325224292615597,0.06324301335261548,0.4237637362637363,0.061153627807268504 +flat_mae,patch,logistic,aabc_age,57,0.000774263682681127,train,0.5354330708661418,0.02177557026465486,0.5300895898561369,0.022005880185866602,0.536394611317078,0.021790695337606098 +flat_mae,patch,logistic,aabc_age,57,0.000774263682681127,test,0.5,0.06271924857658799,0.4862237998647735,0.06409716248452398,0.49793956043956045,0.06232939298793361 +flat_mae,patch,logistic,aabc_age,58,0.3593813663804626,train,0.9704724409448819,0.007400606193950907,0.9706026272414827,0.00739000578056902,0.9705959705709173,0.007406816174869764 +flat_mae,patch,logistic,aabc_age,58,0.3593813663804626,test,0.40384615384615385,0.05233393007109624,0.3508522727272727,0.05044832150274848,0.396978021978022,0.05060360542748507 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,train,0.639763779527559,0.020962644176365293,0.636620025732929,0.02136226785311342,0.6415383955165912,0.02085201923680481 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,test,0.4807692307692308,0.06963242460386554,0.47916666666666674,0.07076922403253483,0.47779304029304026,0.06989423703525144 +flat_mae,patch,logistic,aabc_age,60,0.046415888336127774,train,0.7893700787401575,0.017235866156075957,0.7881478020531718,0.01746658040126002,0.7903747250133778,0.017186996095689962 +flat_mae,patch,logistic,aabc_age,60,0.046415888336127774,test,0.34615384615384615,0.0623752719930985,0.34745670995670996,0.06388778300995847,0.3498168498168498,0.06336344121378248 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,train,0.9665354330708661,0.007968916300680693,0.9667548102686494,0.007939561290232816,0.9668988749283165,0.007903458334474735 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,test,0.38461538461538464,0.06400437993518547,0.37841750841750843,0.06280615698630694,0.38095238095238093,0.06365734529559795 +flat_mae,patch,logistic,aabc_age,62,0.005994842503189409,train,0.6614173228346457,0.021429100224056846,0.6599190195812161,0.0216993270748828,0.6627779354761991,0.021393527601753646 +flat_mae,patch,logistic,aabc_age,62,0.005994842503189409,test,0.5,0.0634409173956367,0.4766417960610338,0.06303804356550019,0.4935897435897436,0.0629509321845651 +flat_mae,patch,logistic,aabc_age,63,0.046415888336127774,train,0.8031496062992126,0.017210387993735338,0.8019624318144698,0.017424583254978827,0.8039848846063112,0.017153081543745757 +flat_mae,patch,logistic,aabc_age,63,0.046415888336127774,test,0.4230769230769231,0.06370488309523466,0.416460658559609,0.06361956334634662,0.42696886446886445,0.06447759709733057 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,train,0.7893700787401575,0.017302552102410176,0.7881046295419418,0.01748634532040727,0.7909098341161781,0.01723749171505823 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,test,0.46153846153846156,0.06272207882076003,0.4639010989010989,0.062125672713552245,0.46108058608058605,0.06267540483825287 +flat_mae,patch,logistic,aabc_age,65,0.3593813663804626,train,0.9645669291338582,0.008068752363874464,0.9649475354341944,0.00800663139379605,0.9654678416690801,0.00790499308186942 +flat_mae,patch,logistic,aabc_age,65,0.3593813663804626,test,0.36538461538461536,0.06507819602319302,0.356268115942029,0.060105404117091706,0.36149267399267404,0.06434027049562704 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6417322834645669,0.02086414304794065,0.6388354199038543,0.021190994749599477,0.6431193887864914,0.020706712734574407 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.46153846153846156,0.05671493912124589,0.45261904761904764,0.060672794642736186,0.46543040293040294,0.057443672198179 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,train,0.7933070866141733,0.018321794109100748,0.7923648377937604,0.01859413734731849,0.7945569430885576,0.018309539187450426 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,test,0.46153846153846156,0.06487452614094252,0.4342563566701498,0.06164842380879978,0.4548992673992674,0.0638822208218073 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.781496062992126,0.016702000850606653,0.7810905853215032,0.016845894898930864,0.7828953046573672,0.016671601068880118 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.4230769230769231,0.06670755526170992,0.4226731333182946,0.0668511895041032,0.4210164835164835,0.06673698465406144 +flat_mae,patch,logistic,aabc_age,69,0.046415888336127774,train,0.7933070866141733,0.01746491261204433,0.792252575862323,0.017606574875328684,0.7938542527747996,0.01749097607368894 +flat_mae,patch,logistic,aabc_age,69,0.046415888336127774,test,0.4807692307692308,0.06765264353678127,0.4928571428571429,0.0663028720776737,0.48672161172161166,0.06795574119781014 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,train,0.7854330708661418,0.017845905173250902,0.7855147169224561,0.017876119713046545,0.7877302263760839,0.01773501726188072 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,test,0.4230769230769231,0.06726812023536856,0.42028985507246375,0.06745943015938695,0.4212454212454212,0.06731431388898863 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,train,0.7933070866141733,0.01843733028044722,0.7924865989337324,0.018605416898368143,0.7942893885371575,0.01841066083421764 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,test,0.46153846153846156,0.06608085194731407,0.4627239951452845,0.06633204940658143,0.46108058608058605,0.06629067680060055 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,train,0.7854330708661418,0.017637079481397866,0.7839343902904679,0.018029306708069303,0.7865600348300406,0.01760690182071634 +flat_mae,patch,logistic,aabc_age,72,0.046415888336127774,test,0.5384615384615384,0.06461703294600861,0.5359946236559139,0.06380365890896725,0.5366300366300366,0.06456329793083959 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,train,0.6515748031496063,0.021858066326876386,0.6493327636794616,0.022036662993836612,0.6528648715258665,0.021846289498912694 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,test,0.5384615384615384,0.06231984686766702,0.5195751314626088,0.06543890394472127,0.5350274725274725,0.06205798517750057 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6476377952755905,0.020718314566998863,0.645400281117956,0.020935749147605914,0.6489502080020868,0.020623202137094634 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.4423076923076923,0.06642720801371259,0.4372294372294372,0.06611782635680302,0.44024725274725274,0.06637966638293848 +flat_mae,patch,logistic,aabc_age,75,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,75,166.81005372000556,test,0.5192307692307693,0.06710690931719705,0.5206043956043955,0.067203573030792,0.5206043956043955,0.06737768715292997 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,train,0.797244094488189,0.01784819669087749,0.796307215282968,0.01796585166860095,0.7976689429581368,0.017794941532691302 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,test,0.4230769230769231,0.06748368200741647,0.43239144316730527,0.06620717018107664,0.42559523809523814,0.06778594030961714 +flat_mae,patch,logistic,aabc_age,77,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,77,21.54434690031882,test,0.4423076923076923,0.06794641989340838,0.4453324808184143,0.06707545356456698,0.4375,0.06761133499602652 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.7775590551181102,0.017287282773770195,0.7754322064880131,0.017670114656781317,0.7783455586903445,0.017169258960307493 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5,0.06588418878123146,0.5074136008918617,0.06463569699931841,0.5011446886446886,0.0661699973409937 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,train,0.6437007874015748,0.020282789028707605,0.6415291106526166,0.02051172470552574,0.6451855044889708,0.020239614763720947 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,test,0.4807692307692308,0.06256701732386429,0.4584331797235023,0.06287549344822241,0.4757326007326007,0.06211051232700818 +flat_mae,patch,logistic,aabc_age,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,80,2.782559402207126,test,0.4807692307692308,0.05858140545504825,0.4587848932676518,0.061228115829730034,0.483058608058608,0.05889303066231339 +flat_mae,patch,logistic,aabc_age,81,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,81,2.782559402207126,test,0.5192307692307693,0.06536736197701662,0.5164502164502165,0.06653288168605515,0.5146520146520146,0.06535743546180883 +flat_mae,patch,logistic,aabc_age,82,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,82,21.54434690031882,test,0.4807692307692308,0.06692975209857221,0.4859523809523809,0.06640801865095561,0.48031135531135527,0.06693388620899995 +flat_mae,patch,logistic,aabc_age,83,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,83,21.54434690031882,test,0.4807692307692308,0.06754874417956774,0.4781061850027367,0.06867635193604536,0.4816849816849817,0.06766998240260366 +flat_mae,patch,logistic,aabc_age,84,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,84,21.54434690031882,test,0.4423076923076923,0.06697637135234058,0.44018874643874645,0.06965847916230253,0.44070512820512825,0.06715727506689977 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,train,0.4763779527559055,0.01997684931737278,0.4405186446344983,0.020474204032677325,0.4732233274963607,0.01978524850238352 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,test,0.4230769230769231,0.05256160094109696,0.3635976129582268,0.04747046146959992,0.4148351648351648,0.05106287871259887 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,train,0.9625984251968503,0.008298652284272411,0.9626339639028066,0.008285886726382355,0.9629665902042428,0.00819280606269936 +flat_mae,patch,logistic,aabc_age,86,0.3593813663804626,test,0.5384615384615384,0.06629430527008034,0.5269111476008028,0.06916946797233815,0.5352564102564102,0.06622192372981645 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,train,0.6299212598425197,0.02047762313272949,0.6256702951364399,0.02090781698266457,0.6305051266904365,0.02047868951638332 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,test,0.5384615384615384,0.06608649299375037,0.5269978106185003,0.06858372583384197,0.5366300366300366,0.06602793694219368 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,train,0.4822834645669291,0.018910826680468822,0.4551558498842771,0.01961135594765062,0.48104462311249374,0.018715197061921 +flat_mae,patch,logistic,aabc_age,88,9.999999999999999e-05,test,0.38461538461538464,0.05916423585861418,0.34075757575757576,0.0484923396268132,0.37774725274725274,0.057883683110513555 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,train,0.639763779527559,0.02002920243888659,0.6354198942941591,0.02057825232551214,0.6416060033871063,0.020023100492375902 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,test,0.38461538461538464,0.07126336685577873,0.3864734299516908,0.07173352372779535,0.3871336996336997,0.07140467737503255 +flat_mae,patch,logistic,aabc_age,90,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,90,166.81005372000556,test,0.5576923076923077,0.06092968107173185,0.5392672770314595,0.06498408528482508,0.5556318681318682,0.06074034738496979 +flat_mae,patch,logistic,aabc_age,91,9.999999999999999e-05,train,0.4625984251968504,0.02118293869919336,0.4280399906420562,0.02064487068730872,0.45993070912504863,0.020910437944822217 +flat_mae,patch,logistic,aabc_age,91,9.999999999999999e-05,test,0.5192307692307693,0.059279650159872924,0.48358721624850654,0.06363198275738172,0.5155677655677656,0.05873187329859456 +flat_mae,patch,logistic,aabc_age,92,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,92,2.782559402207126,test,0.46153846153846156,0.060693916328491616,0.44855769230769227,0.06264599288116883,0.46382783882783885,0.061334622037944514 +flat_mae,patch,logistic,aabc_age,93,0.3593813663804626,train,0.9586614173228346,0.009096621762888411,0.9591068211477803,0.009038373453292062,0.9600721582158427,0.008853056735440647 +flat_mae,patch,logistic,aabc_age,93,0.3593813663804626,test,0.4230769230769231,0.06519178579323658,0.43123543123543123,0.0648696235775868,0.42261904761904756,0.06543127105362823 +flat_mae,patch,logistic,aabc_age,94,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,94,1291.5496650148827,test,0.4807692307692308,0.06269872548556943,0.4653535353535353,0.0661757877123964,0.48489010989010994,0.0631974211562563 +flat_mae,patch,logistic,aabc_age,95,0.3593813663804626,train,0.9566929133858267,0.008928310138511307,0.9570580489522312,0.008889102453666078,0.9579560558431421,0.008666659911796529 +flat_mae,patch,logistic,aabc_age,95,0.3593813663804626,test,0.5,0.06844208885347684,0.49702380952380953,0.06874557204060246,0.5011446886446886,0.06873330007865668 +flat_mae,patch,logistic,aabc_age,96,0.3593813663804626,train,0.9606299212598425,0.008785379039390955,0.9609700661710688,0.008709782387475133,0.9611680290531637,0.008679467291439253 +flat_mae,patch,logistic,aabc_age,96,0.3593813663804626,test,0.4423076923076923,0.06487925745056951,0.4584684249831515,0.06392842178591274,0.44345238095238093,0.06504724315831957 +flat_mae,patch,logistic,aabc_age,97,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,97,21.54434690031882,test,0.40384615384615385,0.055234518835651294,0.3708717357910907,0.05310377723455509,0.4027014652014652,0.05498555868208142 +flat_mae,patch,logistic,aabc_age,98,0.3593813663804626,train,0.9625984251968503,0.008325497380205793,0.9628276708646307,0.008265077460113577,0.9631841580854219,0.008193190193668532 +flat_mae,patch,logistic,aabc_age,98,0.3593813663804626,test,0.4230769230769231,0.06614010367099123,0.4205794205794206,0.06708998237030392,0.423992673992674,0.06657112351410312 +flat_mae,patch,logistic,aabc_age,99,0.3593813663804626,train,0.9547244094488189,0.00918572701288044,0.9553289770046136,0.009088830922025741,0.9557723455999264,0.009010172284697885 +flat_mae,patch,logistic,aabc_age,99,0.3593813663804626,test,0.5769230769230769,0.06868945424343065,0.5712962962962963,0.07168501722106982,0.5753205128205128,0.06881485757005477 +flat_mae,patch,logistic,aabc_age,100,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,100,1291.5496650148827,test,0.40384615384615385,0.06494850030838042,0.4119363929146538,0.06429809133363296,0.40636446886446886,0.06521722522692935 diff --git a/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fd234e5384ece91c560d1e5d02c4c015f5495eb --- /dev/null +++ b/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:20 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:17:08 time: 4.5127 data: 3.7729 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:32 time: 0.2414 data: 0.0757 max mem: 3393 +extract (train) [ 40/228] eta: 0:00:58 time: 0.1687 data: 0.0422 max mem: 3393 +extract (train) [ 60/228] eta: 0:00:46 time: 0.2015 data: 0.0587 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:38 time: 0.2078 data: 0.0673 max mem: 3393 +extract (train) [100/228] eta: 0:00:31 time: 0.1898 data: 0.0558 max mem: 3393 +extract (train) [120/228] eta: 0:00:26 time: 0.2260 data: 0.0774 max mem: 3393 +extract (train) [140/228] eta: 0:00:20 time: 0.2039 data: 0.0678 max mem: 3393 +extract (train) [160/228] eta: 0:00:15 time: 0.2091 data: 0.0703 max mem: 3393 +extract (train) [180/228] eta: 0:00:11 time: 0.2098 data: 0.0703 max mem: 3393 +extract (train) [200/228] eta: 0:00:06 time: 0.2095 data: 0.0712 max mem: 3393 +extract (train) [220/228] eta: 0:00:01 time: 0.1825 data: 0.0580 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1797 data: 0.0577 max mem: 3393 +extract (train) Total time: 0:00:51 (0.2249 s / it) +extract (validation) [ 0/27] eta: 0:01:46 time: 3.9508 data: 3.8104 max mem: 3393 +extract (validation) [20/27] eta: 0:00:02 time: 0.1995 data: 0.0615 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1695 data: 0.0485 max mem: 3393 +extract (validation) Total time: 0:00:09 (0.3448 s / it) +extract (test) [ 0/26] eta: 0:01:57 time: 4.5014 data: 4.3358 max mem: 3393 +extract (test) [20/26] eta: 0:00:02 time: 0.1903 data: 0.0535 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1740 data: 0.0460 max mem: 3393 +extract (test) Total time: 0:00:09 (0.3674 s / it) +feature extraction time: 0:01:10 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.35938 | train | 0.95669 | 0.0094934 | 0.95719 | 0.0094278 | 0.95676 | 0.009494 | +| flat_mae | patch | logistic | aabc_age | | 0.35938 | test | 0.38462 | 0.061303 | 0.37141 | 0.059836 | 0.37408 | 0.060598 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.059953186865959136, "f1": 0.47651515151515156, "f1_std": 0.05705472433065422, "bacc": 0.47435897435897434, "bacc_std": 0.059393927208925425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06023694584696683, "f1": 0.44235039385901453, "f1_std": 0.058418970139196995, "bacc": 0.47275641025641024, "bacc_std": 0.059084301067142034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06824662681045794, "f1": 0.5180732369547236, "f1_std": 0.06906053508281755, "bacc": 0.5176282051282052, "bacc_std": 0.06843792032442782} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06548782578039819, "f1": 0.5331868131868132, "f1_std": 0.06571607061088218, "bacc": 0.5441849816849816, "bacc_std": 0.06596188505989026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 9.999999999999999e-05, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.05513644989033004, "f1": 0.29749589490968803, "f1_std": 0.04542095741117823, "bacc": 0.3241758241758242, "bacc_std": 0.054346275432297295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 166.81005372000556, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06959077338013259, "f1": 0.5390322580645162, "f1_std": 0.0707704746832954, "bacc": 0.543040293040293, "bacc_std": 0.06970496674576424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.060376959338268366, "f1": 0.4212257143618787, "f1_std": 0.0600992923599756, "bacc": 0.4562728937728938, "bacc_std": 0.059599187048762374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 166.81005372000556, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06508586727042358, "f1": 0.4090961308703245, "f1_std": 0.0644777273244411, "bacc": 0.40934065934065933, "bacc_std": 0.06550161457266795} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0648116410575141, "f1": 0.5345792365529207, "f1_std": 0.06409863724271497, "bacc": 0.5322802197802198, "bacc_std": 0.06462549365761275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06208234415372297, "f1": 0.4993131868131868, "f1_std": 0.06025942984972219, "bacc": 0.4993131868131868, "bacc_std": 0.062008670709441875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 1291.5496650148827, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.07088201714842479, "f1": 0.48152519893899204, "f1_std": 0.07051031135897697, "bacc": 0.4848901098901099, "bacc_std": 0.07128343076998815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06222305505101511, "f1": 0.36606696651674164, "f1_std": 0.06020957318693568, "bacc": 0.3630952380952381, "bacc_std": 0.06183373194043046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06599375754950583, "f1": 0.5196438299886575, "f1_std": 0.06644231103467628, "bacc": 0.5203754578754579, "bacc_std": 0.06628792505372394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.051589799344925774, "f1": 0.4533383283383283, "f1_std": 0.05517701821392876, "bacc": 0.48443223443223443, "bacc_std": 0.05214847699990942} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.064590379777203, "f1": 0.4774862637362637, "f1_std": 0.0646464617068074, "bacc": 0.4773351648351648, "bacc_std": 0.06444665111970746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.063464323948193, "f1": 0.4019318181818182, "f1_std": 0.0632592093261322, "bacc": 0.4004120879120879, "bacc_std": 0.06322606721683001} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05671452179335131, "f1": 0.3924366426412462, "f1_std": 0.05992122294343163, "bacc": 0.42216117216117216, "bacc_std": 0.056490478508073726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06885808878071294, "f1": 0.4933816183816184, "f1_std": 0.06965441649591016, "bacc": 0.49954212454212454, "bacc_std": 0.06888471520354766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06902931158564575, "f1": 0.4853376882987078, "f1_std": 0.068508019299768, "bacc": 0.4791666666666667, "bacc_std": 0.0692722429833365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 2.782559402207126, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05389737947983091, "f1": 0.4963738368910783, "f1_std": 0.05315218612761995, "bacc": 0.5331959706959707, "bacc_std": 0.05286737015455687} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.058922775656992436, "f1": 0.4679742835349242, "f1_std": 0.0639864427792282, "bacc": 0.5006868131868132, "bacc_std": 0.059432204793786386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06303810954048249, "f1": 0.44434573578595316, "f1_std": 0.06365347285172979, "bacc": 0.44184981684981683, "bacc_std": 0.0632239788820719} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.060298505481592234, "f1": 0.4755244755244755, "f1_std": 0.06144817101431734, "bacc": 0.48168498168498164, "bacc_std": 0.060620795787737505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06491068055184863, "f1": 0.4016239316239316, "f1_std": 0.06121653819348925, "bacc": 0.4150641025641026, "bacc_std": 0.06383515965263555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 166.81005372000556, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06338894449368651, "f1": 0.372439381270903, "f1_std": 0.0632883890767822, "bacc": 0.3649267399267399, "bacc_std": 0.06354587649201841} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06494781701420951, "f1": 0.4751719576719577, "f1_std": 0.06519341957562673, "bacc": 0.4832875457875458, "bacc_std": 0.06511619510869207} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.061063297743906265, "f1": 0.37646889400921657, "f1_std": 0.06289571927703985, "bacc": 0.39880952380952384, "bacc_std": 0.06058638340010907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06814972204921026, "f1": 0.4796165125197383, "f1_std": 0.06954384342878313, "bacc": 0.4832875457875458, "bacc_std": 0.06828706927335301} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06322889759535794, "f1": 0.467111013986014, "f1_std": 0.06348048200687322, "bacc": 0.46108058608058605, "bacc_std": 0.06325583153109411} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06685950782784934, "f1": 0.474341290893015, "f1_std": 0.06639947954925271, "bacc": 0.4816849816849817, "bacc_std": 0.06691408209204057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 21.54434690031882, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06538772617034339, "f1": 0.5039819376026272, "f1_std": 0.0658285531765651, "bacc": 0.5157967032967032, "bacc_std": 0.06519806347588643} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0636477338455081, "f1": 0.43320105820105814, "f1_std": 0.06387300782824339, "bacc": 0.4416208791208791, "bacc_std": 0.0637619087206716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06606455288525757, "f1": 0.428030303030303, "f1_std": 0.06499617832829038, "bacc": 0.4285714285714286, "bacc_std": 0.06654423400363303} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06538202483555884, "f1": 0.37139502441292727, "f1_std": 0.06666951177385846, "bacc": 0.3649267399267399, "bacc_std": 0.06563182174377166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.058278610567721524, "f1": 0.2994891443167305, "f1_std": 0.04928389132930685, "bacc": 0.32142857142857145, "bacc_std": 0.057070688524925764} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.05329957480652367, "f1": 0.45081487140310667, "f1_std": 0.058868764340723634, "bacc": 0.4990842490842491, "bacc_std": 0.05305184275053426} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06519288631465478, "f1": 0.46904761904761905, "f1_std": 0.06746946057903312, "bacc": 0.4789377289377289, "bacc_std": 0.06532715544973278} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06373589877690129, "f1": 0.5173001949317738, "f1_std": 0.06652132473196247, "bacc": 0.5187728937728937, "bacc_std": 0.06388722641163906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 21.54434690031882, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06363245050598358, "f1": 0.46, "f1_std": 0.06372957106782623, "bacc": 0.45833333333333337, "bacc_std": 0.06388609327801296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06359189409101478, "f1": 0.45067432567432564, "f1_std": 0.060512598321717015, "bacc": 0.4592490842490842, "bacc_std": 0.06327538328504151} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.061623738989942764, "f1": 0.4853982099859161, "f1_std": 0.06330795151974199, "bacc": 0.4965659340659341, "bacc_std": 0.06148105045862764} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.061653006376544785, "f1": 0.5504329004329004, "f1_std": 0.0636322667466866, "bacc": 0.565018315018315, "bacc_std": 0.06189374146004702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06919995040529056, "f1": 0.5596273291925467, "f1_std": 0.06827783428714097, "bacc": 0.5547161172161172, "bacc_std": 0.06935822412591867} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 2.782559402207126, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06485190229106029, "f1": 0.35014492753623194, "f1_std": 0.06307519299302408, "bacc": 0.34386446886446886, "bacc_std": 0.06454253395866412} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.062612029772156, "f1": 0.42401709401709403, "f1_std": 0.06115426286822241, "bacc": 0.42536630036630035, "bacc_std": 0.06294066131629113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06310759104368727, "f1": 0.5430409356725145, "f1_std": 0.0676097961279081, "bacc": 0.5558608058608059, "bacc_std": 0.06323323916227001} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06656912365993765, "f1": 0.5212215320910972, "f1_std": 0.06842678957223709, "bacc": 0.5203754578754579, "bacc_std": 0.06651234334999881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 2.782559402207126, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06559080610995605, "f1": 0.3759259259259259, "f1_std": 0.06413656454709912, "bacc": 0.3823260073260073, "bacc_std": 0.06507744112926717} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06929552527028861, "f1": 0.4331723027375201, "f1_std": 0.06743483240245234, "bacc": 0.42422161172161177, "bacc_std": 0.06966703059443007} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 166.81005372000556, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06675109422235456, "f1": 0.38183912861332214, "f1_std": 0.06792145493953006, "bacc": 0.385989010989011, "bacc_std": 0.06678609156416902} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06762386181216311, "f1": 0.4768629116455203, "f1_std": 0.06767176713771243, "bacc": 0.4789377289377289, "bacc_std": 0.06749653454359575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06711951717495306, "f1": 0.46011904761904765, "f1_std": 0.0673836031462255, "bacc": 0.4626831501831502, "bacc_std": 0.06729813389775156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 166.81005372000556, "split": "test", "acc": 0.6538461538461539, "acc_std": 0.06233835902489863, "f1": 0.6524175824175824, "f1_std": 0.06352777980441303, "bacc": 0.6540750915750917, "bacc_std": 0.06241770228732751} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06281619425239823, "f1": 0.373762157382847, "f1_std": 0.06287485328670042, "bacc": 0.3898809523809524, "bacc_std": 0.06377799387024706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 2.782559402207126, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06760707046109424, "f1": 0.5949872286079183, "f1_std": 0.0678786464589304, "bacc": 0.6021062271062272, "bacc_std": 0.06778604110310682} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.060714434886197335, "f1": 0.40325224292615597, "f1_std": 0.06324301335261548, "bacc": 0.4237637362637363, "bacc_std": 0.061153627807268504} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.06271924857658799, "f1": 0.4862237998647735, "f1_std": 0.06409716248452398, "bacc": 0.49793956043956045, "bacc_std": 0.06232939298793361} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05233393007109624, "f1": 0.3508522727272727, "f1_std": 0.05044832150274848, "bacc": 0.396978021978022, "bacc_std": 0.05060360542748507} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06963242460386554, "f1": 0.47916666666666674, "f1_std": 0.07076922403253483, "bacc": 0.47779304029304026, "bacc_std": 0.06989423703525144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.0623752719930985, "f1": 0.34745670995670996, "f1_std": 0.06388778300995847, "bacc": 0.3498168498168498, "bacc_std": 0.06336344121378248} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06400437993518547, "f1": 0.37841750841750843, "f1_std": 0.06280615698630694, "bacc": 0.38095238095238093, "bacc_std": 0.06365734529559795} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.0634409173956367, "f1": 0.4766417960610338, "f1_std": 0.06303804356550019, "bacc": 0.4935897435897436, "bacc_std": 0.0629509321845651} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06370488309523466, "f1": 0.416460658559609, "f1_std": 0.06361956334634662, "bacc": 0.42696886446886445, "bacc_std": 0.06447759709733057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06272207882076003, "f1": 0.4639010989010989, "f1_std": 0.062125672713552245, "bacc": 0.46108058608058605, "bacc_std": 0.06267540483825287} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06507819602319302, "f1": 0.356268115942029, "f1_std": 0.060105404117091706, "bacc": 0.36149267399267404, "bacc_std": 0.06434027049562704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05671493912124589, "f1": 0.45261904761904764, "f1_std": 0.060672794642736186, "bacc": 0.46543040293040294, "bacc_std": 0.057443672198179} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06487452614094252, "f1": 0.4342563566701498, "f1_std": 0.06164842380879978, "bacc": 0.4548992673992674, "bacc_std": 0.0638822208218073} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06670755526170992, "f1": 0.4226731333182946, "f1_std": 0.0668511895041032, "bacc": 0.4210164835164835, "bacc_std": 0.06673698465406144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06765264353678127, "f1": 0.4928571428571429, "f1_std": 0.0663028720776737, "bacc": 0.48672161172161166, "bacc_std": 0.06795574119781014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06726812023536856, "f1": 0.42028985507246375, "f1_std": 0.06745943015938695, "bacc": 0.4212454212454212, "bacc_std": 0.06731431388898863} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06608085194731407, "f1": 0.4627239951452845, "f1_std": 0.06633204940658143, "bacc": 0.46108058608058605, "bacc_std": 0.06629067680060055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06461703294600861, "f1": 0.5359946236559139, "f1_std": 0.06380365890896725, "bacc": 0.5366300366300366, "bacc_std": 0.06456329793083959} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06231984686766702, "f1": 0.5195751314626088, "f1_std": 0.06543890394472127, "bacc": 0.5350274725274725, "bacc_std": 0.06205798517750057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06642720801371259, "f1": 0.4372294372294372, "f1_std": 0.06611782635680302, "bacc": 0.44024725274725274, "bacc_std": 0.06637966638293848} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 166.81005372000556, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06710690931719705, "f1": 0.5206043956043955, "f1_std": 0.067203573030792, "bacc": 0.5206043956043955, "bacc_std": 0.06737768715292997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06748368200741647, "f1": 0.43239144316730527, "f1_std": 0.06620717018107664, "bacc": 0.42559523809523814, "bacc_std": 0.06778594030961714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 21.54434690031882, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06794641989340838, "f1": 0.4453324808184143, "f1_std": 0.06707545356456698, "bacc": 0.4375, "bacc_std": 0.06761133499602652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06588418878123146, "f1": 0.5074136008918617, "f1_std": 0.06463569699931841, "bacc": 0.5011446886446886, "bacc_std": 0.0661699973409937} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06256701732386429, "f1": 0.4584331797235023, "f1_std": 0.06287549344822241, "bacc": 0.4757326007326007, "bacc_std": 0.06211051232700818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05858140545504825, "f1": 0.4587848932676518, "f1_std": 0.061228115829730034, "bacc": 0.483058608058608, "bacc_std": 0.05889303066231339} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 2.782559402207126, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06536736197701662, "f1": 0.5164502164502165, "f1_std": 0.06653288168605515, "bacc": 0.5146520146520146, "bacc_std": 0.06535743546180883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06692975209857221, "f1": 0.4859523809523809, "f1_std": 0.06640801865095561, "bacc": 0.48031135531135527, "bacc_std": 0.06693388620899995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06754874417956774, "f1": 0.4781061850027367, "f1_std": 0.06867635193604536, "bacc": 0.4816849816849817, "bacc_std": 0.06766998240260366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 21.54434690031882, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06697637135234058, "f1": 0.44018874643874645, "f1_std": 0.06965847916230253, "bacc": 0.44070512820512825, "bacc_std": 0.06715727506689977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05256160094109696, "f1": 0.3635976129582268, "f1_std": 0.04747046146959992, "bacc": 0.4148351648351648, "bacc_std": 0.05106287871259887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06629430527008034, "f1": 0.5269111476008028, "f1_std": 0.06916946797233815, "bacc": 0.5352564102564102, "bacc_std": 0.06622192372981645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06608649299375037, "f1": 0.5269978106185003, "f1_std": 0.06858372583384197, "bacc": 0.5366300366300366, "bacc_std": 0.06602793694219368} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 9.999999999999999e-05, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.05916423585861418, "f1": 0.34075757575757576, "f1_std": 0.0484923396268132, "bacc": 0.37774725274725274, "bacc_std": 0.057883683110513555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.07126336685577873, "f1": 0.3864734299516908, "f1_std": 0.07173352372779535, "bacc": 0.3871336996336997, "bacc_std": 0.07140467737503255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 166.81005372000556, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06092968107173185, "f1": 0.5392672770314595, "f1_std": 0.06498408528482508, "bacc": 0.5556318681318682, "bacc_std": 0.06074034738496979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.059279650159872924, "f1": 0.48358721624850654, "f1_std": 0.06363198275738172, "bacc": 0.5155677655677656, "bacc_std": 0.05873187329859456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.060693916328491616, "f1": 0.44855769230769227, "f1_std": 0.06264599288116883, "bacc": 0.46382783882783885, "bacc_std": 0.061334622037944514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06519178579323658, "f1": 0.43123543123543123, "f1_std": 0.0648696235775868, "bacc": 0.42261904761904756, "bacc_std": 0.06543127105362823} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 1291.5496650148827, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06269872548556943, "f1": 0.4653535353535353, "f1_std": 0.0661757877123964, "bacc": 0.48489010989010994, "bacc_std": 0.0631974211562563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06844208885347684, "f1": 0.49702380952380953, "f1_std": 0.06874557204060246, "bacc": 0.5011446886446886, "bacc_std": 0.06873330007865668} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06487925745056951, "f1": 0.4584684249831515, "f1_std": 0.06392842178591274, "bacc": 0.44345238095238093, "bacc_std": 0.06504724315831957} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.055234518835651294, "f1": 0.3708717357910907, "f1_std": 0.05310377723455509, "bacc": 0.4027014652014652, "bacc_std": 0.05498555868208142} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06614010367099123, "f1": 0.4205794205794206, "f1_std": 0.06708998237030392, "bacc": 0.423992673992674, "bacc_std": 0.06657112351410312} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06868945424343065, "f1": 0.5712962962962963, "f1_std": 0.07168501722106982, "bacc": 0.5753205128205128, "bacc_std": 0.06881485757005477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 1291.5496650148827, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06494850030838042, "f1": 0.4119363929146538, "f1_std": 0.06429809133363296, "bacc": 0.40636446886446886, "bacc_std": 0.06521722522692935} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 57.225 | 223.8 | 0.8149 | 0.16821 | 0.81241 | 0.1723 | 0.81549 | 0.16816 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 57.225 | 223.8 | 0.46462 | 0.061589 | 0.45563 | 0.063662 | 0.4638 | 0.061974 | + + +done! total time: 0:05:56 diff --git a/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b06dd6e4b158f0eb2058f6e27ed3a48ee5d588c3 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..f4ca5a05092d52cbd5572745c07b473f4bda6509 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,train,0.8827977315689981,0.013521901262183262,0.8793836422477199,0.013965181938893028,0.8776127049180328,0.014137972791240856 +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,test,0.9272727272727272,0.03557278186925161,0.9252717391304348,0.03627515279125493,0.9318181818181819,0.034458539652814535 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,train,0.9262759924385633,0.011759200399239176,0.9241777748376498,0.012135217915074763,0.9228948679621325,0.012413613706880683 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,test,0.8181818181818182,0.05345375385932114,0.8166666666666667,0.05343665728228675,0.8254076086956521,0.051928451690672894 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,train,0.9130434782608695,0.012571544534239712,0.9105104442483083,0.013010306848980042,0.9090242973123479,0.013380654509967735 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,test,0.9272727272727272,0.036743542750860295,0.9252717391304348,0.037767639208486234,0.9252717391304348,0.037769832289802234 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,train,0.9243856332703214,0.011789040621199868,0.922283598754187,0.012168390514682223,0.9212608810340279,0.012494308225984126 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,test,0.7636363636363637,0.057938628117165256,0.7518222839291913,0.06182766545057537,0.7479619565217391,0.06103752781049474 +flat_mae,patch,logistic,aabc_sex,4,0.046415888336127774,train,0.9130434782608695,0.012694645593749557,0.9105104442483083,0.013134040929258718,0.9090242973123479,0.01348291460963222 +flat_mae,patch,logistic,aabc_sex,4,0.046415888336127774,test,0.8363636363636363,0.050527086263518,0.8343927735028438,0.05063279981617115,0.8410326086956521,0.049115607808028594 +flat_mae,patch,logistic,aabc_sex,5,0.3593813663804626,train,0.9716446124763705,0.007068502628101845,0.9709111571384057,0.0072431980682776415,0.9706248717723263,0.007240773261759402 +flat_mae,patch,logistic,aabc_sex,5,0.3593813663804626,test,0.8181818181818182,0.05123144695423691,0.8131793478260869,0.05256470993664909,0.8131793478260869,0.052380018682174664 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,train,0.9187145557655955,0.012182299944221923,0.91640113635946,0.012594955892438843,0.9151425891731879,0.012959676020740407 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,test,0.9090909090909091,0.040134830610072024,0.905982905982906,0.041821574997918484,0.9035326086956521,0.04261508216957869 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,train,0.9187145557655955,0.011238330091708595,0.9165079190295289,0.011544192186843641,0.9157507547114512,0.011660173243868152 +flat_mae,patch,logistic,aabc_sex,7,0.046415888336127774,test,0.9090909090909091,0.039021740073286175,0.9071259709557582,0.039780411589282236,0.9096467391304348,0.03940193672623515 +flat_mae,patch,logistic,aabc_sex,8,0.005994842503189409,train,0.8903591682419659,0.013410635355322694,0.887165342747867,0.013850718547889567,0.8857674609455142,0.014045615521495075 +flat_mae,patch,logistic,aabc_sex,8,0.005994842503189409,test,0.7818181818181819,0.05402657442223828,0.7758152173913043,0.05584420865656002,0.7758152173913043,0.056220623022879196 +flat_mae,patch,logistic,aabc_sex,9,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,9,2.782559402207126,test,0.8363636363636363,0.050836558719875365,0.8328267477203647,0.05187969271910928,0.8349184782608696,0.05212409454919339 +flat_mae,patch,logistic,aabc_sex,10,0.3593813663804626,train,0.9716446124763705,0.007265222582867298,0.970874855475417,0.007479983743122196,0.9700167062340631,0.007778679449236959 +flat_mae,patch,logistic,aabc_sex,10,0.3593813663804626,test,0.8363636363636363,0.049499124293113726,0.8343927735028438,0.04964090151746143,0.8410326086956521,0.048583796672703246 +flat_mae,patch,logistic,aabc_sex,11,0.005994842503189409,train,0.8771266540642723,0.013910924368083001,0.8736296247294164,0.0143704772274588,0.8725050558339928,0.014577311332118604 +flat_mae,patch,logistic,aabc_sex,11,0.005994842503189409,test,0.9090909090909091,0.03996644873882828,0.9045470322804582,0.043407626401297715,0.8974184782608696,0.04499273039106961 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,train,0.9678638941398866,0.007439334563199511,0.9669915028721394,0.007649382584548517,0.9661405668395908,0.007847682565854391 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,test,0.7818181818181819,0.05500387965279816,0.76890756302521,0.060444640795139115,0.7635869565217391,0.059219352050148985 +flat_mae,patch,logistic,aabc_sex,13,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,13,2.782559402207126,test,0.8,0.05499879487785643,0.7989365237620472,0.054923667321093494,0.8097826086956521,0.05320159009764436 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,train,0.9130434782608695,0.012484996731477092,0.9105104442483083,0.01289914467516548,0.9090242973123479,0.013166603420382677 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,test,0.8909090909090909,0.04176994846826271,0.8879076086956521,0.04321675153684068,0.8879076086956521,0.04370788759381442 +flat_mae,patch,logistic,aabc_sex,15,0.3593813663804626,train,0.9603024574669187,0.008743004955303267,0.9590643274853801,0.009079435313738216,0.9565637914358569,0.00963566038177506 +flat_mae,patch,logistic,aabc_sex,15,0.3593813663804626,test,0.8363636363636363,0.05107764305285344,0.8343927735028438,0.05139963217873348,0.8410326086956521,0.050508335195814144 +flat_mae,patch,logistic,aabc_sex,16,0.000774263682681127,train,0.8657844990548205,0.01479260536389947,0.8614084610985038,0.015366811835986476,0.8590521410357865,0.015533395230840845 +flat_mae,patch,logistic,aabc_sex,16,0.000774263682681127,test,0.8,0.05524358083198892,0.7931623931623932,0.05774088404140212,0.7914402173913043,0.05781950972613876 +flat_mae,patch,logistic,aabc_sex,17,0.005994842503189409,train,0.888468809073724,0.0137743824912757,0.885294582446701,0.014222794254752213,0.8841334740174096,0.01441357801309008 +flat_mae,patch,logistic,aabc_sex,17,0.005994842503189409,test,0.8,0.05270344289097233,0.790003471017008,0.05643963889324101,0.7853260869565217,0.055836924171046104 +flat_mae,patch,logistic,aabc_sex,18,0.005994842503189409,train,0.8865784499054821,0.013961883128040387,0.8831197525408749,0.014472157316581623,0.8812831560127787,0.014746495944744772 +flat_mae,patch,logistic,aabc_sex,18,0.005994842503189409,test,0.8545454545454545,0.04609915292959779,0.8541114058355437,0.04599626802527937,0.8688858695652174,0.04207453009626324 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,train,0.9206049149338374,0.011386712101766708,0.9183977786918963,0.011721627254524214,0.9173847416395557,0.0118850813092648 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,test,0.8363636363636363,0.047766982614554705,0.8354935194416749,0.04765655380741128,0.8471467391304348,0.045402933436101085 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9206049149338374,0.011664487862135944,0.9182921447484554,0.012052027250064688,0.9167765761012925,0.012331540996943478 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8909090909090909,0.04172346585922345,0.8863636363636364,0.04472086272847154,0.8817934782608696,0.04586741757477451 +flat_mae,patch,logistic,aabc_sex,21,0.005994842503189409,train,0.8903591682419659,0.013370319435393201,0.8868624443198915,0.01388020193643616,0.8845511298689879,0.014096024184084692 +flat_mae,patch,logistic,aabc_sex,21,0.005994842503189409,test,0.7818181818181819,0.05443745672067051,0.7758152173913043,0.056115425320621595,0.7758152173913043,0.05591774747353632 +flat_mae,patch,logistic,aabc_sex,22,0.3593813663804626,train,0.9640831758034026,0.007924236820201866,0.9631081502688617,0.008155467671553033,0.9622644274451185,0.008393244337822571 +flat_mae,patch,logistic,aabc_sex,22,0.3593813663804626,test,0.8727272727272727,0.04489099377730514,0.8720505151213027,0.04477540900394644,0.8845108695652174,0.04149489378998495 +flat_mae,patch,logistic,aabc_sex,23,0.005994842503189409,train,0.8865784499054821,0.013414514516900304,0.8829611492964394,0.01392993033470696,0.8806749904745157,0.01419269010985835 +flat_mae,patch,logistic,aabc_sex,23,0.005994842503189409,test,0.8909090909090909,0.04146449641331626,0.89,0.04140540422162383,0.9001358695652174,0.03856756868548274 +flat_mae,patch,logistic,aabc_sex,24,0.3593813663804626,train,0.9621928166351607,0.009007866705661723,0.9612386060552771,0.009244652853566659,0.9612386060552771,0.009406849680044335 +flat_mae,patch,logistic,aabc_sex,24,0.3593813663804626,test,0.8363636363636363,0.05099902445996164,0.8307692307692308,0.05305511959998543,0.8288043478260869,0.05303014480160868 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,train,0.9621928166351607,0.008985057490482867,0.9611908325263374,0.009238905203218151,0.960630440517014,0.009428123518496525 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,test,0.8545454545454545,0.04895970345073047,0.8533333333333333,0.04897343590409668,0.8627717391304348,0.047113160635992475 +flat_mae,patch,logistic,aabc_sex,26,0.005994842503189409,train,0.8865784499054821,0.01346592891966152,0.8831197525408749,0.013931052244414242,0.8812831560127787,0.014102648315379728 +flat_mae,patch,logistic,aabc_sex,26,0.005994842503189409,test,0.8181818181818182,0.04847866021114616,0.8106060606060606,0.051597052806010305,0.8070652173913043,0.05156378485539002 +flat_mae,patch,logistic,aabc_sex,27,0.000774263682681127,train,0.8582230623818525,0.015737716802216323,0.853184427002964,0.01651744601822141,0.8500835311703157,0.016830700062229732 +flat_mae,patch,logistic,aabc_sex,27,0.000774263682681127,test,0.9090909090909091,0.03533967405032864,0.905982905982906,0.03685623770637453,0.9035326086956521,0.037549306549883546 +flat_mae,patch,logistic,aabc_sex,28,0.000774263682681127,train,0.8695652173913043,0.014758802910693394,0.8656772441405604,0.015270360527328374,0.864144611506785,0.015512770108878778 +flat_mae,patch,logistic,aabc_sex,28,0.000774263682681127,test,0.7454545454545455,0.05723044904175278,0.7303921568627451,0.06272351574840966,0.7262228260869565,0.06103230590721574 +flat_mae,patch,logistic,aabc_sex,29,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,29,21.54434690031882,test,0.8727272727272727,0.04637609814698561,0.8699763593380614,0.04722889946588117,0.8722826086956521,0.04670174345188973 +flat_mae,patch,logistic,aabc_sex,30,0.005994842503189409,train,0.888468809073724,0.01342103675900026,0.8849902534113061,0.013920914001420634,0.8829171429408833,0.014191467005001946 +flat_mae,patch,logistic,aabc_sex,30,0.005994842503189409,test,0.8545454545454545,0.04860254049068586,0.8521505376344086,0.04923798063924978,0.8566576086956521,0.04860847346223987 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,train,0.9262759924385633,0.011246286077942253,0.9242746242360844,0.011569978300574968,0.9235030335003956,0.011716816755736213 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,test,0.7636363636363637,0.05728347373925889,0.7555555555555555,0.05955524089184082,0.7540760869565217,0.05917686376224038 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,train,0.9262759924385633,0.01110301841937352,0.9242746242360844,0.01144084064751934,0.9235030335003956,0.011696104099257159 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,test,0.8181818181818182,0.052330797783850115,0.8131793478260869,0.05399566222272284,0.8131793478260869,0.053988770175442145 +flat_mae,patch,logistic,aabc_sex,33,0.000774263682681127,train,0.8563327032136105,0.014525045529148245,0.8515449604159282,0.0150693627376565,0.8490577097804742,0.015149491406538644 +flat_mae,patch,logistic,aabc_sex,33,0.000774263682681127,test,0.8363636363636363,0.04595562358235358,0.8354935194416749,0.04584293970368359,0.8471467391304348,0.04417707764789911 +flat_mae,patch,logistic,aabc_sex,34,0.000774263682681127,train,0.8638941398865785,0.01466018276445606,0.859358383551932,0.015279146687957969,0.8568099885694188,0.015520068236139736 +flat_mae,patch,logistic,aabc_sex,34,0.000774263682681127,test,0.8545454545454545,0.04866442351145729,0.8484848484848485,0.05151307282909686,0.8444293478260869,0.05180715187737196 +flat_mae,patch,logistic,aabc_sex,35,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,35,2.782559402207126,test,0.7454545454545455,0.05487565508184078,0.7303921568627451,0.06041713246084236,0.7262228260869565,0.058796374601984086 +flat_mae,patch,logistic,aabc_sex,36,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,36,2.782559402207126,test,0.8545454545454545,0.04563072764908666,0.8505434782608696,0.04702340062159747,0.8505434782608696,0.04714745574884524 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,train,0.9187145557655955,0.012075197107418776,0.91640113635946,0.01243158223200659,0.9151425891731879,0.01254691926132022 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,test,0.7818181818181819,0.056284801172032266,0.7758152173913043,0.05837919276441475,0.7758152173913043,0.05841465396802551 +flat_mae,patch,logistic,aabc_sex,38,0.005994842503189409,train,0.888468809073724,0.013768201015138327,0.8851443102071458,0.014259748935270536,0.8835253084791466,0.01450016402957063 +flat_mae,patch,logistic,aabc_sex,38,0.005994842503189409,test,0.8181818181818182,0.048348815227307225,0.8074229691876751,0.05286496120252397,0.8009510869565217,0.0523990450994989 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,train,0.8941398865784499,0.013112662741185667,0.8907637393433434,0.01361463431524928,0.8884272692634603,0.013856383711173367 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,test,0.7818181818181819,0.05415496589029034,0.7782258064516129,0.054867239446614924,0.7819293478260869,0.05436293911811521 +flat_mae,patch,logistic,aabc_sex,40,0.3593813663804626,train,0.9678638941398866,0.007243441941492021,0.9670727197501436,0.007418493445729536,0.9673568979161171,0.007448459466731519 +flat_mae,patch,logistic,aabc_sex,40,0.3593813663804626,test,0.8909090909090909,0.03968262521307849,0.8879076086956521,0.04083798507143102,0.8879076086956521,0.04106793181582791 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,train,0.8827977315689981,0.013880305858047236,0.8795395780689899,0.014306093579852299,0.8786233476948329,0.014507193785559022 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,test,0.9090909090909091,0.038480287966483194,0.905982905982906,0.040362495966507414,0.9035326086956521,0.04107032746648314 +flat_mae,patch,logistic,aabc_sex,42,0.3593813663804626,train,0.9716446124763705,0.007092727356302262,0.970874855475417,0.007294522812614984,0.9700167062340631,0.007501613068160644 +flat_mae,patch,logistic,aabc_sex,42,0.3593813663804626,test,0.8,0.055129287562124686,0.7989365237620472,0.055031923320996455,0.8097826086956521,0.05304205161124931 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,train,0.9206049149338374,0.011285595588780287,0.9183977786918963,0.011636519530917716,0.9173847416395557,0.011888959754278873 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,test,0.8909090909090909,0.03959482393753795,0.8879076086956521,0.04077379446731728,0.8879076086956521,0.0408179689512438 +flat_mae,patch,logistic,aabc_sex,44,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,44,21.54434690031882,test,0.8181818181818182,0.049035335547250036,0.8074229691876751,0.053641715713952556,0.8009510869565217,0.052768597385545575 +flat_mae,patch,logistic,aabc_sex,45,0.3593813663804626,train,0.9678638941398866,0.007468617451714627,0.9670326447568598,0.007666258782325106,0.966748732377854,0.00778699670062393 +flat_mae,patch,logistic,aabc_sex,45,0.3593813663804626,test,0.8909090909090909,0.038947406792593633,0.8905835543766578,0.03875926935438281,0.90625,0.033470427712385155 +flat_mae,patch,logistic,aabc_sex,46,0.005994842503189409,train,0.8865784499054821,0.014635775486058666,0.8831197525408749,0.015188309814609816,0.8812831560127787,0.015495880000481717 +flat_mae,patch,logistic,aabc_sex,46,0.005994842503189409,test,0.9090909090909091,0.03916287667027168,0.9079959852793577,0.03931333901780281,0.9157608695652174,0.03705621989620135 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,train,0.9659735349716446,0.00788882463069811,0.9650717492737036,0.00811656040801962,0.9645065799114863,0.008368140754815168 +flat_mae,patch,logistic,aabc_sex,47,0.3593813663804626,test,0.8545454545454545,0.048936038537754495,0.8505434782608696,0.050671072469432775,0.8505434782608696,0.050852785225848275 +flat_mae,patch,logistic,aabc_sex,48,0.000774263682681127,train,0.8657844990548205,0.01479941173920973,0.8610145908961393,0.015447001992745079,0.8578358099592602,0.015685379575892285 +flat_mae,patch,logistic,aabc_sex,48,0.000774263682681127,test,0.8545454545454545,0.046679848590829943,0.8505434782608696,0.048274801235968695,0.8505434782608696,0.048505305744408804 +flat_mae,patch,logistic,aabc_sex,49,0.005994842503189409,train,0.8846880907372401,0.01338558988889746,0.8810916179337231,0.013904789049595462,0.8790410035464111,0.014196200565534405 +flat_mae,patch,logistic,aabc_sex,49,0.005994842503189409,test,0.8363636363636363,0.04742815009556667,0.8343927735028438,0.04793823078467611,0.8410326086956521,0.04744327525297636 +flat_mae,patch,logistic,aabc_sex,50,0.046415888336127774,train,0.9206049149338374,0.011176588679881036,0.9183977786918963,0.011521401176633042,0.9173847416395557,0.011792560429787479 +flat_mae,patch,logistic,aabc_sex,50,0.046415888336127774,test,0.8181818181818182,0.051071636610898814,0.8106060606060606,0.05412659700092281,0.8070652173913043,0.05434311263528095 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,train,0.9187145557655955,0.01141236680354965,0.9167133499562459,0.011714485767195915,0.9169670857879774,0.011926859637595058 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,test,0.8181818181818182,0.046752324377625776,0.8074229691876751,0.05277400829412781,0.8009510869565217,0.05195068859591898 +flat_mae,patch,logistic,aabc_sex,52,0.3593813663804626,train,0.9603024574669187,0.008630421067090744,0.9591726479895037,0.008904753196499162,0.9577801225123832,0.009243654233829654 +flat_mae,patch,logistic,aabc_sex,52,0.3593813663804626,test,0.8181818181818182,0.048878246441608444,0.8106060606060606,0.051702055789674274,0.8070652173913043,0.05167764451677211 +flat_mae,patch,logistic,aabc_sex,53,0.000774263682681127,train,0.8657844990548205,0.01608940602748374,0.8614084610985038,0.01667388098663603,0.8590521410357865,0.016771223515433684 +flat_mae,patch,logistic,aabc_sex,53,0.000774263682681127,test,0.8545454545454545,0.04668551370695885,0.8505434782608696,0.04806947581202879,0.8505434782608696,0.048081131556000835 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,train,0.8865784499054821,0.013615035504582894,0.8831197525408749,0.01410644070175435,0.8812831560127787,0.014359115715165956 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,test,0.8545454545454545,0.047584982699507145,0.8521505376344086,0.048061229422214816,0.8566576086956521,0.047089442909126124 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,train,0.9716446124763705,0.0074280830661339215,0.970874855475417,0.007646102343948438,0.9700167062340631,0.007943631897697142 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,test,0.8181818181818182,0.05130979962141813,0.8131793478260869,0.05289982990639339,0.8131793478260869,0.05309103931959122 +flat_mae,patch,logistic,aabc_sex,56,0.046415888336127774,train,0.9149338374291115,0.012415834693652771,0.9125128171203651,0.012795186884060507,0.9112664497787157,0.012956479390938724 +flat_mae,patch,logistic,aabc_sex,56,0.046415888336127774,test,0.8363636363636363,0.048303106345706434,0.8307692307692308,0.050494865110545385,0.8288043478260869,0.05078730095183825 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,train,0.9149338374291115,0.01190755771393335,0.9125128171203651,0.012286001803750422,0.9112664497787157,0.012514898500217342 +flat_mae,patch,logistic,aabc_sex,57,0.046415888336127774,test,0.8727272727272727,0.04418256639845311,0.8699763593380614,0.04514228714587683,0.8722826086956521,0.045077612965095076 +flat_mae,patch,logistic,aabc_sex,58,0.000774263682681127,train,0.8638941398865785,0.014151923144382288,0.8591586794462194,0.014738153406678552,0.8562018230311557,0.01490458772014724 +flat_mae,patch,logistic,aabc_sex,58,0.000774263682681127,test,0.7090909090909091,0.05940714262646642,0.7043010752688172,0.060149211305726896,0.7072010869565217,0.06024668222045741 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,train,0.9168241965973535,0.0120157961982738,0.9145119586296058,0.012375240170711913,0.9135086022450833,0.012525658460432644 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,test,0.8727272727272727,0.04360330565980988,0.8711943793911007,0.04374188717202844,0.8783967391304348,0.04204950466987741 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,train,0.8809073724007561,0.01352450144713073,0.8773574837805116,0.013947839611353152,0.875773029690202,0.014046823933293724 +flat_mae,patch,logistic,aabc_sex,60,0.005994842503189409,test,0.8727272727272727,0.04393600154093403,0.8663658451926415,0.04754109468698033,0.8600543478260869,0.0483420301297699 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,train,0.9130434782608695,0.011948416449209522,0.9103918102813374,0.012381919995707152,0.9084161317740849,0.01271464172125658 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,test,0.8363636363636363,0.04908417462224566,0.8328267477203647,0.05008452928551458,0.8349184782608696,0.05023998106380485 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,train,0.9621928166351607,0.00826378894053036,0.9611908325263374,0.008504131107893166,0.960630440517014,0.008765424105627199 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,test,0.9090909090909091,0.03616849457746341,0.9045470322804582,0.03939841085965545,0.8974184782608696,0.041183847575934976 +flat_mae,patch,logistic,aabc_sex,63,0.3593813663804626,train,0.9584120982986768,0.008399986182618531,0.9572559793148028,0.008665798880912838,0.9561461355842786,0.009008997869405268 +flat_mae,patch,logistic,aabc_sex,63,0.3593813663804626,test,0.9272727272727272,0.036290398976192594,0.9242424242424243,0.038616880573169275,0.9191576086956521,0.04072923582265042 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,train,0.9659735349716446,0.007994998275616472,0.9650717492737036,0.008209657739206145,0.9645065799114863,0.008321461675309859 +flat_mae,patch,logistic,aabc_sex,64,0.3593813663804626,test,0.8,0.052404731349073745,0.7975911676145868,0.052781258168665426,0.8036684782608696,0.05253866498613683 +flat_mae,patch,logistic,aabc_sex,65,0.005994842503189409,train,0.8865784499054821,0.01366549729699941,0.8837158181658313,0.014038456775719589,0.8837158181658313,0.014218072203703518 +flat_mae,patch,logistic,aabc_sex,65,0.005994842503189409,test,0.8727272727272727,0.0450683081188346,0.8663658451926415,0.04847370714574701,0.8600543478260869,0.049097999420360895 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.9168241965973535,0.0121283057691401,0.9146198315579424,0.012444883120405249,0.9141167677833466,0.012529969758416401 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.8727272727272727,0.04351826442820272,0.8663658451926415,0.04692360558179858,0.8600543478260869,0.04759204879761667 +flat_mae,patch,logistic,aabc_sex,67,0.005994842503189409,train,0.8790170132325141,0.0142519838127606,0.8753277360435999,0.014775944301511116,0.8735308772238342,0.01497988527887398 +flat_mae,patch,logistic,aabc_sex,67,0.005994842503189409,test,0.8727272727272727,0.04477264513144286,0.8683760683760684,0.04665395816283645,0.8661684782608696,0.047109016730706686 +flat_mae,patch,logistic,aabc_sex,68,0.3593813663804626,train,0.9640831758034026,0.008250746263956114,0.9631541323753139,0.008478740394895375,0.9628725929833817,0.00869429707807549 +flat_mae,patch,logistic,aabc_sex,68,0.3593813663804626,test,0.8909090909090909,0.04006399838935386,0.8863636363636364,0.04249219488091017,0.8817934782608696,0.04360631488537191 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,train,0.9149338374291115,0.012122497066358649,0.9123982027003654,0.012526350119366574,0.9106582842404525,0.012777895554951322 +flat_mae,patch,logistic,aabc_sex,69,0.046415888336127774,test,0.9454545454545454,0.029807093281924622,0.9442755825734549,0.030386033639502368,0.9470108695652174,0.02957088574952899 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,train,0.9130434782608695,0.012307001274329623,0.9106261385673151,0.01269066862162456,0.9096324628506112,0.01291374253511874 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,test,0.9454545454545454,0.030771007068879926,0.9442755825734549,0.031298918161542864,0.9470108695652174,0.03034394793587456 +flat_mae,patch,logistic,aabc_sex,71,0.005994842503189409,train,0.8903591682419659,0.012859598098704655,0.887165342747867,0.013303797933809059,0.8857674609455142,0.013564803576311337 +flat_mae,patch,logistic,aabc_sex,71,0.005994842503189409,test,0.8363636363636363,0.04600267329637725,0.8281846581048247,0.049665836522629875,0.8226902173913043,0.04934354741300053 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,train,0.9697542533081286,0.007248033453545975,0.9689908848442217,0.007431948104898494,0.9689908848442217,0.007518072688874566 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,test,0.8181818181818182,0.04805521617527997,0.8035714285714286,0.05524400725138334,0.7948369565217391,0.05355161083721272 +flat_mae,patch,logistic,aabc_sex,73,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,73,2.782559402207126,test,0.9454545454545454,0.030321146100146787,0.9435897435897436,0.031666321061234695,0.9408967391304348,0.03289900672497691 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,train,0.8846880907372401,0.013291435655502773,0.8810916179337231,0.013783650089123647,0.8790410035464111,0.014053560420385011 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,test,0.8727272727272727,0.046747558384112764,0.8699763593380614,0.04770391581320217,0.8722826086956521,0.04757125621375379 +flat_mae,patch,logistic,aabc_sex,75,0.005994842503189409,train,0.8846880907372401,0.014181741577171844,0.8814062632076061,0.014593703154707805,0.8802573346229374,0.014677078670083213 +flat_mae,patch,logistic,aabc_sex,75,0.005994842503189409,test,0.8909090909090909,0.03931385052338687,0.8863636363636364,0.0416884030128631,0.8817934782608696,0.042572610728623936 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,train,0.9243856332703214,0.011599644965374764,0.922182994998529,0.012026592876930663,0.9206527154957649,0.012467161600398463 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,test,0.8545454545454545,0.04666488937977293,0.8505434782608696,0.048050969595217326,0.8505434782608696,0.047770239827216815 +flat_mae,patch,logistic,aabc_sex,77,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,77,2.782559402207126,test,0.8181818181818182,0.05045904975799025,0.8106060606060606,0.05369336466239111,0.8070652173913043,0.05416048560444908 +flat_mae,patch,logistic,aabc_sex,78,0.005994842503189409,train,0.8790170132325141,0.014422771503736468,0.8756537580066992,0.014900062616155568,0.8747472083003605,0.015175012889763901 +flat_mae,patch,logistic,aabc_sex,78,0.005994842503189409,test,0.8727272727272727,0.045682859327800786,0.8683760683760684,0.047610515258164954,0.8661684782608696,0.04805182529115244 +flat_mae,patch,logistic,aabc_sex,79,0.005994842503189409,train,0.8922495274102079,0.013758995016387575,0.8893244508065848,0.01416990840184278,0.888617778950145,0.01432782530358564 +flat_mae,patch,logistic,aabc_sex,79,0.005994842503189409,test,0.8,0.054734931508797234,0.7931623931623932,0.05728113932119837,0.7914402173913043,0.057196249730923236 +flat_mae,patch,logistic,aabc_sex,80,0.005994842503189409,train,0.8903591682419659,0.01356730358047094,0.8874534143263785,0.013969063405824149,0.8869837920220405,0.014174651591360849 +flat_mae,patch,logistic,aabc_sex,80,0.005994842503189409,test,0.7636363636363637,0.05477492338109619,0.7518222839291913,0.0587430562104279,0.7479619565217391,0.057822932420367054 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,train,0.9640831758034026,0.007863643299765563,0.9631081502688617,0.008089319783344302,0.9622644274451185,0.008294919331536808 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,test,0.8909090909090909,0.041371911260205445,0.8863636363636364,0.043820628122047824,0.8817934782608696,0.04476997013576171 +flat_mae,patch,logistic,aabc_sex,82,0.046415888336127774,train,0.9187145557655955,0.011557714699696676,0.9162916159136825,0.011974010967975736,0.9145344236349249,0.012342173297679004 +flat_mae,patch,logistic,aabc_sex,82,0.046415888336127774,test,0.8363636363636363,0.046719504129599874,0.8307692307692308,0.04839938767731535,0.8288043478260869,0.04859132770800029 +flat_mae,patch,logistic,aabc_sex,83,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,83,1291.5496650148827,test,0.8363636363636363,0.0405195351314983,0.81673454276194,0.052330430502065844,0.8043478260869565,0.048447270265921924 +flat_mae,patch,logistic,aabc_sex,84,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,84,21.54434690031882,test,0.7636363636363637,0.06231682314015206,0.7623795280824195,0.062315653594833126,0.7724184782608696,0.06156806353383415 +flat_mae,patch,logistic,aabc_sex,85,0.005994842503189409,train,0.8827977315689981,0.014035486426732567,0.8792237442922375,0.014565233629982438,0.8774070166183066,0.014866726349090753 +flat_mae,patch,logistic,aabc_sex,85,0.005994842503189409,test,0.9272727272727272,0.03685550969721248,0.9252717391304348,0.03792441979617142,0.9252717391304348,0.03822319922897879 +flat_mae,patch,logistic,aabc_sex,86,0.000774263682681127,train,0.8582230623818525,0.01547986763763439,0.8533949191685912,0.016125857504943213,0.8506916967085788,0.016335485046412156 +flat_mae,patch,logistic,aabc_sex,86,0.000774263682681127,test,0.8727272727272727,0.04768053469110587,0.8711943793911007,0.047907155346685555,0.8783967391304348,0.04651928468357661 +flat_mae,patch,logistic,aabc_sex,87,0.3593813663804626,train,0.9659735349716446,0.00804414885504576,0.9650717492737036,0.008271994464307217,0.9645065799114863,0.00849639118984759 +flat_mae,patch,logistic,aabc_sex,87,0.3593813663804626,test,0.8,0.053572547552264746,0.795677136102668,0.054664263723678726,0.7975543478260869,0.054563580757625574 +flat_mae,patch,logistic,aabc_sex,88,0.046415888336127774,train,0.9187145557655955,0.011830628264722913,0.9161793372319689,0.012266745038953872,0.9139262580966616,0.012580581018259157 +flat_mae,patch,logistic,aabc_sex,88,0.046415888336127774,test,0.8363636363636363,0.05192294354248129,0.8328267477203647,0.05299738209818629,0.8349184782608696,0.05315559527087562 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,train,0.9640831758034026,0.007930444010901323,0.9631989220736901,0.00812219683570719,0.9634807585216448,0.00816375871680053 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,test,0.7636363636363637,0.05843078550058073,0.7555555555555555,0.06111117998100611,0.7540760869565217,0.060775736470643854 +flat_mae,patch,logistic,aabc_sex,90,0.3593813663804626,train,0.9678638941398866,0.007771864985724296,0.9669915028721394,0.007998451905667982,0.9661405668395908,0.008258177304802786 +flat_mae,patch,logistic,aabc_sex,90,0.3593813663804626,test,0.8727272727272727,0.043746467153699974,0.8683760683760684,0.046035140619400575,0.8661684782608696,0.046785330900828315 +flat_mae,patch,logistic,aabc_sex,91,0.005994842503189409,train,0.8941398865784499,0.013938835303056722,0.89091176903815,0.014447343726719733,0.8890354348017233,0.014735722309531716 +flat_mae,patch,logistic,aabc_sex,91,0.005994842503189409,test,0.8,0.05105354829381037,0.7989365237620472,0.05103618268451549,0.8097826086956521,0.04947789597247391 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,train,0.9659735349716446,0.007987407050375955,0.9650717492737036,0.008207944215883368,0.9645065799114863,0.00841027360548282 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,test,0.8909090909090909,0.04069107157332089,0.8863636363636364,0.0430376043388297,0.8817934782608696,0.04384756488923303 +flat_mae,patch,logistic,aabc_sex,93,0.046415888336127774,train,0.9149338374291115,0.012427581755534486,0.9126245664262511,0.012796457650202282,0.9118746153169788,0.012979206415738721 +flat_mae,patch,logistic,aabc_sex,93,0.046415888336127774,test,0.8363636363636363,0.04768108934359337,0.8250265111346766,0.05378052432788733,0.8165760869565217,0.0534193793273984 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,train,0.9149338374291115,0.012483691499312007,0.9123982027003654,0.012913027272470455,0.9106582842404525,0.013147749286011264 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,test,0.9090909090909091,0.03721412688307254,0.905982905982906,0.03888714581403604,0.9035326086956521,0.039799776621288245 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,train,0.9130434782608695,0.01207136415764119,0.9106261385673151,0.012431904374113779,0.9096324628506112,0.012606484142328508 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,test,0.8909090909090909,0.04225027360662544,0.8891129032258065,0.04266528450184467,0.8940217391304348,0.041470414069410676 +flat_mae,patch,logistic,aabc_sex,96,0.000774263682681127,train,0.8601134215500945,0.014978171134792455,0.85545167198393,0.015611853141749165,0.8529338491749465,0.015842592620860504 +flat_mae,patch,logistic,aabc_sex,96,0.000774263682681127,test,0.8909090909090909,0.043549170462063726,0.8879076086956521,0.04473971211363276,0.8879076086956521,0.044696257420486496 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,train,0.9621928166351607,0.008750300050448205,0.9612386060552771,0.008971556972630142,0.9612386060552771,0.009074174795083167 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,test,0.8181818181818182,0.05250153559344164,0.8106060606060606,0.055245884499287634,0.8070652173913043,0.05480111746026415 +flat_mae,patch,logistic,aabc_sex,98,0.3593813663804626,train,0.9716446124763705,0.007199488421491056,0.9709111571384057,0.007392451442166099,0.9706248717723263,0.0075335856375424205 +flat_mae,patch,logistic,aabc_sex,98,0.3593813663804626,test,0.8545454545454545,0.04716065035756778,0.8505434782608696,0.048618830974758935,0.8505434782608696,0.04874717240721238 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,train,0.8809073724007561,0.013924789054606923,0.8776743929967517,0.014327947204303113,0.8769893607667283,0.014505669538133455 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,test,0.8545454545454545,0.04885750574649297,0.8484848484848485,0.05216045704278036,0.8444293478260869,0.052713729772551225 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,train,0.9187145557655955,0.01222763803058426,0.9165079190295289,0.012584166336280877,0.9157507547114512,0.012752602033245354 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,test,0.8727272727272727,0.04524542440680676,0.8711943793911007,0.04539076192262555,0.8783967391304348,0.04368749112626374 diff --git a/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..7144ebea145ca347a02baae64bafe940b0d8ed63 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:21 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:21:48 time: 5.5428 data: 4.6137 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:40 time: 0.2138 data: 0.0681 max mem: 3393 +extract (train) [ 40/236] eta: 0:01:06 time: 0.2028 data: 0.0645 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:51 time: 0.1953 data: 0.0593 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:41 time: 0.1956 data: 0.0616 max mem: 3393 +extract (train) [100/236] eta: 0:00:35 time: 0.2169 data: 0.0731 max mem: 3393 +extract (train) [120/236] eta: 0:00:29 time: 0.2442 data: 0.0877 max mem: 3393 +extract (train) [140/236] eta: 0:00:24 time: 0.2177 data: 0.0715 max mem: 3393 +extract (train) [160/236] eta: 0:00:18 time: 0.2154 data: 0.0700 max mem: 3393 +extract (train) [180/236] eta: 0:00:13 time: 0.1960 data: 0.0602 max mem: 3393 +extract (train) [200/236] eta: 0:00:08 time: 0.2069 data: 0.0646 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1949 data: 0.0595 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1691 data: 0.0481 max mem: 3393 +extract (train) Total time: 0:00:54 (0.2318 s / it) +extract (validation) [ 0/29] eta: 0:02:30 time: 5.2028 data: 5.0369 max mem: 3393 +extract (validation) [20/29] eta: 0:00:03 time: 0.2003 data: 0.0541 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1745 data: 0.0455 max mem: 3393 +extract (validation) Total time: 0:00:10 (0.3772 s / it) +extract (test) [ 0/28] eta: 0:02:27 time: 5.2535 data: 5.0967 max mem: 3393 +extract (test) [20/28] eta: 0:00:03 time: 0.1783 data: 0.0473 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1715 data: 0.0477 max mem: 3393 +extract (test) Total time: 0:00:10 (0.3719 s / it) +feature extraction time: 0:01:16 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | train | 0.8828 | 0.013522 | 0.87938 | 0.013965 | 0.87761 | 0.014138 | +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | test | 0.92727 | 0.035573 | 0.92527 | 0.036275 | 0.93182 | 0.034459 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05345375385932114, "f1": 0.8166666666666667, "f1_std": 0.05343665728228675, "bacc": 0.8254076086956521, "bacc_std": 0.051928451690672894} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.036743542750860295, "f1": 0.9252717391304348, "f1_std": 0.037767639208486234, "bacc": 0.9252717391304348, "bacc_std": 0.037769832289802234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.057938628117165256, "f1": 0.7518222839291913, "f1_std": 0.06182766545057537, "bacc": 0.7479619565217391, "bacc_std": 0.06103752781049474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.050527086263518, "f1": 0.8343927735028438, "f1_std": 0.05063279981617115, "bacc": 0.8410326086956521, "bacc_std": 0.049115607808028594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05123144695423691, "f1": 0.8131793478260869, "f1_std": 0.05256470993664909, "bacc": 0.8131793478260869, "bacc_std": 0.052380018682174664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.040134830610072024, "f1": 0.905982905982906, "f1_std": 0.041821574997918484, "bacc": 0.9035326086956521, "bacc_std": 0.04261508216957869} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.039021740073286175, "f1": 0.9071259709557582, "f1_std": 0.039780411589282236, "bacc": 0.9096467391304348, "bacc_std": 0.03940193672623515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05402657442223828, "f1": 0.7758152173913043, "f1_std": 0.05584420865656002, "bacc": 0.7758152173913043, "bacc_std": 0.056220623022879196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.050836558719875365, "f1": 0.8328267477203647, "f1_std": 0.05187969271910928, "bacc": 0.8349184782608696, "bacc_std": 0.05212409454919339} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049499124293113726, "f1": 0.8343927735028438, "f1_std": 0.04964090151746143, "bacc": 0.8410326086956521, "bacc_std": 0.048583796672703246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03996644873882828, "f1": 0.9045470322804582, "f1_std": 0.043407626401297715, "bacc": 0.8974184782608696, "bacc_std": 0.04499273039106961} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05500387965279816, "f1": 0.76890756302521, "f1_std": 0.060444640795139115, "bacc": 0.7635869565217391, "bacc_std": 0.059219352050148985} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 2.782559402207126, "split": "test", "acc": 0.8, "acc_std": 0.05499879487785643, "f1": 0.7989365237620472, "f1_std": 0.054923667321093494, "bacc": 0.8097826086956521, "bacc_std": 0.05320159009764436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04176994846826271, "f1": 0.8879076086956521, "f1_std": 0.04321675153684068, "bacc": 0.8879076086956521, "bacc_std": 0.04370788759381442} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05107764305285344, "f1": 0.8343927735028438, "f1_std": 0.05139963217873348, "bacc": 0.8410326086956521, "bacc_std": 0.050508335195814144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.000774263682681127, "split": "test", "acc": 0.8, "acc_std": 0.05524358083198892, "f1": 0.7931623931623932, "f1_std": 0.05774088404140212, "bacc": 0.7914402173913043, "bacc_std": 0.05781950972613876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.05270344289097233, "f1": 0.790003471017008, "f1_std": 0.05643963889324101, "bacc": 0.7853260869565217, "bacc_std": 0.055836924171046104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04609915292959779, "f1": 0.8541114058355437, "f1_std": 0.04599626802527937, "bacc": 0.8688858695652174, "bacc_std": 0.04207453009626324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.047766982614554705, "f1": 0.8354935194416749, "f1_std": 0.04765655380741128, "bacc": 0.8471467391304348, "bacc_std": 0.045402933436101085} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04172346585922345, "f1": 0.8863636363636364, "f1_std": 0.04472086272847154, "bacc": 0.8817934782608696, "bacc_std": 0.04586741757477451} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05443745672067051, "f1": 0.7758152173913043, "f1_std": 0.056115425320621595, "bacc": 0.7758152173913043, "bacc_std": 0.05591774747353632} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04489099377730514, "f1": 0.8720505151213027, "f1_std": 0.04477540900394644, "bacc": 0.8845108695652174, "bacc_std": 0.04149489378998495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04146449641331626, "f1": 0.89, "f1_std": 0.04140540422162383, "bacc": 0.9001358695652174, "bacc_std": 0.03856756868548274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05099902445996164, "f1": 0.8307692307692308, "f1_std": 0.05305511959998543, "bacc": 0.8288043478260869, "bacc_std": 0.05303014480160868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04895970345073047, "f1": 0.8533333333333333, "f1_std": 0.04897343590409668, "bacc": 0.8627717391304348, "bacc_std": 0.047113160635992475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04847866021114616, "f1": 0.8106060606060606, "f1_std": 0.051597052806010305, "bacc": 0.8070652173913043, "bacc_std": 0.05156378485539002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.000774263682681127, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03533967405032864, "f1": 0.905982905982906, "f1_std": 0.03685623770637453, "bacc": 0.9035326086956521, "bacc_std": 0.037549306549883546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.05723044904175278, "f1": 0.7303921568627451, "f1_std": 0.06272351574840966, "bacc": 0.7262228260869565, "bacc_std": 0.06103230590721574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 21.54434690031882, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04637609814698561, "f1": 0.8699763593380614, "f1_std": 0.04722889946588117, "bacc": 0.8722826086956521, "bacc_std": 0.04670174345188973} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04860254049068586, "f1": 0.8521505376344086, "f1_std": 0.04923798063924978, "bacc": 0.8566576086956521, "bacc_std": 0.04860847346223987} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05728347373925889, "f1": 0.7555555555555555, "f1_std": 0.05955524089184082, "bacc": 0.7540760869565217, "bacc_std": 0.05917686376224038} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.052330797783850115, "f1": 0.8131793478260869, "f1_std": 0.05399566222272284, "bacc": 0.8131793478260869, "bacc_std": 0.053988770175442145} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.000774263682681127, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04595562358235358, "f1": 0.8354935194416749, "f1_std": 0.04584293970368359, "bacc": 0.8471467391304348, "bacc_std": 0.04417707764789911} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.000774263682681127, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04866442351145729, "f1": 0.8484848484848485, "f1_std": 0.05151307282909686, "bacc": 0.8444293478260869, "bacc_std": 0.05180715187737196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.05487565508184078, "f1": 0.7303921568627451, "f1_std": 0.06041713246084236, "bacc": 0.7262228260869565, "bacc_std": 0.058796374601984086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04563072764908666, "f1": 0.8505434782608696, "f1_std": 0.04702340062159747, "bacc": 0.8505434782608696, "bacc_std": 0.04714745574884524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.056284801172032266, "f1": 0.7758152173913043, "f1_std": 0.05837919276441475, "bacc": 0.7758152173913043, "bacc_std": 0.05841465396802551} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.048348815227307225, "f1": 0.8074229691876751, "f1_std": 0.05286496120252397, "bacc": 0.8009510869565217, "bacc_std": 0.0523990450994989} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05415496589029034, "f1": 0.7782258064516129, "f1_std": 0.054867239446614924, "bacc": 0.7819293478260869, "bacc_std": 0.05436293911811521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03968262521307849, "f1": 0.8879076086956521, "f1_std": 0.04083798507143102, "bacc": 0.8879076086956521, "bacc_std": 0.04106793181582791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.038480287966483194, "f1": 0.905982905982906, "f1_std": 0.040362495966507414, "bacc": 0.9035326086956521, "bacc_std": 0.04107032746648314} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.055129287562124686, "f1": 0.7989365237620472, "f1_std": 0.055031923320996455, "bacc": 0.8097826086956521, "bacc_std": 0.05304205161124931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03959482393753795, "f1": 0.8879076086956521, "f1_std": 0.04077379446731728, "bacc": 0.8879076086956521, "bacc_std": 0.0408179689512438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 21.54434690031882, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.049035335547250036, "f1": 0.8074229691876751, "f1_std": 0.053641715713952556, "bacc": 0.8009510869565217, "bacc_std": 0.052768597385545575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.038947406792593633, "f1": 0.8905835543766578, "f1_std": 0.03875926935438281, "bacc": 0.90625, "bacc_std": 0.033470427712385155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03916287667027168, "f1": 0.9079959852793577, "f1_std": 0.03931333901780281, "bacc": 0.9157608695652174, "bacc_std": 0.03705621989620135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048936038537754495, "f1": 0.8505434782608696, "f1_std": 0.050671072469432775, "bacc": 0.8505434782608696, "bacc_std": 0.050852785225848275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.000774263682681127, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046679848590829943, "f1": 0.8505434782608696, "f1_std": 0.048274801235968695, "bacc": 0.8505434782608696, "bacc_std": 0.048505305744408804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04742815009556667, "f1": 0.8343927735028438, "f1_std": 0.04793823078467611, "bacc": 0.8410326086956521, "bacc_std": 0.04744327525297636} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.051071636610898814, "f1": 0.8106060606060606, "f1_std": 0.05412659700092281, "bacc": 0.8070652173913043, "bacc_std": 0.05434311263528095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.046752324377625776, "f1": 0.8074229691876751, "f1_std": 0.05277400829412781, "bacc": 0.8009510869565217, "bacc_std": 0.05195068859591898} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.048878246441608444, "f1": 0.8106060606060606, "f1_std": 0.051702055789674274, "bacc": 0.8070652173913043, "bacc_std": 0.05167764451677211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.000774263682681127, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04668551370695885, "f1": 0.8505434782608696, "f1_std": 0.04806947581202879, "bacc": 0.8505434782608696, "bacc_std": 0.048081131556000835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047584982699507145, "f1": 0.8521505376344086, "f1_std": 0.048061229422214816, "bacc": 0.8566576086956521, "bacc_std": 0.047089442909126124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05130979962141813, "f1": 0.8131793478260869, "f1_std": 0.05289982990639339, "bacc": 0.8131793478260869, "bacc_std": 0.05309103931959122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048303106345706434, "f1": 0.8307692307692308, "f1_std": 0.050494865110545385, "bacc": 0.8288043478260869, "bacc_std": 0.05078730095183825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04418256639845311, "f1": 0.8699763593380614, "f1_std": 0.04514228714587683, "bacc": 0.8722826086956521, "bacc_std": 0.045077612965095076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.000774263682681127, "split": "test", "acc": 0.7090909090909091, "acc_std": 0.05940714262646642, "f1": 0.7043010752688172, "f1_std": 0.060149211305726896, "bacc": 0.7072010869565217, "bacc_std": 0.06024668222045741} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04360330565980988, "f1": 0.8711943793911007, "f1_std": 0.04374188717202844, "bacc": 0.8783967391304348, "bacc_std": 0.04204950466987741} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04393600154093403, "f1": 0.8663658451926415, "f1_std": 0.04754109468698033, "bacc": 0.8600543478260869, "bacc_std": 0.0483420301297699} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04908417462224566, "f1": 0.8328267477203647, "f1_std": 0.05008452928551458, "bacc": 0.8349184782608696, "bacc_std": 0.05023998106380485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03616849457746341, "f1": 0.9045470322804582, "f1_std": 0.03939841085965545, "bacc": 0.8974184782608696, "bacc_std": 0.041183847575934976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.036290398976192594, "f1": 0.9242424242424243, "f1_std": 0.038616880573169275, "bacc": 0.9191576086956521, "bacc_std": 0.04072923582265042} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.052404731349073745, "f1": 0.7975911676145868, "f1_std": 0.052781258168665426, "bacc": 0.8036684782608696, "bacc_std": 0.05253866498613683} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0450683081188346, "f1": 0.8663658451926415, "f1_std": 0.04847370714574701, "bacc": 0.8600543478260869, "bacc_std": 0.049097999420360895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04351826442820272, "f1": 0.8663658451926415, "f1_std": 0.04692360558179858, "bacc": 0.8600543478260869, "bacc_std": 0.04759204879761667} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04477264513144286, "f1": 0.8683760683760684, "f1_std": 0.04665395816283645, "bacc": 0.8661684782608696, "bacc_std": 0.047109016730706686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04006399838935386, "f1": 0.8863636363636364, "f1_std": 0.04249219488091017, "bacc": 0.8817934782608696, "bacc_std": 0.04360631488537191} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029807093281924622, "f1": 0.9442755825734549, "f1_std": 0.030386033639502368, "bacc": 0.9470108695652174, "bacc_std": 0.02957088574952899} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030771007068879926, "f1": 0.9442755825734549, "f1_std": 0.031298918161542864, "bacc": 0.9470108695652174, "bacc_std": 0.03034394793587456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04600267329637725, "f1": 0.8281846581048247, "f1_std": 0.049665836522629875, "bacc": 0.8226902173913043, "bacc_std": 0.04934354741300053} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04805521617527997, "f1": 0.8035714285714286, "f1_std": 0.05524400725138334, "bacc": 0.7948369565217391, "bacc_std": 0.05355161083721272} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 2.782559402207126, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030321146100146787, "f1": 0.9435897435897436, "f1_std": 0.031666321061234695, "bacc": 0.9408967391304348, "bacc_std": 0.03289900672497691} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.046747558384112764, "f1": 0.8699763593380614, "f1_std": 0.04770391581320217, "bacc": 0.8722826086956521, "bacc_std": 0.04757125621375379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03931385052338687, "f1": 0.8863636363636364, "f1_std": 0.0416884030128631, "bacc": 0.8817934782608696, "bacc_std": 0.042572610728623936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04666488937977293, "f1": 0.8505434782608696, "f1_std": 0.048050969595217326, "bacc": 0.8505434782608696, "bacc_std": 0.047770239827216815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05045904975799025, "f1": 0.8106060606060606, "f1_std": 0.05369336466239111, "bacc": 0.8070652173913043, "bacc_std": 0.05416048560444908} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.045682859327800786, "f1": 0.8683760683760684, "f1_std": 0.047610515258164954, "bacc": 0.8661684782608696, "bacc_std": 0.04805182529115244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.054734931508797234, "f1": 0.7931623931623932, "f1_std": 0.05728113932119837, "bacc": 0.7914402173913043, "bacc_std": 0.057196249730923236} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05477492338109619, "f1": 0.7518222839291913, "f1_std": 0.0587430562104279, "bacc": 0.7479619565217391, "bacc_std": 0.057822932420367054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041371911260205445, "f1": 0.8863636363636364, "f1_std": 0.043820628122047824, "bacc": 0.8817934782608696, "bacc_std": 0.04476997013576171} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.046719504129599874, "f1": 0.8307692307692308, "f1_std": 0.04839938767731535, "bacc": 0.8288043478260869, "bacc_std": 0.04859132770800029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 1291.5496650148827, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0405195351314983, "f1": 0.81673454276194, "f1_std": 0.052330430502065844, "bacc": 0.8043478260869565, "bacc_std": 0.048447270265921924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 21.54434690031882, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.06231682314015206, "f1": 0.7623795280824195, "f1_std": 0.062315653594833126, "bacc": 0.7724184782608696, "bacc_std": 0.06156806353383415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03685550969721248, "f1": 0.9252717391304348, "f1_std": 0.03792441979617142, "bacc": 0.9252717391304348, "bacc_std": 0.03822319922897879} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.000774263682681127, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04768053469110587, "f1": 0.8711943793911007, "f1_std": 0.047907155346685555, "bacc": 0.8783967391304348, "bacc_std": 0.04651928468357661} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.053572547552264746, "f1": 0.795677136102668, "f1_std": 0.054664263723678726, "bacc": 0.7975543478260869, "bacc_std": 0.054563580757625574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05192294354248129, "f1": 0.8328267477203647, "f1_std": 0.05299738209818629, "bacc": 0.8349184782608696, "bacc_std": 0.05315559527087562} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05843078550058073, "f1": 0.7555555555555555, "f1_std": 0.06111117998100611, "bacc": 0.7540760869565217, "bacc_std": 0.060775736470643854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043746467153699974, "f1": 0.8683760683760684, "f1_std": 0.046035140619400575, "bacc": 0.8661684782608696, "bacc_std": 0.046785330900828315} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.05105354829381037, "f1": 0.7989365237620472, "f1_std": 0.05103618268451549, "bacc": 0.8097826086956521, "bacc_std": 0.04947789597247391} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04069107157332089, "f1": 0.8863636363636364, "f1_std": 0.0430376043388297, "bacc": 0.8817934782608696, "bacc_std": 0.04384756488923303} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04768108934359337, "f1": 0.8250265111346766, "f1_std": 0.05378052432788733, "bacc": 0.8165760869565217, "bacc_std": 0.0534193793273984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03721412688307254, "f1": 0.905982905982906, "f1_std": 0.03888714581403604, "bacc": 0.9035326086956521, "bacc_std": 0.039799776621288245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04225027360662544, "f1": 0.8891129032258065, "f1_std": 0.04266528450184467, "bacc": 0.8940217391304348, "bacc_std": 0.041470414069410676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.000774263682681127, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043549170462063726, "f1": 0.8879076086956521, "f1_std": 0.04473971211363276, "bacc": 0.8879076086956521, "bacc_std": 0.044696257420486496} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05250153559344164, "f1": 0.8106060606060606, "f1_std": 0.055245884499287634, "bacc": 0.8070652173913043, "bacc_std": 0.05480111746026415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04716065035756778, "f1": 0.8505434782608696, "f1_std": 0.048618830974758935, "bacc": 0.8505434782608696, "bacc_std": 0.04874717240721238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04885750574649297, "f1": 0.8484848484848485, "f1_std": 0.05216045704278036, "bacc": 0.8444293478260869, "bacc_std": 0.052713729772551225} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04524542440680676, "f1": 0.8711943793911007, "f1_std": 0.04539076192262555, "bacc": 0.8783967391304348, "bacc_std": 0.04368749112626374} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 13.834 | 129.12 | 0.92446 | 0.042482 | 0.92223 | 0.043837 | 0.92109 | 0.044485 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 13.834 | 129.12 | 0.84691 | 0.048977 | 0.84208 | 0.050933 | 0.84251 | 0.051528 | + + +done! total time: 0:05:04 diff --git a/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..11d434dc8db74aa684d87b9176cb08b4d9e667a6 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..d666a7656de21777023eb48736312cabd918be0c --- /dev/null +++ b/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,train,0.811965811965812,0.014184050459613035,0.8075785766496113,0.014705206818707848,0.804993761901635,0.01468931857116284 +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,test,0.5887096774193549,0.03986703642274096,0.5826018084614877,0.0406454111351761,0.5827441738675045,0.04012573913716059 +flat_mae,patch,logistic,abide_dx,1,2.782559402207126,train,0.9914529914529915,0.0036054420906080765,0.9913621262458472,0.003644014872921544,0.9913621262458472,0.0036708331499072755 +flat_mae,patch,logistic,abide_dx,1,2.782559402207126,test,0.6370967741935484,0.042218203312634345,0.6368842324461508,0.042321457616098505,0.6407563025210083,0.04237084169077069 +flat_mae,patch,logistic,abide_dx,2,2.782559402207126,train,0.9857549857549858,0.004565873510685222,0.9856035437430786,0.004614256763289039,0.9856035437430786,0.004627977624818537 +flat_mae,patch,logistic,abide_dx,2,2.782559402207126,test,0.6370967741935484,0.04226341692279025,0.6317074780542539,0.04298412013147237,0.6313025210084033,0.04270889991775985 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,train,0.8076923076923077,0.014218857752291654,0.8025768954347079,0.014706636729944041,0.799593946105574,0.014574352563153963 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,test,0.5806451612903226,0.045420007784896084,0.5643243243243243,0.04854009055246231,0.5672268907563025,0.046450185755562055 +flat_mae,patch,logistic,abide_dx,4,0.3593813663804626,train,0.9145299145299145,0.010353344951769317,0.9132276353088535,0.01055650083551923,0.9115540789959394,0.010722027482935078 +flat_mae,patch,logistic,abide_dx,4,0.3593813663804626,test,0.6854838709677419,0.04040609416126988,0.6794591370053689,0.041905376035152674,0.6785714285714286,0.041440763177767466 +flat_mae,patch,logistic,abide_dx,5,2.782559402207126,train,0.9814814814814815,0.005318998285563575,0.9812676654781918,0.0053869513843374925,0.9808416389811738,0.005537550689551136 +flat_mae,patch,logistic,abide_dx,5,2.782559402207126,test,0.5403225806451613,0.04440126309478225,0.5366764995083579,0.04478388829765884,0.5367647058823529,0.04479448722869246 +flat_mae,patch,logistic,abide_dx,6,0.3593813663804626,train,0.896011396011396,0.011670696318368496,0.8946775648785461,0.01184827180990549,0.8938722775932078,0.011946825661656075 +flat_mae,patch,logistic,abide_dx,6,0.3593813663804626,test,0.6451612903225806,0.043691223339227425,0.6436781609195402,0.043897930873984754,0.6449579831932774,0.04399796712118936 +flat_mae,patch,logistic,abide_dx,7,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,7,21.54434690031882,test,0.5,0.044912039712467246,0.4952731092436975,0.045139445584536045,0.4952731092436975,0.04503509599525088 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,train,0.9031339031339032,0.011369306316887724,0.9016579866833674,0.011598296412625846,0.9000369139904023,0.011749559515334889 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,test,0.6774193548387096,0.04034387824647876,0.6760710553814002,0.04068935106765971,0.6775210084033614,0.04082539132175891 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,train,0.8988603988603988,0.011289748158701486,0.8973555281426915,0.011478261674122789,0.8958656330749355,0.011535522161251222 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,test,0.6612903225806451,0.04060698091539773,0.6481081081081081,0.04326395866640014,0.6486344537815126,0.041531009150572014 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,train,0.8005698005698005,0.015351472160755641,0.7962686567164179,0.015759724397017862,0.7940199335548173,0.015669745422012386 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,test,0.6451612903225806,0.04299672990493823,0.6356837606837606,0.044729074743022,0.6355042016806722,0.04370760104163061 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,train,0.9031339031339032,0.011132793148471844,0.9021040974529346,0.011247810709823836,0.9021040974529346,0.011271346089527285 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,test,0.5967741935483871,0.042756467401816795,0.5915678524374176,0.04339356841745959,0.5913865546218487,0.043071913098189626 +flat_mae,patch,logistic,abide_dx,12,0.005994842503189409,train,0.7293447293447294,0.01686379248137065,0.7209156344158019,0.017598796803755902,0.7190845330380214,0.01727132463725803 +flat_mae,patch,logistic,abide_dx,12,0.005994842503189409,test,0.5806451612903226,0.04248297694212029,0.5752305665349143,0.04294893924017232,0.5751050420168067,0.04269797911922068 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,train,0.9045584045584045,0.010552984603722103,0.9033342033816792,0.010698776782423054,0.9025101513473606,0.010761929305109504 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,test,0.5887096774193549,0.04535965651463766,0.5886829268292683,0.04540295739010405,0.5934873949579832,0.04541250017870364 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,train,0.9017094017094017,0.011046939755590821,0.9002469217161368,0.011254157630227383,0.8987449243263197,0.011369295863874189 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,test,0.6209677419354839,0.04400212841318858,0.6097756946769334,0.04570906568197248,0.6102941176470589,0.04461148180701605 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,train,0.8048433048433048,0.014696155027404557,0.8012026152437373,0.015075152791712898,0.7993724621631598,0.015061873450159111 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,test,0.6370967741935484,0.04249032453983518,0.626380984265149,0.04484125549803293,0.6265756302521008,0.04355669106195808 +flat_mae,patch,logistic,abide_dx,16,0.3593813663804626,train,0.9074074074074074,0.011730738007593848,0.9060946873424365,0.011956611604459022,0.904798818752307,0.01216239613965827 +flat_mae,patch,logistic,abide_dx,16,0.3593813663804626,test,0.6048387096774194,0.043527172251888555,0.602745995423341,0.0438257068756977,0.6034663865546219,0.04387562622641546 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,train,0.9017094017094017,0.011639694844953441,0.9003831417624522,0.011835005012397417,0.8993355481727574,0.011945005461121417 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,test,0.5887096774193549,0.043870644567120864,0.5765651155005022,0.045779157696007315,0.5777310924369747,0.04449907324820548 +flat_mae,patch,logistic,abide_dx,18,2.782559402207126,train,0.9843304843304843,0.004640041765035057,0.9841495630969315,0.0046980190530642955,0.9837209302325581,0.004825060388054005 +flat_mae,patch,logistic,abide_dx,18,2.782559402207126,test,0.6048387096774194,0.04169557928540973,0.5989703649924097,0.042443494849432606,0.5987394957983193,0.04205363700926739 +flat_mae,patch,logistic,abide_dx,19,0.3593813663804626,train,0.9002849002849003,0.011214943295682797,0.8990384615384615,0.011368390349119073,0.8983388704318938,0.011431244150614257 +flat_mae,patch,logistic,abide_dx,19,0.3593813663804626,test,0.6935483870967742,0.042771112986914876,0.6869519000797236,0.044106866412747935,0.6859243697478992,0.04356041664682136 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,train,0.8034188034188035,0.01512738849932112,0.7998297408093097,0.015539761579776054,0.7980804724990771,0.015561044280294426 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,test,0.6532258064516129,0.04369669787051054,0.6448884448884449,0.04515951772187306,0.6444327731092437,0.04429909470697927 +flat_mae,patch,logistic,abide_dx,21,0.3593813663804626,train,0.8988603988603988,0.011686943061027707,0.8974265046355845,0.011900412653798273,0.8961609449981542,0.012045248994221873 +flat_mae,patch,logistic,abide_dx,21,0.3593813663804626,test,0.5967741935483871,0.04352149707439134,0.5929621848739496,0.044084927709447594,0.5929621848739496,0.04387646002428194 +flat_mae,patch,logistic,abide_dx,22,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,22,166.81005372000556,test,0.532258064516129,0.04344634101217863,0.5221897422269466,0.044813620274288174,0.523109243697479,0.0440088526508549 +flat_mae,patch,logistic,abide_dx,23,0.046415888336127774,train,0.7977207977207977,0.014363058196899654,0.7928170594837262,0.014917507404187664,0.7902547065337764,0.014865727032756602 +flat_mae,patch,logistic,abide_dx,23,0.046415888336127774,test,0.5806451612903226,0.042779362797657115,0.5643243243243243,0.04585237692583284,0.5672268907563025,0.043803071896159 +flat_mae,patch,logistic,abide_dx,24,0.3593813663804626,train,0.9259259259259259,0.010016667792014017,0.925,0.010167039930663705,0.9242524916943522,0.01027875100887993 +flat_mae,patch,logistic,abide_dx,24,0.3593813663804626,test,0.5645161290322581,0.04537588416420845,0.5588932806324111,0.045569463525020316,0.5588235294117647,0.04534764734689067 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,train,0.9116809116809117,0.010844367366009166,0.9104591836734695,0.01104357616388081,0.909265411590993,0.011222256394172726 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,test,0.5887096774193549,0.0446859314122584,0.5873947935016637,0.04478254730062658,0.5887605042016807,0.04479694953406288 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,train,0.7250712250712251,0.01627737106613361,0.7182792255724929,0.01682209965333427,0.7166851236618679,0.016613606882569694 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,test,0.5241935483870968,0.04463190589626891,0.5127539127539127,0.0460817556621855,0.5141806722689075,0.04504119615849044 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,train,0.8974358974358975,0.011723215384296217,0.8960863808685784,0.011904500293790882,0.8951642672572906,0.011990821515678027 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,test,0.6370967741935484,0.041410721012514064,0.6317074780542539,0.042345486029936624,0.6313025210084033,0.04212378665463916 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,train,0.8917378917378918,0.012546892102210069,0.8905213736949242,0.01270566591443217,0.8902916205241787,0.01278723995329521 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,test,0.6532258064516129,0.04058220931392434,0.6448884448884449,0.04211634261758473,0.6444327731092437,0.04137330755659312 +flat_mae,patch,logistic,abide_dx,29,0.3593813663804626,train,0.9017094017094017,0.011093624928501463,0.9003831417624522,0.011283841832558148,0.8993355481727574,0.011429985271903973 +flat_mae,patch,logistic,abide_dx,29,0.3593813663804626,test,0.6774193548387096,0.04020492188795608,0.6704756842944459,0.04133235923501435,0.6696428571428572,0.040830026166809634 +flat_mae,patch,logistic,abide_dx,30,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,30,21.54434690031882,test,0.6209677419354839,0.047534356926853774,0.6189604445897352,0.04813161732103974,0.6197478991596639,0.04823617040532753 +flat_mae,patch,logistic,abide_dx,31,0.3593813663804626,train,0.8931623931623932,0.011619482347740974,0.8915727409958009,0.011878832744664473,0.8901070505721669,0.012111128337552698 +flat_mae,patch,logistic,abide_dx,31,0.3593813663804626,test,0.6129032258064516,0.044981774549514034,0.6063492063492064,0.046284154247038044,0.60609243697479,0.04567130660818584 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,train,0.9088319088319088,0.010978436092710498,0.9077506837389226,0.0111309974842833,0.9072720561092654,0.011239052172062917 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,test,0.6693548387096774,0.04139438751060018,0.6644445911160979,0.042324513534665695,0.6638655462184874,0.042100397037095685 +flat_mae,patch,logistic,abide_dx,33,0.046415888336127774,train,0.792022792022792,0.01553787621249532,0.7878890728476822,0.015973522530907627,0.7859726836471022,0.015938150893941686 +flat_mae,patch,logistic,abide_dx,33,0.046415888336127774,test,0.6290322580645161,0.043893788537974716,0.6255252100840336,0.04462701636409209,0.6255252100840336,0.04459972637216828 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,train,0.9031339031339032,0.011102990677166975,0.9016579866833674,0.011348414288373876,0.9000369139904023,0.011555767788768979 +flat_mae,patch,logistic,abide_dx,34,0.3593813663804626,test,0.6774193548387096,0.04019649965864556,0.671957671957672,0.041299859041489376,0.671218487394958,0.040840894017764307 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,train,0.9017094017094017,0.010639575868927302,0.9005745321534795,0.010769928074037904,0.9002214839424141,0.0108163123068963 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,test,0.6451612903225806,0.041215061828657515,0.6436781609195402,0.04129597454394656,0.6449579831932774,0.04135049695890393 +flat_mae,patch,logistic,abide_dx,36,2.782559402207126,train,0.9814814814814815,0.004912265880911991,0.9812790399507667,0.0049666047975930865,0.9811369509043928,0.0049948818557491725 +flat_mae,patch,logistic,abide_dx,36,2.782559402207126,test,0.6129032258064516,0.04068519533770254,0.6025641025641025,0.0425833170257595,0.6029411764705883,0.04140629486725668 +flat_mae,patch,logistic,abide_dx,37,0.3593813663804626,train,0.8903133903133903,0.010758900063145184,0.8884387325266293,0.01102516671875098,0.8863418235511259,0.011201308918959596 +flat_mae,patch,logistic,abide_dx,37,0.3593813663804626,test,0.6693548387096774,0.04053683721870948,0.6630211440312852,0.04180180141889523,0.6622899159663866,0.041339725778072794 +flat_mae,patch,logistic,abide_dx,38,0.3593813663804626,train,0.9102564102564102,0.010894555230675538,0.9090454772613693,0.011071476974805962,0.9079734219269102,0.011180144405891615 +flat_mae,patch,logistic,abide_dx,38,0.3593813663804626,test,0.5483870967741935,0.04306927302470023,0.5407407407407407,0.04343785552484149,0.5409663865546219,0.043139631325450276 +flat_mae,patch,logistic,abide_dx,39,2.782559402207126,train,0.9886039886039886,0.004145936450554559,0.988482834994463,0.004189854139512405,0.988482834994463,0.004201358661387505 +flat_mae,patch,logistic,abide_dx,39,2.782559402207126,test,0.5241935483870968,0.046134928001306606,0.5234186697934988,0.04619791320530727,0.5252100840336134,0.04617520293400343 +flat_mae,patch,logistic,abide_dx,40,0.3593813663804626,train,0.905982905982906,0.011003002074827004,0.9048076923076923,0.011174412913270447,0.9040974529346621,0.011306318110847181 +flat_mae,patch,logistic,abide_dx,40,0.3593813663804626,test,0.5645161290322581,0.04415468034133852,0.5503626107977437,0.04632525264221235,0.5525210084033614,0.0448338604421838 +flat_mae,patch,logistic,abide_dx,41,0.3593813663804626,train,0.9145299145299145,0.011140212820477814,0.913288371748314,0.011359790847776455,0.9118493909191583,0.011556244366077037 +flat_mae,patch,logistic,abide_dx,41,0.3593813663804626,test,0.5967741935483871,0.04052014824938237,0.5941345902068604,0.04070846445764479,0.5945378151260504,0.04073134881063851 +flat_mae,patch,logistic,abide_dx,42,0.3593813663804626,train,0.9088319088319088,0.010403689313174938,0.9076923076923077,0.01055637779398555,0.9069767441860466,0.010656206022608283 +flat_mae,patch,logistic,abide_dx,42,0.3593813663804626,test,0.5887096774193549,0.045768552586107236,0.5854473942969518,0.04617555455924505,0.585609243697479,0.04608857231745272 +flat_mae,patch,logistic,abide_dx,43,0.3593813663804626,train,0.8945868945868946,0.011351366967595545,0.8929807502142528,0.011575321778985757,0.8913990402362495,0.011694497466387345 +flat_mae,patch,logistic,abide_dx,43,0.3593813663804626,test,0.6532258064516129,0.042824764539415736,0.6429862738533645,0.04459204173697827,0.6428571428571428,0.043548470335819926 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,train,0.7222222222222222,0.015953209915751054,0.7140039653771182,0.016643146156381225,0.7123292727943891,0.016360032653510534 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,test,0.5725806451612904,0.04328670181002488,0.5478500171998624,0.04678149767601053,0.5551470588235294,0.04391614109192465 +flat_mae,patch,logistic,abide_dx,45,0.046415888336127774,train,0.7962962962962963,0.014889299933377237,0.7918145148746876,0.015364055444778748,0.7895533407161315,0.015315193195835498 +flat_mae,patch,logistic,abide_dx,45,0.046415888336127774,test,0.6451612903225806,0.039707865973724964,0.6288435374149659,0.04282728625064404,0.6307773109243697,0.04066740575742161 +flat_mae,patch,logistic,abide_dx,46,2.782559402207126,train,0.9886039886039886,0.0040369917985590776,0.988482834994463,0.00407965628537601,0.988482834994463,0.0041044363012770315 +flat_mae,patch,logistic,abide_dx,46,2.782559402207126,test,0.5887096774193549,0.04331820588965382,0.5826018084614877,0.043943904309058646,0.5824579831932774,0.043727187450797456 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,train,0.8062678062678063,0.014468519808113773,0.8022551594435837,0.014915498670349,0.8000738279808047,0.014906575756127341 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,test,0.6048387096774194,0.04127732595356936,0.6017043592264831,0.04145355692215413,0.601890756302521,0.041381414428800875 +flat_mae,patch,logistic,abide_dx,48,0.3593813663804626,train,0.9002849002849003,0.011586724710264276,0.8991023103394467,0.01174876792095118,0.8986341823551125,0.011843909461476705 +flat_mae,patch,logistic,abide_dx,48,0.3593813663804626,test,0.6370967741935484,0.0426902639583855,0.6317074780542539,0.04346825869890181,0.6313025210084033,0.04307203205157088 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,train,0.7207977207977208,0.01758002804030306,0.7135051846916254,0.01819629654409282,0.7119232188999631,0.017951910793769784 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,test,0.6451612903225806,0.040773207802498476,0.6313513513513513,0.043250062711484606,0.6323529411764706,0.04157079468091591 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,train,0.7849002849002849,0.014885623844020509,0.7807134951747536,0.015262437910109297,0.778922111480251,0.015198944460862106 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,test,0.5887096774193549,0.046001049053775576,0.5788211788211788,0.04764582471862923,0.5793067226890757,0.04664967338769393 +flat_mae,patch,logistic,abide_dx,51,0.046415888336127774,train,0.8048433048433048,0.014865482881837935,0.8008832243277992,0.015312291733981185,0.798781838316722,0.015300797374344033 +flat_mae,patch,logistic,abide_dx,51,0.046415888336127774,test,0.5161290322580645,0.04234657069879415,0.5032051282051282,0.04390608628561085,0.5052521008403361,0.042900967305268344 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,train,0.8062678062678063,0.014097266840156003,0.802089552238806,0.014502423987637672,0.7997785160575859,0.014442398171258584 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,test,0.6774193548387096,0.041187431657422634,0.6688034188034189,0.04253306992257641,0.6680672268907563,0.0417211046750161 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,train,0.915954415954416,0.010801290857543451,0.9149840202471782,0.010933299898764983,0.9146179401993355,0.010986003834937537 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,test,0.6129032258064516,0.04566458038989544,0.607905138339921,0.0462920855443966,0.6076680672268908,0.046031269726797476 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,train,0.801994301994302,0.014318408079409208,0.798610179496123,0.014656624189217156,0.7970837947582133,0.014675959419186306 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,test,0.6209677419354839,0.043398484535033754,0.6097756946769334,0.045251727282020406,0.6102941176470589,0.04406853879473909 +flat_mae,patch,logistic,abide_dx,55,0.3593813663804626,train,0.9131054131054132,0.0108427958135068,0.911620294599018,0.01110257041057706,0.9093761535622,0.011298128726283722 +flat_mae,patch,logistic,abide_dx,55,0.3593813663804626,test,0.6048387096774194,0.04570425617224357,0.6041951664386684,0.0457648339190002,0.6066176470588236,0.045845897872939564 +flat_mae,patch,logistic,abide_dx,56,0.046415888336127774,train,0.7962962962962963,0.014916890291240647,0.7916347536941212,0.015464760215909498,0.7892580287929125,0.015413842777474826 +flat_mae,patch,logistic,abide_dx,56,0.046415888336127774,test,0.6370967741935484,0.04084565247002908,0.6241664982824813,0.04355276081627931,0.625,0.04196559551126373 +flat_mae,patch,logistic,abide_dx,57,0.3593813663804626,train,0.9002849002849003,0.010751647186580197,0.8990384615384615,0.010891753955193338,0.8983388704318938,0.01094478290788864 +flat_mae,patch,logistic,abide_dx,57,0.3593813663804626,test,0.6693548387096774,0.0420080761275444,0.6644445911160979,0.042619278601365794,0.6638655462184874,0.04234707742260963 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,train,0.8005698005698005,0.014617893521850004,0.7967691505992704,0.01499456162064704,0.7949058693244739,0.014975182750945852 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,test,0.5725806451612904,0.044624913831214155,0.5623043623043623,0.04582797828208603,0.5630252100840336,0.04498807182247631 +flat_mae,patch,logistic,abide_dx,59,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,59,166.81005372000556,test,0.532258064516129,0.045421127508479126,0.5291961246399581,0.045460489229981915,0.5294117647058824,0.04551528890666038 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,train,0.9045584045584045,0.011148705484505531,0.9030696466660484,0.0113726946822466,0.901328903654485,0.011514081304335247 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,test,0.5967741935483871,0.044395512142998486,0.5880946053680574,0.045568895506948745,0.5882352941176471,0.04490751578920159 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,train,0.8048433048433048,0.014688640876891979,0.8013570034389814,0.01503071893336397,0.7996677740863787,0.015049854973183426 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,test,0.5887096774193549,0.041232554725589725,0.5740553647201454,0.04285332629420543,0.576155462184874,0.04150065718538355 +flat_mae,patch,logistic,abide_dx,62,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,62,166.81005372000556,test,0.5645161290322581,0.04421672043791942,0.5603991596638656,0.04461788846657464,0.5603991596638656,0.04465669892927136 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,train,0.801994301994302,0.014728877419336726,0.7987597007937998,0.01507303195635032,0.7973791066814322,0.015128912275691088 +flat_mae,patch,logistic,abide_dx,63,0.046415888336127774,test,0.6209677419354839,0.04374649086566044,0.6153389215233318,0.04484836574604478,0.6150210084033614,0.044415665834303154 +flat_mae,patch,logistic,abide_dx,64,0.046415888336127774,train,0.8034188034188035,0.015821910837757046,0.8005632550211221,0.01610958149364263,0.7995570321151717,0.01613521455960174 +flat_mae,patch,logistic,abide_dx,64,0.046415888336127774,test,0.6048387096774194,0.04217419342501986,0.5931704050887178,0.04391861940298793,0.5940126050420168,0.04277299154849585 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,train,0.9145299145299145,0.010269893005683907,0.913165381602276,0.010505440693159571,0.9112587670727206,0.01073016584169987 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,test,0.5967741935483871,0.04395005620208571,0.5880946053680574,0.04502880737822516,0.5882352941176471,0.04434217598638901 +flat_mae,patch,logistic,abide_dx,66,0.3593813663804626,train,0.9102564102564102,0.010448195424257583,0.9091052957171013,0.01061411566449327,0.9082687338501292,0.010746845533342868 +flat_mae,patch,logistic,abide_dx,66,0.3593813663804626,test,0.5806451612903226,0.043188052241326935,0.5796610169491525,0.04317206858797906,0.58140756302521,0.04320510359671049 +flat_mae,patch,logistic,abide_dx,67,0.3593813663804626,train,0.896011396011396,0.011401415741337163,0.8946082514298406,0.011596301045486274,0.893576965669989,0.01171017845530642 +flat_mae,patch,logistic,abide_dx,67,0.3593813663804626,test,0.6370967741935484,0.0423797803868774,0.6317074780542539,0.04345647325267356,0.6313025210084033,0.04317116044447894 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,train,0.915954415954416,0.010500605168588311,0.9148203675939808,0.010663470246643963,0.9137320044296788,0.010763231886046904 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,test,0.6209677419354839,0.03964245443395737,0.6167554415729598,0.040291330723535655,0.6165966386554622,0.04012131632067222 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,train,0.9102564102564102,0.010899071065150341,0.9091052957171013,0.01107564656628116,0.9082687338501292,0.011212801371624472 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,test,0.6935483870967742,0.04046257954446677,0.6869519000797236,0.04179710253513917,0.6859243697478992,0.0411675037398334 +flat_mae,patch,logistic,abide_dx,70,2.782559402207126,train,0.9786324786324786,0.0052041043658629105,0.978372272143774,0.005275717339939196,0.9776670358065707,0.005439383193355006 +flat_mae,patch,logistic,abide_dx,70,2.782559402207126,test,0.6290322580645161,0.0392785617230338,0.6210470369386127,0.040254558314939806,0.6207983193277311,0.03971241046941933 +flat_mae,patch,logistic,abide_dx,71,0.046415888336127774,train,0.8105413105413105,0.01507205416749109,0.8073032652732688,0.015403587623460361,0.8057216685123662,0.015393200615443116 +flat_mae,patch,logistic,abide_dx,71,0.046415888336127774,test,0.6693548387096774,0.03884330430437141,0.6473118279569892,0.04341607946315786,0.6512605042016807,0.04014279605429215 +flat_mae,patch,logistic,abide_dx,72,0.3593813663804626,train,0.8988603988603988,0.011354910212125806,0.8975631110462571,0.01156051447095803,0.8967515688445922,0.011765690103681682 +flat_mae,patch,logistic,abide_dx,72,0.3593813663804626,test,0.6854838709677419,0.04265595429766805,0.6829891838741396,0.043069199706008955,0.6832983193277311,0.043072118547777184 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,train,0.7934472934472935,0.015622975692929662,0.7890812945630259,0.01613281806615644,0.786969361387966,0.016100403158732332 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,test,0.6290322580645161,0.041491144699194925,0.6191239316239316,0.04356701629263569,0.6192226890756303,0.04249808265436233 +flat_mae,patch,logistic,abide_dx,74,0.3593813663804626,train,0.9116809116809117,0.010824750238856429,0.9105769230769231,0.010962878253296409,0.9098560354374308,0.011007459559452516 +flat_mae,patch,logistic,abide_dx,74,0.3593813663804626,test,0.6048387096774194,0.04390634247428613,0.602745995423341,0.04425111017027571,0.6034663865546219,0.044406763201912815 +flat_mae,patch,logistic,abide_dx,75,0.046415888336127774,train,0.7934472934472935,0.015191876996578356,0.7890812945630259,0.015612370080781875,0.786969361387966,0.01551831263729037 +flat_mae,patch,logistic,abide_dx,75,0.046415888336127774,test,0.6451612903225806,0.040111058105092336,0.6313513513513513,0.042568159545772324,0.6323529411764706,0.04086883044506228 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,train,0.9088319088319088,0.010829288436451852,0.9076923076923077,0.01099585728841302,0.9069767441860466,0.011111128569575286 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,test,0.6532258064516129,0.04224828746783258,0.6429862738533645,0.04463284796685339,0.6428571428571428,0.04332009667818785 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,train,0.8931623931623932,0.01122260193818856,0.8914958731336362,0.011450029489237038,0.8898117386489479,0.011568097697140026 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,test,0.5725806451612904,0.04262086016405383,0.5718845677806006,0.04260897121051207,0.5740546218487395,0.04253411684871575 +flat_mae,patch,logistic,abide_dx,78,0.3593813663804626,train,0.9102564102564102,0.010810654215157527,0.9090454772613693,0.011007907858164885,0.9079734219269102,0.01119261672930629 +flat_mae,patch,logistic,abide_dx,78,0.3593813663804626,test,0.6129032258064516,0.043166835074675135,0.6063492063492064,0.044198564420308464,0.60609243697479,0.04375146203938788 +flat_mae,patch,logistic,abide_dx,79,0.005994842503189409,train,0.7193732193732194,0.015616832180643483,0.7119085066713885,0.01638918992925664,0.7103359173126615,0.016159437017598317 +flat_mae,patch,logistic,abide_dx,79,0.005994842503189409,test,0.5725806451612904,0.043371804476147524,0.5544708833299437,0.04629075233302461,0.5582983193277311,0.04407747020135957 +flat_mae,patch,logistic,abide_dx,80,0.3593813663804626,train,0.9145299145299145,0.010450872539872858,0.9132276353088535,0.010635596133335397,0.9115540789959394,0.010713399759946247 +flat_mae,patch,logistic,abide_dx,80,0.3593813663804626,test,0.6451612903225806,0.04450669313272805,0.6428384393820372,0.044748306366263244,0.6433823529411764,0.04467950962021125 +flat_mae,patch,logistic,abide_dx,81,2.782559402207126,train,0.9843304843304843,0.0046520046639362,0.9841396662387677,0.0047169148404125226,0.9834256183093393,0.004944153773053877 +flat_mae,patch,logistic,abide_dx,81,2.782559402207126,test,0.6532258064516129,0.04116418169134931,0.6429862738533645,0.04305946450374188,0.6428571428571428,0.041865606031987704 +flat_mae,patch,logistic,abide_dx,82,0.3593813663804626,train,0.886039886039886,0.01135280234366001,0.884303513745138,0.011592932236763795,0.8827611664820967,0.011751511856720184 +flat_mae,patch,logistic,abide_dx,82,0.3593813663804626,test,0.6451612903225806,0.041339197384257585,0.6405797101449275,0.042201249059175105,0.6402310924369747,0.04199872324878796 +flat_mae,patch,logistic,abide_dx,83,0.3593813663804626,train,0.8988603988603988,0.011243356475226571,0.8972827598998423,0.011487971013919058,0.8955703211517165,0.011657507695272577 +flat_mae,patch,logistic,abide_dx,83,0.3593813663804626,test,0.6451612903225806,0.042839134762756866,0.6428384393820372,0.04341799874536937,0.6433823529411764,0.04355430613127878 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,train,0.8988603988603988,0.011612170394822787,0.8977547566541875,0.011738564421654893,0.8976375046142488,0.011773486485418538 +flat_mae,patch,logistic,abide_dx,84,0.3593813663804626,test,0.6612903225806451,0.04171159527062962,0.6580882352941176,0.042270723203959214,0.6580882352941176,0.04218995342966819 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,train,0.7991452991452992,0.014250208739514353,0.7948997416095631,0.014726690400816094,0.7927279438907346,0.014721454425955816 +flat_mae,patch,logistic,abide_dx,85,0.046415888336127774,test,0.6532258064516129,0.041873188842128234,0.6480760345851759,0.04260036460283504,0.6475840336134454,0.0422727386038064 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,train,0.7934472934472935,0.015048920369977152,0.7899170937189772,0.015430809920555856,0.7884459210040606,0.015449351852499236 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,test,0.6129032258064516,0.04504449182193487,0.5978378378378378,0.04798404932386272,0.5997899159663866,0.045977793048053786 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,train,0.9102564102564102,0.010378796160685053,0.9091635430038512,0.010533375755549867,0.9085640457733482,0.010668883747598184 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,test,0.6532258064516129,0.0415967835745277,0.6530227110040997,0.041607005112683254,0.6570378151260504,0.041337604033593794 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,train,0.8903133903133903,0.011342207389237921,0.8887583219287324,0.011559405624171053,0.8875230712440014,0.0117212611542771 +flat_mae,patch,logistic,abide_dx,88,0.3593813663804626,test,0.6048387096774194,0.04457738597395042,0.6017043592264831,0.04484420897030573,0.601890756302521,0.04482819258722477 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,train,0.9088319088319088,0.01115405682524735,0.9076323385498475,0.01134259173441518,0.9066814322628276,0.011478519854424631 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,test,0.5645161290322581,0.04631796128912919,0.5603991596638656,0.04689085899495814,0.5603991596638656,0.04688105543120096 +flat_mae,patch,logistic,abide_dx,90,0.3593813663804626,train,0.896011396011396,0.01110236305486659,0.8949366803140049,0.0111949653876589,0.8950535252860834,0.011156010570535528 +flat_mae,patch,logistic,abide_dx,90,0.3593813663804626,test,0.6370967741935484,0.04265285604777548,0.6217205613178767,0.045827865203947944,0.6234243697478992,0.04372765616999762 +flat_mae,patch,logistic,abide_dx,91,0.005994842503189409,train,0.7207977207977208,0.01670107996593742,0.713235294117647,0.017391145034934937,0.7116279069767442,0.017132827068252983 +flat_mae,patch,logistic,abide_dx,91,0.005994842503189409,test,0.5887096774193549,0.042974825048267974,0.5740553647201454,0.04518902625309671,0.576155462184874,0.04369201013925574 +flat_mae,patch,logistic,abide_dx,92,0.3593813663804626,train,0.905982905982906,0.011179078771389458,0.9046172089231453,0.011389779659549288,0.9032115171650055,0.01154540578329322 +flat_mae,patch,logistic,abide_dx,92,0.3593813663804626,test,0.6048387096774194,0.04395116898266493,0.6017043592264831,0.04428460652586019,0.601890756302521,0.04424579233431141 +flat_mae,patch,logistic,abide_dx,93,0.3593813663804626,train,0.8945868945868946,0.010884042567471462,0.8931287030941408,0.011059984697150834,0.8919896640826874,0.011142303569420644 +flat_mae,patch,logistic,abide_dx,93,0.3593813663804626,test,0.6290322580645161,0.041323839722955204,0.6191239316239316,0.04276089389707933,0.6192226890756303,0.04189382716760261 +flat_mae,patch,logistic,abide_dx,94,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,94,10000.0,test,0.5645161290322581,0.04454457658132552,0.5626959247648903,0.045398064740041134,0.5761554621848739,0.04437194139281382 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,train,0.9173789173789174,0.010521068961999471,0.9162918068107992,0.010697567264277065,0.9153193060169804,0.010849628392419687 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,test,0.6532258064516129,0.04132426465305226,0.6429862738533645,0.04314411179611757,0.6428571428571428,0.04203000972969585 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,train,0.9102564102564102,0.010930243522263282,0.9089211024364727,0.011137722240389222,0.9073827980804725,0.011284140383066611 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,test,0.5403225806451613,0.04420676456492432,0.5352140461629513,0.04488465873486792,0.5351890756302521,0.044518673682699766 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,train,0.905982905982906,0.01140099463696144,0.904550398839739,0.011641301391406495,0.9029162052417866,0.011823656093448409 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,test,0.7016129032258065,0.043545217925305316,0.6982968369829683,0.044141010926519925,0.6980042016806722,0.044139688609073434 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,train,0.905982905982906,0.010731320163778955,0.9046172089231453,0.010927099922887469,0.9032115171650055,0.011038750103686814 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,test,0.7258064516129032,0.036596045572012896,0.719904331650279,0.03775749025685878,0.7184873949579832,0.03735367757105596 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,train,0.7977207977207977,0.01444653918064586,0.7920425244290161,0.015038752037735855,0.7890734588409007,0.014897997635922106 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,test,0.6370967741935484,0.04065618130178612,0.6330637206549615,0.041078502060998695,0.6328781512605042,0.040854707989713905 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,train,0.7150997150997151,0.016503666118935438,0.7079283717214752,0.01713831558265909,0.7064599483204135,0.01692467778716512 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,test,0.6048387096774194,0.04160330909655575,0.5880957223239103,0.04431501510782007,0.5908613445378151,0.042473110122286245 diff --git a/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..118bfc30e202bd1209dc9af361a5d65160078920 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:25:40 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:20:31 time: 4.2613 data: 3.3577 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:40 time: 0.1776 data: 0.0504 max mem: 2851 +extract (train) [ 40/289] eta: 0:01:04 time: 0.1431 data: 0.0342 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:51 time: 0.1556 data: 0.0408 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:43 time: 0.1475 data: 0.0381 max mem: 2851 +extract (train) [100/289] eta: 0:00:36 time: 0.1359 data: 0.0328 max mem: 2851 +extract (train) [120/289] eta: 0:00:31 time: 0.1490 data: 0.0387 max mem: 2851 +extract (train) [140/289] eta: 0:00:27 time: 0.1586 data: 0.0433 max mem: 2851 +extract (train) [160/289] eta: 0:00:22 time: 0.1519 data: 0.0407 max mem: 2851 +extract (train) [180/289] eta: 0:00:19 time: 0.1527 data: 0.0412 max mem: 2851 +extract (train) [200/289] eta: 0:00:15 time: 0.1512 data: 0.0387 max mem: 2851 +extract (train) [220/289] eta: 0:00:11 time: 0.1552 data: 0.0426 max mem: 2851 +extract (train) [240/289] eta: 0:00:08 time: 0.1495 data: 0.0401 max mem: 2851 +extract (train) [260/289] eta: 0:00:04 time: 0.1578 data: 0.0456 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1337 data: 0.0341 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1398 data: 0.0382 max mem: 2851 +extract (train) Total time: 0:00:48 (0.1667 s / it) +extract (validation) [ 0/62] eta: 0:03:38 time: 3.5274 data: 3.3559 max mem: 2851 +extract (validation) [20/62] eta: 0:00:14 time: 0.1833 data: 0.0538 max mem: 2851 +extract (validation) [40/62] eta: 0:00:05 time: 0.1360 data: 0.0342 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1339 data: 0.0347 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1341 data: 0.0349 max mem: 2851 +extract (validation) Total time: 0:00:12 (0.2097 s / it) +extract (test) [ 0/62] eta: 0:03:42 time: 3.5920 data: 3.4223 max mem: 2851 +extract (test) [20/62] eta: 0:00:14 time: 0.1895 data: 0.0582 max mem: 2851 +extract (test) [40/62] eta: 0:00:05 time: 0.1441 data: 0.0367 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1303 data: 0.0317 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1307 data: 0.0319 max mem: 2851 +extract (test) Total time: 0:00:13 (0.2142 s / it) +feature extraction time: 0:01:14 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.046416 | train | 0.81197 | 0.014184 | 0.80758 | 0.014705 | 0.80499 | 0.014689 | +| flat_mae | patch | logistic | abide_dx | | 0.046416 | test | 0.58871 | 0.039867 | 0.5826 | 0.040645 | 0.58274 | 0.040126 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042218203312634345, "f1": 0.6368842324461508, "f1_std": 0.042321457616098505, "bacc": 0.6407563025210083, "bacc_std": 0.04237084169077069} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04226341692279025, "f1": 0.6317074780542539, "f1_std": 0.04298412013147237, "bacc": 0.6313025210084033, "bacc_std": 0.04270889991775985} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.045420007784896084, "f1": 0.5643243243243243, "f1_std": 0.04854009055246231, "bacc": 0.5672268907563025, "bacc_std": 0.046450185755562055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.04040609416126988, "f1": 0.6794591370053689, "f1_std": 0.041905376035152674, "bacc": 0.6785714285714286, "bacc_std": 0.041440763177767466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 2.782559402207126, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04440126309478225, "f1": 0.5366764995083579, "f1_std": 0.04478388829765884, "bacc": 0.5367647058823529, "bacc_std": 0.04479448722869246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.043691223339227425, "f1": 0.6436781609195402, "f1_std": 0.043897930873984754, "bacc": 0.6449579831932774, "bacc_std": 0.04399796712118936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 21.54434690031882, "split": "test", "acc": 0.5, "acc_std": 0.044912039712467246, "f1": 0.4952731092436975, "f1_std": 0.045139445584536045, "bacc": 0.4952731092436975, "bacc_std": 0.04503509599525088} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04034387824647876, "f1": 0.6760710553814002, "f1_std": 0.04068935106765971, "bacc": 0.6775210084033614, "bacc_std": 0.04082539132175891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04060698091539773, "f1": 0.6481081081081081, "f1_std": 0.04326395866640014, "bacc": 0.6486344537815126, "bacc_std": 0.041531009150572014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04299672990493823, "f1": 0.6356837606837606, "f1_std": 0.044729074743022, "bacc": 0.6355042016806722, "bacc_std": 0.04370760104163061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.042756467401816795, "f1": 0.5915678524374176, "f1_std": 0.04339356841745959, "bacc": 0.5913865546218487, "bacc_std": 0.043071913098189626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04248297694212029, "f1": 0.5752305665349143, "f1_std": 0.04294893924017232, "bacc": 0.5751050420168067, "bacc_std": 0.04269797911922068} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04535965651463766, "f1": 0.5886829268292683, "f1_std": 0.04540295739010405, "bacc": 0.5934873949579832, "bacc_std": 0.04541250017870364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04400212841318858, "f1": 0.6097756946769334, "f1_std": 0.04570906568197248, "bacc": 0.6102941176470589, "bacc_std": 0.04461148180701605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04249032453983518, "f1": 0.626380984265149, "f1_std": 0.04484125549803293, "bacc": 0.6265756302521008, "bacc_std": 0.04355669106195808} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043527172251888555, "f1": 0.602745995423341, "f1_std": 0.0438257068756977, "bacc": 0.6034663865546219, "bacc_std": 0.04387562622641546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.043870644567120864, "f1": 0.5765651155005022, "f1_std": 0.045779157696007315, "bacc": 0.5777310924369747, "bacc_std": 0.04449907324820548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04169557928540973, "f1": 0.5989703649924097, "f1_std": 0.042443494849432606, "bacc": 0.5987394957983193, "bacc_std": 0.04205363700926739} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.042771112986914876, "f1": 0.6869519000797236, "f1_std": 0.044106866412747935, "bacc": 0.6859243697478992, "bacc_std": 0.04356041664682136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04369669787051054, "f1": 0.6448884448884449, "f1_std": 0.04515951772187306, "bacc": 0.6444327731092437, "bacc_std": 0.04429909470697927} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04352149707439134, "f1": 0.5929621848739496, "f1_std": 0.044084927709447594, "bacc": 0.5929621848739496, "bacc_std": 0.04387646002428194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 166.81005372000556, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04344634101217863, "f1": 0.5221897422269466, "f1_std": 0.044813620274288174, "bacc": 0.523109243697479, "bacc_std": 0.0440088526508549} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.042779362797657115, "f1": 0.5643243243243243, "f1_std": 0.04585237692583284, "bacc": 0.5672268907563025, "bacc_std": 0.043803071896159} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04537588416420845, "f1": 0.5588932806324111, "f1_std": 0.045569463525020316, "bacc": 0.5588235294117647, "bacc_std": 0.04534764734689067} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.0446859314122584, "f1": 0.5873947935016637, "f1_std": 0.04478254730062658, "bacc": 0.5887605042016807, "bacc_std": 0.04479694953406288} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04463190589626891, "f1": 0.5127539127539127, "f1_std": 0.0460817556621855, "bacc": 0.5141806722689075, "bacc_std": 0.04504119615849044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.041410721012514064, "f1": 0.6317074780542539, "f1_std": 0.042345486029936624, "bacc": 0.6313025210084033, "bacc_std": 0.04212378665463916} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04058220931392434, "f1": 0.6448884448884449, "f1_std": 0.04211634261758473, "bacc": 0.6444327731092437, "bacc_std": 0.04137330755659312} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04020492188795608, "f1": 0.6704756842944459, "f1_std": 0.04133235923501435, "bacc": 0.6696428571428572, "bacc_std": 0.040830026166809634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 21.54434690031882, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.047534356926853774, "f1": 0.6189604445897352, "f1_std": 0.04813161732103974, "bacc": 0.6197478991596639, "bacc_std": 0.04823617040532753} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.044981774549514034, "f1": 0.6063492063492064, "f1_std": 0.046284154247038044, "bacc": 0.60609243697479, "bacc_std": 0.04567130660818584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04139438751060018, "f1": 0.6644445911160979, "f1_std": 0.042324513534665695, "bacc": 0.6638655462184874, "bacc_std": 0.042100397037095685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.043893788537974716, "f1": 0.6255252100840336, "f1_std": 0.04462701636409209, "bacc": 0.6255252100840336, "bacc_std": 0.04459972637216828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04019649965864556, "f1": 0.671957671957672, "f1_std": 0.041299859041489376, "bacc": 0.671218487394958, "bacc_std": 0.040840894017764307} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.041215061828657515, "f1": 0.6436781609195402, "f1_std": 0.04129597454394656, "bacc": 0.6449579831932774, "bacc_std": 0.04135049695890393} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04068519533770254, "f1": 0.6025641025641025, "f1_std": 0.0425833170257595, "bacc": 0.6029411764705883, "bacc_std": 0.04140629486725668} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04053683721870948, "f1": 0.6630211440312852, "f1_std": 0.04180180141889523, "bacc": 0.6622899159663866, "bacc_std": 0.041339725778072794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04306927302470023, "f1": 0.5407407407407407, "f1_std": 0.04343785552484149, "bacc": 0.5409663865546219, "bacc_std": 0.043139631325450276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.046134928001306606, "f1": 0.5234186697934988, "f1_std": 0.04619791320530727, "bacc": 0.5252100840336134, "bacc_std": 0.04617520293400343} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04415468034133852, "f1": 0.5503626107977437, "f1_std": 0.04632525264221235, "bacc": 0.5525210084033614, "bacc_std": 0.0448338604421838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04052014824938237, "f1": 0.5941345902068604, "f1_std": 0.04070846445764479, "bacc": 0.5945378151260504, "bacc_std": 0.04073134881063851} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.045768552586107236, "f1": 0.5854473942969518, "f1_std": 0.04617555455924505, "bacc": 0.585609243697479, "bacc_std": 0.04608857231745272} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.042824764539415736, "f1": 0.6429862738533645, "f1_std": 0.04459204173697827, "bacc": 0.6428571428571428, "bacc_std": 0.043548470335819926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04328670181002488, "f1": 0.5478500171998624, "f1_std": 0.04678149767601053, "bacc": 0.5551470588235294, "bacc_std": 0.04391614109192465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.039707865973724964, "f1": 0.6288435374149659, "f1_std": 0.04282728625064404, "bacc": 0.6307773109243697, "bacc_std": 0.04066740575742161} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 2.782559402207126, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04331820588965382, "f1": 0.5826018084614877, "f1_std": 0.043943904309058646, "bacc": 0.5824579831932774, "bacc_std": 0.043727187450797456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04127732595356936, "f1": 0.6017043592264831, "f1_std": 0.04145355692215413, "bacc": 0.601890756302521, "bacc_std": 0.041381414428800875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.0426902639583855, "f1": 0.6317074780542539, "f1_std": 0.04346825869890181, "bacc": 0.6313025210084033, "bacc_std": 0.04307203205157088} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.040773207802498476, "f1": 0.6313513513513513, "f1_std": 0.043250062711484606, "bacc": 0.6323529411764706, "bacc_std": 0.04157079468091591} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.046001049053775576, "f1": 0.5788211788211788, "f1_std": 0.04764582471862923, "bacc": 0.5793067226890757, "bacc_std": 0.04664967338769393} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.04234657069879415, "f1": 0.5032051282051282, "f1_std": 0.04390608628561085, "bacc": 0.5052521008403361, "bacc_std": 0.042900967305268344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.041187431657422634, "f1": 0.6688034188034189, "f1_std": 0.04253306992257641, "bacc": 0.6680672268907563, "bacc_std": 0.0417211046750161} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04566458038989544, "f1": 0.607905138339921, "f1_std": 0.0462920855443966, "bacc": 0.6076680672268908, "bacc_std": 0.046031269726797476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.043398484535033754, "f1": 0.6097756946769334, "f1_std": 0.045251727282020406, "bacc": 0.6102941176470589, "bacc_std": 0.04406853879473909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04570425617224357, "f1": 0.6041951664386684, "f1_std": 0.0457648339190002, "bacc": 0.6066176470588236, "bacc_std": 0.045845897872939564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04084565247002908, "f1": 0.6241664982824813, "f1_std": 0.04355276081627931, "bacc": 0.625, "bacc_std": 0.04196559551126373} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.0420080761275444, "f1": 0.6644445911160979, "f1_std": 0.042619278601365794, "bacc": 0.6638655462184874, "bacc_std": 0.04234707742260963} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.044624913831214155, "f1": 0.5623043623043623, "f1_std": 0.04582797828208603, "bacc": 0.5630252100840336, "bacc_std": 0.04498807182247631} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 166.81005372000556, "split": "test", "acc": 0.532258064516129, "acc_std": 0.045421127508479126, "f1": 0.5291961246399581, "f1_std": 0.045460489229981915, "bacc": 0.5294117647058824, "bacc_std": 0.04551528890666038} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044395512142998486, "f1": 0.5880946053680574, "f1_std": 0.045568895506948745, "bacc": 0.5882352941176471, "bacc_std": 0.04490751578920159} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.041232554725589725, "f1": 0.5740553647201454, "f1_std": 0.04285332629420543, "bacc": 0.576155462184874, "bacc_std": 0.04150065718538355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 166.81005372000556, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04421672043791942, "f1": 0.5603991596638656, "f1_std": 0.04461788846657464, "bacc": 0.5603991596638656, "bacc_std": 0.04465669892927136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04374649086566044, "f1": 0.6153389215233318, "f1_std": 0.04484836574604478, "bacc": 0.6150210084033614, "bacc_std": 0.044415665834303154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04217419342501986, "f1": 0.5931704050887178, "f1_std": 0.04391861940298793, "bacc": 0.5940126050420168, "bacc_std": 0.04277299154849585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04395005620208571, "f1": 0.5880946053680574, "f1_std": 0.04502880737822516, "bacc": 0.5882352941176471, "bacc_std": 0.04434217598638901} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.043188052241326935, "f1": 0.5796610169491525, "f1_std": 0.04317206858797906, "bacc": 0.58140756302521, "bacc_std": 0.04320510359671049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.0423797803868774, "f1": 0.6317074780542539, "f1_std": 0.04345647325267356, "bacc": 0.6313025210084033, "bacc_std": 0.04317116044447894} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.03964245443395737, "f1": 0.6167554415729598, "f1_std": 0.040291330723535655, "bacc": 0.6165966386554622, "bacc_std": 0.04012131632067222} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.04046257954446677, "f1": 0.6869519000797236, "f1_std": 0.04179710253513917, "bacc": 0.6859243697478992, "bacc_std": 0.0411675037398334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.0392785617230338, "f1": 0.6210470369386127, "f1_std": 0.040254558314939806, "bacc": 0.6207983193277311, "bacc_std": 0.03971241046941933} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.03884330430437141, "f1": 0.6473118279569892, "f1_std": 0.04341607946315786, "bacc": 0.6512605042016807, "bacc_std": 0.04014279605429215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.04265595429766805, "f1": 0.6829891838741396, "f1_std": 0.043069199706008955, "bacc": 0.6832983193277311, "bacc_std": 0.043072118547777184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.041491144699194925, "f1": 0.6191239316239316, "f1_std": 0.04356701629263569, "bacc": 0.6192226890756303, "bacc_std": 0.04249808265436233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04390634247428613, "f1": 0.602745995423341, "f1_std": 0.04425111017027571, "bacc": 0.6034663865546219, "bacc_std": 0.044406763201912815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.040111058105092336, "f1": 0.6313513513513513, "f1_std": 0.042568159545772324, "bacc": 0.6323529411764706, "bacc_std": 0.04086883044506228} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04224828746783258, "f1": 0.6429862738533645, "f1_std": 0.04463284796685339, "bacc": 0.6428571428571428, "bacc_std": 0.04332009667818785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04262086016405383, "f1": 0.5718845677806006, "f1_std": 0.04260897121051207, "bacc": 0.5740546218487395, "bacc_std": 0.04253411684871575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.043166835074675135, "f1": 0.6063492063492064, "f1_std": 0.044198564420308464, "bacc": 0.60609243697479, "bacc_std": 0.04375146203938788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.043371804476147524, "f1": 0.5544708833299437, "f1_std": 0.04629075233302461, "bacc": 0.5582983193277311, "bacc_std": 0.04407747020135957} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04450669313272805, "f1": 0.6428384393820372, "f1_std": 0.044748306366263244, "bacc": 0.6433823529411764, "bacc_std": 0.04467950962021125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 2.782559402207126, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04116418169134931, "f1": 0.6429862738533645, "f1_std": 0.04305946450374188, "bacc": 0.6428571428571428, "bacc_std": 0.041865606031987704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.041339197384257585, "f1": 0.6405797101449275, "f1_std": 0.042201249059175105, "bacc": 0.6402310924369747, "bacc_std": 0.04199872324878796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.042839134762756866, "f1": 0.6428384393820372, "f1_std": 0.04341799874536937, "bacc": 0.6433823529411764, "bacc_std": 0.04355430613127878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04171159527062962, "f1": 0.6580882352941176, "f1_std": 0.042270723203959214, "bacc": 0.6580882352941176, "bacc_std": 0.04218995342966819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.041873188842128234, "f1": 0.6480760345851759, "f1_std": 0.04260036460283504, "bacc": 0.6475840336134454, "bacc_std": 0.0422727386038064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04504449182193487, "f1": 0.5978378378378378, "f1_std": 0.04798404932386272, "bacc": 0.5997899159663866, "bacc_std": 0.045977793048053786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.0415967835745277, "f1": 0.6530227110040997, "f1_std": 0.041607005112683254, "bacc": 0.6570378151260504, "bacc_std": 0.041337604033593794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04457738597395042, "f1": 0.6017043592264831, "f1_std": 0.04484420897030573, "bacc": 0.601890756302521, "bacc_std": 0.04482819258722477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04631796128912919, "f1": 0.5603991596638656, "f1_std": 0.04689085899495814, "bacc": 0.5603991596638656, "bacc_std": 0.04688105543120096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04265285604777548, "f1": 0.6217205613178767, "f1_std": 0.045827865203947944, "bacc": 0.6234243697478992, "bacc_std": 0.04372765616999762} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042974825048267974, "f1": 0.5740553647201454, "f1_std": 0.04518902625309671, "bacc": 0.576155462184874, "bacc_std": 0.04369201013925574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04395116898266493, "f1": 0.6017043592264831, "f1_std": 0.04428460652586019, "bacc": 0.601890756302521, "bacc_std": 0.04424579233431141} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.041323839722955204, "f1": 0.6191239316239316, "f1_std": 0.04276089389707933, "bacc": 0.6192226890756303, "bacc_std": 0.04189382716760261} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 10000.0, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04454457658132552, "f1": 0.5626959247648903, "f1_std": 0.045398064740041134, "bacc": 0.5761554621848739, "bacc_std": 0.04437194139281382} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04132426465305226, "f1": 0.6429862738533645, "f1_std": 0.04314411179611757, "bacc": 0.6428571428571428, "bacc_std": 0.04203000972969585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04420676456492432, "f1": 0.5352140461629513, "f1_std": 0.04488465873486792, "bacc": 0.5351890756302521, "bacc_std": 0.044518673682699766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.7016129032258065, "acc_std": 0.043545217925305316, "f1": 0.6982968369829683, "f1_std": 0.044141010926519925, "bacc": 0.6980042016806722, "bacc_std": 0.044139688609073434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.7258064516129032, "acc_std": 0.036596045572012896, "f1": 0.719904331650279, "f1_std": 0.03775749025685878, "bacc": 0.7184873949579832, "bacc_std": 0.03735367757105596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04065618130178612, "f1": 0.6330637206549615, "f1_std": 0.041078502060998695, "bacc": 0.6328781512605042, "bacc_std": 0.040854707989713905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04160330909655575, "f1": 0.5880957223239103, "f1_std": 0.04431501510782007, "bacc": 0.5908613445378151, "bacc_std": 0.042473110122286245} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 105.89 | 999.81 | 0.88057 | 0.074186 | 0.87837 | 0.076111 | 0.87717 | 0.076637 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 105.89 | 999.81 | 0.61677 | 0.044974 | 0.60945 | 0.045504 | 0.61024 | 0.044899 | + + +done! total time: 0:05:40 diff --git a/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e8130eac504cc5f2fb2940579d7e29bc5ed9a9a --- /dev/null +++ b/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..c4ed3da6ae4ce7752842901fd47b85e0c99cfa39 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.046415888336127774,train,0.8328767123287671,0.019406950601703797,0.8279113625648279,0.020150319105810843,0.8246779019356415,0.020149093383731626 +flat_mae,patch,logistic,adhd200_dx,,0.046415888336127774,test,0.6615384615384615,0.05954859782529376,0.6474358974358974,0.06320948827070938,0.6462355212355213,0.061460285015455834 +flat_mae,patch,logistic,adhd200_dx,1,0.046415888336127774,train,0.8356164383561644,0.01939915709323324,0.8305687937117039,0.02024497304920058,0.8271050864016609,0.020326126173454532 +flat_mae,patch,logistic,adhd200_dx,1,0.046415888336127774,test,0.6153846153846154,0.06019023884377771,0.606060606060606,0.06187877803169629,0.6056949806949807,0.06139948545478854 +flat_mae,patch,logistic,adhd200_dx,2,0.000774263682681127,train,0.6876712328767123,0.02283299966106593,0.6706401975683891,0.02452979181803417,0.6694907492214691,0.023548728707190672 +flat_mae,patch,logistic,adhd200_dx,2,0.000774263682681127,test,0.676923076923077,0.04940561259829715,0.6351242983159583,0.06111096983818146,0.6423745173745173,0.052701326044149635 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,train,0.7506849315068493,0.023063414011503504,0.7411650107149814,0.0243814404011419,0.7382304451364718,0.02404400829854609 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,test,0.6153846153846154,0.057568104252432487,0.606060606060606,0.05894656073394623,0.6056949806949807,0.05860072595645887 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,train,0.8438356164383561,0.018594501777159204,0.8408491107286288,0.019029020146826496,0.8401263967759662,0.01922371594787906 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,test,0.6923076923076923,0.056236390332534116,0.6832358674463938,0.058012311464388476,0.6819498069498069,0.0572856646471886 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,train,0.8410958904109589,0.01919592438134184,0.8365301457870027,0.020090508494045492,0.8333943945777615,0.0202883774751213 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,test,0.5538461538461539,0.06196266102215709,0.5534233593935086,0.062099418110663995,0.5603281853281853,0.06305056965506149 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7342465753424657,0.02137751882534059,0.7234870080677283,0.02254816361445142,0.7207974598522318,0.022082407455312193 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.7076923076923077,0.05652062303621898,0.7006060606060607,0.05846678040012765,0.6998069498069499,0.058102183690535096 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,train,0.8410958904109589,0.01878389091957358,0.8371237766972364,0.019373004601664537,0.8348293338218233,0.01946339907674402 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,test,0.676923076923077,0.05662906189305606,0.6719538572458543,0.05742197033282907,0.6727799227799228,0.057244127105777795 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,train,0.736986301369863,0.022531810669024585,0.7289100699387263,0.023617780786436064,0.7268119924284057,0.02342547394421754 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,test,0.676923076923077,0.058185290644090434,0.656084656084656,0.06328997841716133,0.6554054054054055,0.06019530925738961 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,train,0.8575342465753425,0.018492493950794536,0.8539730411768327,0.019152040449517026,0.8515448494840325,0.019428897636088197 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,test,0.6,0.06205380372074719,0.5775,0.0665937609508224,0.5791505791505791,0.06378095089626452 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,train,0.8383561643835616,0.01948706091882236,0.8352648689998088,0.019829362913411083,0.8345545582218965,0.01978195913181283 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,test,0.5692307692307692,0.058979238148219405,0.5512820512820513,0.06208332193459976,0.5521235521235521,0.06020492117595055 +flat_mae,patch,logistic,adhd200_dx,11,0.3593813663804626,train,0.9698630136986301,0.008702701668204225,0.9692866704914898,0.008888144718662732,0.9682786835195701,0.009176392508338954 +flat_mae,patch,logistic,adhd200_dx,11,0.3593813663804626,test,0.5692307692307692,0.06121956604119462,0.5608108108108107,0.06216944898110753,0.5608108108108107,0.06184992250643221 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,train,0.7671232876712328,0.02155180712416322,0.7576948008840918,0.02282561582021854,0.7542284911766501,0.02245166188053964 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,test,0.5692307692307692,0.05863355260534927,0.545,0.0643018423438985,0.5477799227799228,0.060624257353881085 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,train,0.7534246575342466,0.022665686363695214,0.7437277663358921,0.02390325064036949,0.7406576296024913,0.023545040127060914 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,test,0.6,0.05951023779457502,0.5976190476190476,0.059495547674102055,0.6008687258687259,0.059681775278851205 +flat_mae,patch,logistic,adhd200_dx,14,0.000774263682681127,train,0.6575342465753424,0.02292689696876249,0.634300783097282,0.025182624760698435,0.6356170238749466,0.0237204512271743 +flat_mae,patch,logistic,adhd200_dx,14,0.000774263682681127,test,0.6153846153846154,0.0488138015210341,0.5656241646618552,0.05953012356967095,0.5796332046332047,0.05088403990764041 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,train,0.7589041095890411,0.021354033476861142,0.7488584474885844,0.022735768655251502,0.7455119985345301,0.022310730873540074 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,test,0.6307692307692307,0.05957395880791655,0.6264367816091954,0.06035131773820891,0.627895752895753,0.060583895318543336 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,train,0.7616438356164383,0.021017895314179132,0.7535869759212843,0.02221974840103593,0.7508090614886731,0.022062021885684287 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,test,0.5538461538461539,0.06435519776436043,0.5469838981014179,0.06492285567577333,0.5472972972972974,0.06516691700213183 +flat_mae,patch,logistic,adhd200_dx,17,0.046415888336127774,train,0.8493150684931506,0.017511855566497595,0.8448381137879596,0.018168151744286404,0.8413934175978507,0.018217727551402205 +flat_mae,patch,logistic,adhd200_dx,17,0.046415888336127774,test,0.6,0.05915142737877714,0.5775,0.06427266232340036,0.5791505791505791,0.06112058865015624 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,train,0.7479452054794521,0.02251869205431607,0.736833855799373,0.024108681889791917,0.7336508518043597,0.023564454833092437 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,test,0.5538461538461539,0.05661120385901419,0.5250692869740489,0.0601620893714901,0.5299227799227799,0.057191071395968726 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,train,0.8301369863013699,0.02138484827594822,0.8245736434108527,0.022422571483651715,0.8208157782255603,0.022433162093229635 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,test,0.6461538461538462,0.058408526572165484,0.6375757575757576,0.05968032690148713,0.6370656370656371,0.05919301871267638 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,train,0.7397260273972602,0.022446593232772608,0.729787648548607,0.023644256661403445,0.7270867680283324,0.02324740385321754 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,test,0.5846153846153846,0.05768891066921947,0.5699583435432491,0.06021634682385391,0.5699806949806949,0.05895407350908164 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,train,0.8356164383561644,0.019075196428451582,0.8302325581395349,0.020068704217077744,0.82638761677963,0.020139917022014838 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,test,0.7384615384615385,0.05310002618704711,0.7321212121212122,0.05481591541581202,0.7311776061776062,0.054710675184643114 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.7397260273972602,0.0218257408788107,0.7285693038693062,0.023370490166331876,0.7256518287842706,0.022888121097670286 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.5846153846153846,0.058538985138667114,0.578226387887527,0.0596431500638317,0.5786679536679536,0.05969556313753244 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7561643835616438,0.021772876296910576,0.7462922032786373,0.023213202698831717,0.7430848140685107,0.022818193778755432 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.5538461538461539,0.055767047172105656,0.5167905665214048,0.061089750795645215,0.5255791505791506,0.05653910275543799 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.7315068493150685,0.02142318882258671,0.7209480122324159,0.02296783295017681,0.7183702753862123,0.022578272794651844 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.676923076923077,0.05192339373122724,0.6431372549019607,0.060631892972743545,0.6467181467181468,0.05440476562575506 +flat_mae,patch,logistic,adhd200_dx,25,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,25,166.81005372000556,test,0.6307692307692307,0.06280358034492947,0.6299810246679317,0.06327439420416091,0.6365830115830116,0.06318943951785536 +flat_mae,patch,logistic,adhd200_dx,26,0.046415888336127774,train,0.852054794520548,0.018429472101000648,0.847803928836175,0.01914915423779992,0.844538071685901,0.01923414811213632 +flat_mae,patch,logistic,adhd200_dx,26,0.046415888336127774,test,0.6153846153846154,0.05898618030908275,0.606060606060606,0.0602912843362489,0.6056949806949807,0.05978379340730521 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,train,0.7424657534246575,0.022062002395728694,0.732337889284154,0.023500131623442437,0.7295139524943518,0.023142991203314554 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,test,0.6461538461538462,0.056504733005926205,0.6336682185738789,0.05926443188431186,0.6327220077220077,0.05798366492181559 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,train,0.6904109589041096,0.02352133921328011,0.6739551465996316,0.02522879958661633,0.6726354033095194,0.024253772242755448 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,test,0.6,0.054345430339786324,0.5626293995859213,0.0617402510502052,0.5704633204633205,0.056061716930151655 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,train,0.7561643835616438,0.023025747009230404,0.7468536917981687,0.02423176506304838,0.7438022836905416,0.023855757939057646 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,test,0.6,0.05748871564085862,0.570630081300813,0.0634437681436107,0.5748069498069498,0.05913148825556801 +flat_mae,patch,logistic,adhd200_dx,30,0.3593813663804626,train,0.9698630136986301,0.008846557948709543,0.9692386665747275,0.009069891798869271,0.9675612138975392,0.009581088720176642 +flat_mae,patch,logistic,adhd200_dx,30,0.3593813663804626,test,0.6615384615384615,0.060001451658967656,0.6575670498084292,0.06048906520927739,0.6592664092664093,0.060451241376856125 +flat_mae,patch,logistic,adhd200_dx,31,0.005994842503189409,train,0.7534246575342466,0.02255539150335833,0.7425548589341693,0.024026384618855624,0.7392226903584295,0.02350741898939517 +flat_mae,patch,logistic,adhd200_dx,31,0.005994842503189409,test,0.6307692307692307,0.055753260312765195,0.6036585365853658,0.06223610980272032,0.6061776061776062,0.058127224597020154 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,train,0.7616438356164383,0.02126822616424301,0.752542372881356,0.022461683189617932,0.7493741222446113,0.022138219276341804 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,test,0.5846153846153846,0.06294529605822813,0.578226387887527,0.06380292521054755,0.5786679536679536,0.06372660427590177 +flat_mae,patch,logistic,adhd200_dx,33,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,33,2.782559402207126,test,0.5692307692307692,0.05868197307944784,0.5666666666666667,0.05908246899116847,0.5694980694980695,0.059654419038302556 +flat_mae,patch,logistic,adhd200_dx,34,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,34,166.81005372000556,test,0.5384615384615384,0.06179315754617679,0.5383522727272727,0.06239199883367598,0.5511583011583012,0.06294511990731128 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7397260273972602,0.02306619648216573,0.7258914949288938,0.025036796282610164,0.722781950296147,0.024188365106166974 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.6461538461538462,0.05332544714870629,0.6167649320687003,0.06066932939265853,0.6196911196911197,0.05574871583058074 +flat_mae,patch,logistic,adhd200_dx,36,0.000774263682681127,train,0.6904109589041096,0.02355826927092632,0.6747880938300808,0.02541606492982842,0.6733528729315503,0.02453136135673897 +flat_mae,patch,logistic,adhd200_dx,36,0.000774263682681127,test,0.5230769230769231,0.058417797401600455,0.49987589972697943,0.061557425501980484,0.502895752895753,0.05924746565684722 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,train,0.7561643835616438,0.02164036108750841,0.7457122952038764,0.023049860698618748,0.7423673444464798,0.022636516778872615 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,test,0.7230769230769231,0.05556220246890206,0.7115384615384616,0.059610081081312355,0.708976833976834,0.05804399469741471 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.7589041095890411,0.021393018013853016,0.7499688628720886,0.022526562365744504,0.7469469377785919,0.02223392540938231 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.6461538461538462,0.057153388547707365,0.6336682185738789,0.05945815826083519,0.6327220077220077,0.058164508025839834 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,train,0.7534246575342466,0.021971607943866028,0.7419400452488687,0.023649515150259526,0.7385052207363986,0.023056456407908796 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,test,0.6153846153846154,0.05671338508092493,0.5834401435529352,0.06278187649843205,0.5883204633204633,0.058270470545750463 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,train,0.7205479452054795,0.02292521214402119,0.7089041095890412,0.024345522577939902,0.706509128656042,0.023830400546272387 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,test,0.7230769230769231,0.0470618377036781,0.6972049689440993,0.055779388805025115,0.6959459459459459,0.05056856924412733 +flat_mae,patch,logistic,adhd200_dx,41,0.000774263682681127,train,0.673972602739726,0.02288022022157736,0.6486438388299722,0.02613751752373641,0.6501801306710631,0.024120748410874696 +flat_mae,patch,logistic,adhd200_dx,41,0.000774263682681127,test,0.6615384615384615,0.05877141357202869,0.6474358974358974,0.06222753234506633,0.6462355212355213,0.06047565490144164 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,train,0.7506849315068493,0.02292890510764014,0.7381036861817465,0.024613014022160755,0.7346430970263174,0.023910757682405873 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,test,0.6,0.06187275356032003,0.5953065134099617,0.06243661340781053,0.5965250965250966,0.06266298642200656 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,train,0.7397260273972602,0.021486760707925406,0.7285693038693062,0.022983724953120555,0.7256518287842706,0.022523794009758787 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,test,0.6307692307692307,0.05516791795597903,0.6036585365853658,0.06114461286973705,0.6061776061776062,0.05709591903737047 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,train,0.8356164383561644,0.01873386994937686,0.8312060673325934,0.01946594981063492,0.8285400256457227,0.01961468706160962 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,test,0.676923076923077,0.05628111184310291,0.6690909090909091,0.05840864803749684,0.6684362934362934,0.05807804455656731 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7726027397260274,0.022173507782114285,0.7617212386248339,0.024138804144527373,0.7576479208646272,0.023643223370662526 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.5538461538461539,0.0598078501110953,0.5321419707123356,0.06296583759447165,0.5342664092664092,0.06048413511259806 +flat_mae,patch,logistic,adhd200_dx,46,0.000774263682681127,train,0.6520547945205479,0.021995895828302782,0.6238182187056198,0.025266349256639214,0.6271753068327532,0.02312410882851407 +flat_mae,patch,logistic,adhd200_dx,46,0.000774263682681127,test,0.676923076923077,0.053628739093929234,0.6500897205844656,0.06125839453727086,0.6510617760617761,0.05623022627984787 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,train,0.7726027397260274,0.020886902916772556,0.7622970756930223,0.02254583086793734,0.7583653904866581,0.022151140912815965 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,test,0.5230769230769231,0.05641063309897539,0.49987589972697943,0.05854909447209793,0.502895752895753,0.05677740900886619 +flat_mae,patch,logistic,adhd200_dx,48,0.005994842503189409,train,0.7287671232876712,0.022574317705162897,0.7150798344175044,0.024489702917779688,0.7123557428100384,0.023691206496360546 +flat_mae,patch,logistic,adhd200_dx,48,0.005994842503189409,test,0.676923076923077,0.055072720080284944,0.6690909090909091,0.056779663268740274,0.6684362934362934,0.05645592238343994 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7589041095890411,0.021973179372749962,0.7488584474885844,0.023438417643886045,0.7455119985345301,0.022969562859613435 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.6,0.061925513568877495,0.588206627680312,0.0643059304649406,0.5878378378378378,0.06322034599568273 +flat_mae,patch,logistic,adhd200_dx,50,0.3593813663804626,train,0.9643835616438357,0.009269151941083225,0.9637567693494551,0.009438755417698959,0.9634243145875313,0.009595848603021432 +flat_mae,patch,logistic,adhd200_dx,50,0.3593813663804626,test,0.6,0.061745467217387634,0.5921814671814671,0.06254522503170544,0.5921814671814671,0.06219377876360297 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,train,0.8493150684931506,0.018030061009845895,0.8456825711628193,0.018549239502255687,0.8435458264639434,0.018604267761687384 +flat_mae,patch,logistic,adhd200_dx,51,0.046415888336127774,test,0.5538461538461539,0.06201613838444266,0.543030303030303,0.06327893967452543,0.542953667953668,0.06280604563792958 +flat_mae,patch,logistic,adhd200_dx,52,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,52,2.782559402207126,test,0.5538461538461539,0.059281664447322746,0.5321419707123356,0.06256361429030328,0.5342664092664092,0.060287741841203696 +flat_mae,patch,logistic,adhd200_dx,53,0.3593813663804626,train,0.9726027397260274,0.008211825021395834,0.9720999205038832,0.008382270077473946,0.9714233376076205,0.00866029426573518 +flat_mae,patch,logistic,adhd200_dx,53,0.3593813663804626,test,0.6,0.05591319561415697,0.570630081300813,0.06208711240024855,0.5748069498069498,0.057909146826721374 +flat_mae,patch,logistic,adhd200_dx,54,0.046415888336127774,train,0.8410958904109589,0.018512416955519265,0.8368325317548403,0.019300863902830444,0.8341118641997924,0.019527191875652154 +flat_mae,patch,logistic,adhd200_dx,54,0.046415888336127774,test,0.5692307692307692,0.0607772224540639,0.5666666666666667,0.06071882760676522,0.5694980694980695,0.06099780500583999 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7534246575342466,0.0223992574200695,0.7437277663358921,0.023753636487057486,0.7406576296024913,0.023395817507557457 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.5846153846153846,0.05959493247190873,0.5810455956075435,0.06024286371201866,0.583011583011583,0.06051057707283309 +flat_mae,patch,logistic,adhd200_dx,56,0.000774263682681127,train,0.6767123287671233,0.022220834017647885,0.6542672745954277,0.024471496094562825,0.6547597240031752,0.02300244803986982 +flat_mae,patch,logistic,adhd200_dx,56,0.000774263682681127,test,0.5692307692307692,0.06126554868456906,0.545,0.06539374007888736,0.5477799227799228,0.06261624865272561 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,train,0.7671232876712328,0.02197169471700121,0.7587499319600937,0.023137421196717627,0.7556634304207119,0.022895461486048752 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,test,0.5846153846153846,0.059442427021257425,0.5578231292517006,0.06475376483273171,0.5612934362934363,0.06101362339260015 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7616438356164383,0.021718642355898977,0.7519935020813646,0.02302511235819851,0.7486566526225804,0.022642313863589305 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.6307692307692307,0.05677144877885102,0.6235521235521235,0.05802562420129616,0.6235521235521235,0.05765226332132044 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,train,0.7397260273972602,0.02210311799515851,0.729787648548607,0.023280546784205103,0.7270867680283324,0.022928940394719734 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,test,0.6153846153846154,0.05765516753245339,0.5834401435529352,0.0641398838935539,0.5883204633204633,0.059358634524250914 +flat_mae,patch,logistic,adhd200_dx,60,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,60,2.782559402207126,test,0.5230769230769231,0.06119178503862277,0.5062484685126194,0.06350511336632365,0.5072393822393823,0.062143343657125365 +flat_mae,patch,logistic,adhd200_dx,61,0.000774263682681127,train,0.6767123287671233,0.02249611313146438,0.6542672745954277,0.024655694764532325,0.6547597240031752,0.02323434507435035 +flat_mae,patch,logistic,adhd200_dx,61,0.000774263682681127,test,0.6,0.05659908628279557,0.570630081300813,0.06214351772421274,0.5748069498069498,0.05808544770222929 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,train,0.8438356164383561,0.018753871133502673,0.839195863380249,0.019511231916473526,0.835821579043781,0.0195581453346375 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,test,0.6,0.05603862573229734,0.588206627680312,0.058140384941637265,0.5878378378378378,0.05701097764285227 +flat_mae,patch,logistic,adhd200_dx,63,0.046415888336127774,train,0.8328767123287671,0.01895287258559074,0.8275755252499439,0.019859188074107692,0.8239604323136106,0.019976213141371293 +flat_mae,patch,logistic,adhd200_dx,63,0.046415888336127774,test,0.5692307692307692,0.059153243970126855,0.5608108108108107,0.05987895922068248,0.5608108108108107,0.05955876506309419 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,train,0.7342465753424657,0.02062698296761808,0.7222026065328092,0.022107497460385914,0.71936252060817,0.021600861434028496 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,test,0.6461538461538462,0.055743953953472515,0.6289401836684041,0.05945247923491641,0.6283783783783784,0.05733464127503805 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7561643835616438,0.021889365106087647,0.7462922032786373,0.02339728776985673,0.7430848140685107,0.02307166975219351 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.676923076923077,0.05653492610610264,0.6655231560891939,0.05985009857247318,0.6640926640926641,0.05860871836171528 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7452054794520548,0.023124697093967445,0.734283634314163,0.024669282941938906,0.7312236673383403,0.024158772066306906 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.6307692307692307,0.06123466551190616,0.6198830409356726,0.0630918652830206,0.6192084942084942,0.06236489975641415 +flat_mae,patch,logistic,adhd200_dx,67,0.046415888336127774,train,0.8273972602739726,0.018870380073964074,0.8226036644165864,0.019670832296214066,0.8198235330036027,0.019824594373352045 +flat_mae,patch,logistic,adhd200_dx,67,0.046415888336127774,test,0.5692307692307692,0.05975428186484573,0.545,0.06435366404563307,0.5477799227799228,0.06095474186768401 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7479452054794521,0.022381650796188927,0.7362053795877326,0.024122937906308692,0.7329333821823288,0.023562311610917434 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.676923076923077,0.05475578054377986,0.6612062546537603,0.05847377633743083,0.6597490347490347,0.05646794768974913 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,train,0.7561643835616438,0.02297237254930783,0.747922308701084,0.024347670915976564,0.7452372229346034,0.024154459114002345 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,test,0.5692307692307692,0.058432859362300915,0.5376016260162602,0.06517725224395785,0.5434362934362934,0.0600475205603351 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,train,0.852054794520548,0.017807291862052228,0.8475119143405335,0.01867193788966987,0.8438206020638701,0.018914904816636952 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,test,0.5384615384615384,0.06400647156629448,0.5374762808349146,0.06401799331296461,0.5424710424710424,0.06445970254960806 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,train,0.8383561643835616,0.01856303386596264,0.8335536129725385,0.01937815353338643,0.8302497404897112,0.019540085054252206 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,test,0.6615384615384615,0.05875830748264327,0.6549227799227799,0.05970085690371634,0.6549227799227799,0.05939131702905786 +flat_mae,patch,logistic,adhd200_dx,72,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,72,2.782559402207126,test,0.676923076923077,0.06065438807620936,0.675694939415538,0.060763325563554765,0.6814671814671815,0.060708427754794854 +flat_mae,patch,logistic,adhd200_dx,73,0.046415888336127774,train,0.8328767123287671,0.017902720806415747,0.8282352941176471,0.018646484833195896,0.8253953715576724,0.018848037058880225 +flat_mae,patch,logistic,adhd200_dx,73,0.046415888336127774,test,0.5538461538461539,0.060446081406460496,0.5381034060279344,0.061840840778516086,0.5386100386100386,0.060839909107285525 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,train,0.7397260273972602,0.02170901301331742,0.729787648548607,0.022965593796301382,0.7270867680283324,0.022594045510799295 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,test,0.7076923076923077,0.054988420513163896,0.6888384983623079,0.06099460420475108,0.6867760617760618,0.05759817603701488 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,train,0.7506849315068493,0.022479030490316413,0.7411650107149814,0.023735586782215646,0.7382304451364718,0.023434672562617935 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,test,0.5384615384615384,0.06233761105046687,0.5248538011695907,0.06346832131607183,0.525096525096525,0.06247269926685842 +flat_mae,patch,logistic,adhd200_dx,76,0.046415888336127774,train,0.8356164383561644,0.019759389595840893,0.8302325581395349,0.020718168558312215,0.82638761677963,0.0207657584078862 +flat_mae,patch,logistic,adhd200_dx,76,0.046415888336127774,test,0.5384615384615384,0.056951136535302975,0.5045731707317074,0.062282923008089644,0.5120656370656371,0.05794684837750245 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.852054794520548,0.018056150613216564,0.848085460599334,0.01881870972472353,0.8452555413079319,0.01909239758295131 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.6615384615384615,0.05600507162157083,0.6425000000000001,0.06125987460765259,0.6418918918918919,0.05802482239118376 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,train,0.7561643835616438,0.022632630463976046,0.74891985685688,0.023432358426900425,0.7466721621786652,0.02329395007627946 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,test,0.5384615384615384,0.05831466570843385,0.5045731707317074,0.06369584600010945,0.5120656370656371,0.059342106235619405 +flat_mae,patch,logistic,adhd200_dx,79,0.046415888336127774,train,0.8493150684931506,0.018929364904699056,0.8459431044670744,0.019442037354965105,0.8442632960859743,0.019577312182085266 +flat_mae,patch,logistic,adhd200_dx,79,0.046415888336127774,test,0.5230769230769231,0.06432888130858642,0.5157414083153088,0.06510751512319023,0.515926640926641,0.06501326999334031 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7698630136986301,0.021384370909541563,0.7618381804623415,0.022408993086927183,0.7588080845087622,0.022172282324819884 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.6307692307692307,0.058389356248180396,0.61,0.06321979354839691,0.6105212355212355,0.060400545607809566 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,train,0.7479452054794521,0.021383136025530904,0.7391561024111359,0.022593928451094772,0.7365207302924833,0.022416114063178852 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,test,0.6,0.05340123882170065,0.5626293995859213,0.061054130802227755,0.5704633204633205,0.055003273378957926 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,train,0.7452054794520548,0.02128934603168322,0.734283634314163,0.02261702794449389,0.7312236673383403,0.02212638030463707 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,test,0.6615384615384615,0.058016410655308936,0.6595238095238095,0.05826449862276196,0.6636100386100386,0.05831972329002816 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,train,0.7452054794520548,0.022080445221387735,0.7348896056731828,0.023227921338263163,0.7319411369603712,0.022778129456061922 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,test,0.676923076923077,0.05677922780435793,0.6655231560891939,0.05946962476877568,0.6640926640926641,0.05842176129393022 +flat_mae,patch,logistic,adhd200_dx,84,0.046415888336127774,train,0.8438356164383561,0.017898430307391994,0.8388820481843737,0.018638457900993964,0.8351041094217501,0.018635107372653708 +flat_mae,patch,logistic,adhd200_dx,84,0.046415888336127774,test,0.6153846153846154,0.06082817783671406,0.6094688776736361,0.06230006367633172,0.61003861003861,0.062165218964866334 +flat_mae,patch,logistic,adhd200_dx,85,0.3593813663804626,train,0.947945205479452,0.01103649790388059,0.9467803451795348,0.011339844104306999,0.9445563900592294,0.011739451804013456 +flat_mae,patch,logistic,adhd200_dx,85,0.3593813663804626,test,0.6153846153846154,0.058056662793124415,0.6094688776736361,0.058369507039254157,0.61003861003861,0.05840424414309417 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,train,0.8493150684931506,0.018997988851693813,0.8448381137879596,0.019818525037737966,0.8413934175978507,0.019988968451634392 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,test,0.5846153846153846,0.061911018415007776,0.5644080416976918,0.06458681921681082,0.5656370656370656,0.06259074564818233 +flat_mae,patch,logistic,adhd200_dx,87,0.046415888336127774,train,0.8328767123287671,0.019995928277589215,0.8279113625648279,0.020727661209354313,0.8246779019356415,0.020733811419750104 +flat_mae,patch,logistic,adhd200_dx,87,0.046415888336127774,test,0.6461538461538462,0.05803080185984889,0.6233308138070043,0.06345206298792684,0.6240347490347491,0.06001702670901925 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,train,0.6821917808219178,0.02289958072810477,0.6665721665721666,0.02453926150185879,0.6653538499114612,0.02368947321803198 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,test,0.5846153846153846,0.05993204040791221,0.5699583435432491,0.0625638245959548,0.5699806949806949,0.0610463379070675 +flat_mae,patch,logistic,adhd200_dx,89,0.3593813663804626,train,0.9671232876712329,0.009466189153978111,0.9666158536585365,0.00960318321970581,0.9672864382976125,0.009515788082577758 +flat_mae,patch,logistic,adhd200_dx,89,0.3593813663804626,test,0.5846153846153846,0.06339668294385775,0.5830363506771205,0.06342232983245236,0.5873552123552124,0.06336468076465666 +flat_mae,patch,logistic,adhd200_dx,90,0.000774263682681127,train,0.6767123287671233,0.022558403007667302,0.6521451185630289,0.025516649426952052,0.6533247847591134,0.023641166604245978 +flat_mae,patch,logistic,adhd200_dx,90,0.000774263682681127,test,0.6461538461538462,0.05488612417741788,0.6167649320687003,0.06294287505142353,0.6196911196911197,0.05743951924090726 +flat_mae,patch,logistic,adhd200_dx,91,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,91,2.782559402207126,test,0.5230769230769231,0.056208327627448186,0.4923154446963971,0.05973337810235849,0.4985521235521235,0.05674026547770046 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,train,0.7424657534246575,0.02030480883375148,0.7291258763342385,0.022083491285525152,0.7259266043841973,0.02139481699281122 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,test,0.6461538461538462,0.05823062900094892,0.6289401836684041,0.06213918736860509,0.6283783783783784,0.06009644370283734 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7452054794520548,0.02169748440312625,0.732347723240686,0.02343385003778258,0.7290712584722476,0.022783643035788396 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.6461538461538462,0.049873601180128915,0.6003742314889067,0.060857178402923616,0.6110038610038611,0.052352737550917974 +flat_mae,patch,logistic,adhd200_dx,94,0.3593813663804626,train,0.9780821917808219,0.00715647485113932,0.9776457618814307,0.007326968127730438,0.9762777065396593,0.007818421548782898 +flat_mae,patch,logistic,adhd200_dx,94,0.3593813663804626,test,0.5846153846153846,0.055929795385235584,0.5578231292517006,0.061950269358818894,0.5612934362934363,0.05797837493651718 +flat_mae,patch,logistic,adhd200_dx,95,0.046415888336127774,train,0.8547945205479452,0.018321782562552937,0.8504803641956702,0.019172140874772182,0.8469652561519204,0.01940385322944146 +flat_mae,patch,logistic,adhd200_dx,95,0.046415888336127774,test,0.5230769230769231,0.05989121499131649,0.49987589972697943,0.06323461940823173,0.502895752895753,0.060759171699387264 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,train,0.7534246575342466,0.021059565951627127,0.7458531905675558,0.021925644909811227,0.7435275080906149,0.021780374173196743 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,test,0.676923076923077,0.055720987090744044,0.6690909090909091,0.05751079956255115,0.6684362934362934,0.05719059816812537 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,train,0.7479452054794521,0.021719029431458224,0.736833855799373,0.02347774010138546,0.7336508518043597,0.022946254870024548 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,test,0.6615384615384615,0.05118890412311485,0.622093023255814,0.06141746343856076,0.6288610038610039,0.054179856803824214 +flat_mae,patch,logistic,adhd200_dx,98,0.046415888336127774,train,0.8438356164383561,0.018348475183650046,0.8382205426085334,0.019404059558259145,0.8336691701776883,0.019548834652625236 +flat_mae,patch,logistic,adhd200_dx,98,0.046415888336127774,test,0.6615384615384615,0.058102182993748634,0.6474358974358974,0.06189059542312159,0.6462355212355213,0.05992387685173753 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7479452054794521,0.022999261616481487,0.7396899224806202,0.024071456343763273,0.7372381999145142,0.023783825299104333 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.6307692307692307,0.057653886693678984,0.6198830409356726,0.05954665285823218,0.6192084942084942,0.05864307492527539 +flat_mae,patch,logistic,adhd200_dx,100,0.000774263682681127,train,0.6684931506849315,0.022349155495465154,0.6470329670329671,0.02489520677335942,0.6474781706051169,0.02340307700717201 +flat_mae,patch,logistic,adhd200_dx,100,0.000774263682681127,test,0.5846153846153846,0.0567222736794017,0.5578231292517006,0.061862852752876535,0.5612934362934363,0.05822330360637524 diff --git a/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..054c8c790f327ef3863a3781d60a632f2cb40f45 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:03 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:09:52 time: 3.9237 data: 3.0630 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:47 time: 0.1810 data: 0.0589 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:28 time: 0.1447 data: 0.0397 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:19 time: 0.1404 data: 0.0381 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:14 time: 0.1570 data: 0.0479 max mem: 2851 +extract (train) [100/151] eta: 0:00:09 time: 0.1508 data: 0.0438 max mem: 2851 +extract (train) [120/151] eta: 0:00:05 time: 0.1504 data: 0.0432 max mem: 2851 +extract (train) [140/151] eta: 0:00:01 time: 0.1316 data: 0.0342 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1326 data: 0.0360 max mem: 2851 +extract (train) Total time: 0:00:26 (0.1770 s / it) +extract (validation) [ 0/32] eta: 0:01:56 time: 3.6372 data: 3.4944 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.1683 data: 0.0502 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1300 data: 0.0315 max mem: 2851 +extract (validation) Total time: 0:00:08 (0.2714 s / it) +extract (test) [ 0/33] eta: 0:01:56 time: 3.5214 data: 3.3831 max mem: 2851 +extract (test) [20/33] eta: 0:00:04 time: 0.1683 data: 0.0497 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1387 data: 0.0385 max mem: 2851 +extract (test) Total time: 0:00:08 (0.2677 s / it) +feature extraction time: 0:00:44 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.046416 | train | 0.83288 | 0.019407 | 0.82791 | 0.02015 | 0.82468 | 0.020149 | +| flat_mae | patch | logistic | adhd200_dx | | 0.046416 | test | 0.66154 | 0.059549 | 0.64744 | 0.063209 | 0.64624 | 0.06146 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06019023884377771, "f1": 0.606060606060606, "f1_std": 0.06187877803169629, "bacc": 0.6056949806949807, "bacc_std": 0.06139948545478854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.676923076923077, "acc_std": 0.04940561259829715, "f1": 0.6351242983159583, "f1_std": 0.06111096983818146, "bacc": 0.6423745173745173, "bacc_std": 0.052701326044149635} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.057568104252432487, "f1": 0.606060606060606, "f1_std": 0.05894656073394623, "bacc": 0.6056949806949807, "bacc_std": 0.05860072595645887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.056236390332534116, "f1": 0.6832358674463938, "f1_std": 0.058012311464388476, "bacc": 0.6819498069498069, "bacc_std": 0.0572856646471886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06196266102215709, "f1": 0.5534233593935086, "f1_std": 0.062099418110663995, "bacc": 0.5603281853281853, "bacc_std": 0.06305056965506149} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05652062303621898, "f1": 0.7006060606060607, "f1_std": 0.05846678040012765, "bacc": 0.6998069498069499, "bacc_std": 0.058102183690535096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05662906189305606, "f1": 0.6719538572458543, "f1_std": 0.05742197033282907, "bacc": 0.6727799227799228, "bacc_std": 0.057244127105777795} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.058185290644090434, "f1": 0.656084656084656, "f1_std": 0.06328997841716133, "bacc": 0.6554054054054055, "bacc_std": 0.06019530925738961} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06205380372074719, "f1": 0.5775, "f1_std": 0.0665937609508224, "bacc": 0.5791505791505791, "bacc_std": 0.06378095089626452} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.058979238148219405, "f1": 0.5512820512820513, "f1_std": 0.06208332193459976, "bacc": 0.5521235521235521, "bacc_std": 0.06020492117595055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06121956604119462, "f1": 0.5608108108108107, "f1_std": 0.06216944898110753, "bacc": 0.5608108108108107, "bacc_std": 0.06184992250643221} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05863355260534927, "f1": 0.545, "f1_std": 0.0643018423438985, "bacc": 0.5477799227799228, "bacc_std": 0.060624257353881085} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05951023779457502, "f1": 0.5976190476190476, "f1_std": 0.059495547674102055, "bacc": 0.6008687258687259, "bacc_std": 0.059681775278851205} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0488138015210341, "f1": 0.5656241646618552, "f1_std": 0.05953012356967095, "bacc": 0.5796332046332047, "bacc_std": 0.05088403990764041} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05957395880791655, "f1": 0.6264367816091954, "f1_std": 0.06035131773820891, "bacc": 0.627895752895753, "bacc_std": 0.060583895318543336} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06435519776436043, "f1": 0.5469838981014179, "f1_std": 0.06492285567577333, "bacc": 0.5472972972972974, "bacc_std": 0.06516691700213183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05915142737877714, "f1": 0.5775, "f1_std": 0.06427266232340036, "bacc": 0.5791505791505791, "bacc_std": 0.06112058865015624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05661120385901419, "f1": 0.5250692869740489, "f1_std": 0.0601620893714901, "bacc": 0.5299227799227799, "bacc_std": 0.057191071395968726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.058408526572165484, "f1": 0.6375757575757576, "f1_std": 0.05968032690148713, "bacc": 0.6370656370656371, "bacc_std": 0.05919301871267638} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05768891066921947, "f1": 0.5699583435432491, "f1_std": 0.06021634682385391, "bacc": 0.5699806949806949, "bacc_std": 0.05895407350908164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.05310002618704711, "f1": 0.7321212121212122, "f1_std": 0.05481591541581202, "bacc": 0.7311776061776062, "bacc_std": 0.054710675184643114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.058538985138667114, "f1": 0.578226387887527, "f1_std": 0.0596431500638317, "bacc": 0.5786679536679536, "bacc_std": 0.05969556313753244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.055767047172105656, "f1": 0.5167905665214048, "f1_std": 0.061089750795645215, "bacc": 0.5255791505791506, "bacc_std": 0.05653910275543799} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05192339373122724, "f1": 0.6431372549019607, "f1_std": 0.060631892972743545, "bacc": 0.6467181467181468, "bacc_std": 0.05440476562575506} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 166.81005372000556, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06280358034492947, "f1": 0.6299810246679317, "f1_std": 0.06327439420416091, "bacc": 0.6365830115830116, "bacc_std": 0.06318943951785536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05898618030908275, "f1": 0.606060606060606, "f1_std": 0.0602912843362489, "bacc": 0.6056949806949807, "bacc_std": 0.05978379340730521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.056504733005926205, "f1": 0.6336682185738789, "f1_std": 0.05926443188431186, "bacc": 0.6327220077220077, "bacc_std": 0.05798366492181559} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.054345430339786324, "f1": 0.5626293995859213, "f1_std": 0.0617402510502052, "bacc": 0.5704633204633205, "bacc_std": 0.056061716930151655} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05748871564085862, "f1": 0.570630081300813, "f1_std": 0.0634437681436107, "bacc": 0.5748069498069498, "bacc_std": 0.05913148825556801} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.060001451658967656, "f1": 0.6575670498084292, "f1_std": 0.06048906520927739, "bacc": 0.6592664092664093, "bacc_std": 0.060451241376856125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.055753260312765195, "f1": 0.6036585365853658, "f1_std": 0.06223610980272032, "bacc": 0.6061776061776062, "bacc_std": 0.058127224597020154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06294529605822813, "f1": 0.578226387887527, "f1_std": 0.06380292521054755, "bacc": 0.5786679536679536, "bacc_std": 0.06372660427590177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 2.782559402207126, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05868197307944784, "f1": 0.5666666666666667, "f1_std": 0.05908246899116847, "bacc": 0.5694980694980695, "bacc_std": 0.059654419038302556} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 166.81005372000556, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06179315754617679, "f1": 0.5383522727272727, "f1_std": 0.06239199883367598, "bacc": 0.5511583011583012, "bacc_std": 0.06294511990731128} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05332544714870629, "f1": 0.6167649320687003, "f1_std": 0.06066932939265853, "bacc": 0.6196911196911197, "bacc_std": 0.05574871583058074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.000774263682681127, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.058417797401600455, "f1": 0.49987589972697943, "f1_std": 0.061557425501980484, "bacc": 0.502895752895753, "bacc_std": 0.05924746565684722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05556220246890206, "f1": 0.7115384615384616, "f1_std": 0.059610081081312355, "bacc": 0.708976833976834, "bacc_std": 0.05804399469741471} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.057153388547707365, "f1": 0.6336682185738789, "f1_std": 0.05945815826083519, "bacc": 0.6327220077220077, "bacc_std": 0.058164508025839834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05671338508092493, "f1": 0.5834401435529352, "f1_std": 0.06278187649843205, "bacc": 0.5883204633204633, "bacc_std": 0.058270470545750463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.0470618377036781, "f1": 0.6972049689440993, "f1_std": 0.055779388805025115, "bacc": 0.6959459459459459, "bacc_std": 0.05056856924412733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05877141357202869, "f1": 0.6474358974358974, "f1_std": 0.06222753234506633, "bacc": 0.6462355212355213, "bacc_std": 0.06047565490144164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06187275356032003, "f1": 0.5953065134099617, "f1_std": 0.06243661340781053, "bacc": 0.5965250965250966, "bacc_std": 0.06266298642200656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05516791795597903, "f1": 0.6036585365853658, "f1_std": 0.06114461286973705, "bacc": 0.6061776061776062, "bacc_std": 0.05709591903737047} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05628111184310291, "f1": 0.6690909090909091, "f1_std": 0.05840864803749684, "bacc": 0.6684362934362934, "bacc_std": 0.05807804455656731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.0598078501110953, "f1": 0.5321419707123356, "f1_std": 0.06296583759447165, "bacc": 0.5342664092664092, "bacc_std": 0.06048413511259806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.676923076923077, "acc_std": 0.053628739093929234, "f1": 0.6500897205844656, "f1_std": 0.06125839453727086, "bacc": 0.6510617760617761, "bacc_std": 0.05623022627984787} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05641063309897539, "f1": 0.49987589972697943, "f1_std": 0.05854909447209793, "bacc": 0.502895752895753, "bacc_std": 0.05677740900886619} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.055072720080284944, "f1": 0.6690909090909091, "f1_std": 0.056779663268740274, "bacc": 0.6684362934362934, "bacc_std": 0.05645592238343994} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.061925513568877495, "f1": 0.588206627680312, "f1_std": 0.0643059304649406, "bacc": 0.5878378378378378, "bacc_std": 0.06322034599568273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.061745467217387634, "f1": 0.5921814671814671, "f1_std": 0.06254522503170544, "bacc": 0.5921814671814671, "bacc_std": 0.06219377876360297} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06201613838444266, "f1": 0.543030303030303, "f1_std": 0.06327893967452543, "bacc": 0.542953667953668, "bacc_std": 0.06280604563792958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.059281664447322746, "f1": 0.5321419707123356, "f1_std": 0.06256361429030328, "bacc": 0.5342664092664092, "bacc_std": 0.060287741841203696} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.05591319561415697, "f1": 0.570630081300813, "f1_std": 0.06208711240024855, "bacc": 0.5748069498069498, "bacc_std": 0.057909146826721374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0607772224540639, "f1": 0.5666666666666667, "f1_std": 0.06071882760676522, "bacc": 0.5694980694980695, "bacc_std": 0.06099780500583999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05959493247190873, "f1": 0.5810455956075435, "f1_std": 0.06024286371201866, "bacc": 0.583011583011583, "bacc_std": 0.06051057707283309} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06126554868456906, "f1": 0.545, "f1_std": 0.06539374007888736, "bacc": 0.5477799227799228, "bacc_std": 0.06261624865272561} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.059442427021257425, "f1": 0.5578231292517006, "f1_std": 0.06475376483273171, "bacc": 0.5612934362934363, "bacc_std": 0.06101362339260015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05677144877885102, "f1": 0.6235521235521235, "f1_std": 0.05802562420129616, "bacc": 0.6235521235521235, "bacc_std": 0.05765226332132044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05765516753245339, "f1": 0.5834401435529352, "f1_std": 0.0641398838935539, "bacc": 0.5883204633204633, "bacc_std": 0.059358634524250914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 2.782559402207126, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06119178503862277, "f1": 0.5062484685126194, "f1_std": 0.06350511336632365, "bacc": 0.5072393822393823, "bacc_std": 0.062143343657125365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.05659908628279557, "f1": 0.570630081300813, "f1_std": 0.06214351772421274, "bacc": 0.5748069498069498, "bacc_std": 0.05808544770222929} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05603862573229734, "f1": 0.588206627680312, "f1_std": 0.058140384941637265, "bacc": 0.5878378378378378, "bacc_std": 0.05701097764285227} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.059153243970126855, "f1": 0.5608108108108107, "f1_std": 0.05987895922068248, "bacc": 0.5608108108108107, "bacc_std": 0.05955876506309419} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.055743953953472515, "f1": 0.6289401836684041, "f1_std": 0.05945247923491641, "bacc": 0.6283783783783784, "bacc_std": 0.05733464127503805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05653492610610264, "f1": 0.6655231560891939, "f1_std": 0.05985009857247318, "bacc": 0.6640926640926641, "bacc_std": 0.05860871836171528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06123466551190616, "f1": 0.6198830409356726, "f1_std": 0.0630918652830206, "bacc": 0.6192084942084942, "bacc_std": 0.06236489975641415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05975428186484573, "f1": 0.545, "f1_std": 0.06435366404563307, "bacc": 0.5477799227799228, "bacc_std": 0.06095474186768401} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05475578054377986, "f1": 0.6612062546537603, "f1_std": 0.05847377633743083, "bacc": 0.6597490347490347, "bacc_std": 0.05646794768974913} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.058432859362300915, "f1": 0.5376016260162602, "f1_std": 0.06517725224395785, "bacc": 0.5434362934362934, "bacc_std": 0.0600475205603351} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06400647156629448, "f1": 0.5374762808349146, "f1_std": 0.06401799331296461, "bacc": 0.5424710424710424, "bacc_std": 0.06445970254960806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05875830748264327, "f1": 0.6549227799227799, "f1_std": 0.05970085690371634, "bacc": 0.6549227799227799, "bacc_std": 0.05939131702905786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 2.782559402207126, "split": "test", "acc": 0.676923076923077, "acc_std": 0.06065438807620936, "f1": 0.675694939415538, "f1_std": 0.060763325563554765, "bacc": 0.6814671814671815, "bacc_std": 0.060708427754794854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.060446081406460496, "f1": 0.5381034060279344, "f1_std": 0.061840840778516086, "bacc": 0.5386100386100386, "bacc_std": 0.060839909107285525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.054988420513163896, "f1": 0.6888384983623079, "f1_std": 0.06099460420475108, "bacc": 0.6867760617760618, "bacc_std": 0.05759817603701488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06233761105046687, "f1": 0.5248538011695907, "f1_std": 0.06346832131607183, "bacc": 0.525096525096525, "bacc_std": 0.06247269926685842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.056951136535302975, "f1": 0.5045731707317074, "f1_std": 0.062282923008089644, "bacc": 0.5120656370656371, "bacc_std": 0.05794684837750245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05600507162157083, "f1": 0.6425000000000001, "f1_std": 0.06125987460765259, "bacc": 0.6418918918918919, "bacc_std": 0.05802482239118376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05831466570843385, "f1": 0.5045731707317074, "f1_std": 0.06369584600010945, "bacc": 0.5120656370656371, "bacc_std": 0.059342106235619405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06432888130858642, "f1": 0.5157414083153088, "f1_std": 0.06510751512319023, "bacc": 0.515926640926641, "bacc_std": 0.06501326999334031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.058389356248180396, "f1": 0.61, "f1_std": 0.06321979354839691, "bacc": 0.6105212355212355, "bacc_std": 0.060400545607809566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05340123882170065, "f1": 0.5626293995859213, "f1_std": 0.061054130802227755, "bacc": 0.5704633204633205, "bacc_std": 0.055003273378957926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.058016410655308936, "f1": 0.6595238095238095, "f1_std": 0.05826449862276196, "bacc": 0.6636100386100386, "bacc_std": 0.05831972329002816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05677922780435793, "f1": 0.6655231560891939, "f1_std": 0.05946962476877568, "bacc": 0.6640926640926641, "bacc_std": 0.05842176129393022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06082817783671406, "f1": 0.6094688776736361, "f1_std": 0.06230006367633172, "bacc": 0.61003861003861, "bacc_std": 0.062165218964866334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.058056662793124415, "f1": 0.6094688776736361, "f1_std": 0.058369507039254157, "bacc": 0.61003861003861, "bacc_std": 0.05840424414309417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.061911018415007776, "f1": 0.5644080416976918, "f1_std": 0.06458681921681082, "bacc": 0.5656370656370656, "bacc_std": 0.06259074564818233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05803080185984889, "f1": 0.6233308138070043, "f1_std": 0.06345206298792684, "bacc": 0.6240347490347491, "bacc_std": 0.06001702670901925} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05993204040791221, "f1": 0.5699583435432491, "f1_std": 0.0625638245959548, "bacc": 0.5699806949806949, "bacc_std": 0.0610463379070675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06339668294385775, "f1": 0.5830363506771205, "f1_std": 0.06342232983245236, "bacc": 0.5873552123552124, "bacc_std": 0.06336468076465666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.000774263682681127, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05488612417741788, "f1": 0.6167649320687003, "f1_std": 0.06294287505142353, "bacc": 0.6196911196911197, "bacc_std": 0.05743951924090726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 2.782559402207126, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.056208327627448186, "f1": 0.4923154446963971, "f1_std": 0.05973337810235849, "bacc": 0.4985521235521235, "bacc_std": 0.05674026547770046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05823062900094892, "f1": 0.6289401836684041, "f1_std": 0.06213918736860509, "bacc": 0.6283783783783784, "bacc_std": 0.06009644370283734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.049873601180128915, "f1": 0.6003742314889067, "f1_std": 0.060857178402923616, "bacc": 0.6110038610038611, "bacc_std": 0.052352737550917974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.055929795385235584, "f1": 0.5578231292517006, "f1_std": 0.061950269358818894, "bacc": 0.5612934362934363, "bacc_std": 0.05797837493651718} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05989121499131649, "f1": 0.49987589972697943, "f1_std": 0.06323461940823173, "bacc": 0.502895752895753, "bacc_std": 0.060759171699387264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.055720987090744044, "f1": 0.6690909090909091, "f1_std": 0.05751079956255115, "bacc": 0.6684362934362934, "bacc_std": 0.05719059816812537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05118890412311485, "f1": 0.622093023255814, "f1_std": 0.06141746343856076, "bacc": 0.6288610038610039, "bacc_std": 0.054179856803824214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.058102182993748634, "f1": 0.6474358974358974, "f1_std": 0.06189059542312159, "bacc": 0.6462355212355213, "bacc_std": 0.05992387685173753} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057653886693678984, "f1": 0.6198830409356726, "f1_std": 0.05954665285823218, "bacc": 0.6192084942084942, "bacc_std": 0.05864307492527539} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.0567222736794017, "f1": 0.5578231292517006, "f1_std": 0.061862852752876535, "bacc": 0.5612934362934363, "bacc_std": 0.05822330360637524} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 3.516 | 23.453 | 0.7994 | 0.091217 | 0.79091 | 0.096578 | 0.78863 | 0.09676 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 3.516 | 23.453 | 0.61262 | 0.052588 | 0.59546 | 0.05478 | 0.59757 | 0.05335 | + + +done! total time: 0:04:35 diff --git a/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6f4698fa64478d5cbad51a2e8f6750d5b513b87 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..e2d5b3c60cead73fa60c562a730c0e23e8a86262 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,21.54434690031882,test,0.6829268292682927,0.07469310599569252,0.6072218128224024,0.08265119819502115,0.6371527777777778,0.0933769725685963 +flat_mae,patch,logistic,adni_ad_vs_cn,1,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,1,1291.5496650148827,test,0.7560975609756098,0.060751960576271094,0.6440972222222222,0.09125746909128643,0.635483870967742,0.08729699185356012 +flat_mae,patch,logistic,adni_ad_vs_cn,2,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,2,21.54434690031882,test,0.7073170731707317,0.06090602859206814,0.5729166666666666,0.08975823331794162,0.5693548387096774,0.08352076582899495 +flat_mae,patch,logistic,adni_ad_vs_cn,3,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,2.782559402207126,test,0.7317073170731707,0.06119095631740582,0.5918552036199095,0.09049414384878635,0.5854838709677419,0.08150229729338858 +flat_mae,patch,logistic,adni_ad_vs_cn,4,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,4,10000.0,test,0.8048780487804879,0.05903194871983235,0.7354838709677419,0.07963033723878764,0.7354838709677419,0.08261705163974395 +flat_mae,patch,logistic,adni_ad_vs_cn,5,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,5,21.54434690031882,test,0.6585365853658537,0.05290852780127935,0.4564393939393939,0.06273681100932395,0.4693548387096774,0.055214656697800665 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,test,0.6585365853658537,0.0632309534964592,0.5017361111111112,0.07693899033880729,0.5032258064516129,0.07181706809325292 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,test,0.7560975609756098,0.06577626888991142,0.6893939393939394,0.08092340334662386,0.7032258064516128,0.08745007072568414 +flat_mae,patch,logistic,adni_ad_vs_cn,8,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,166.81005372000556,test,0.7317073170731707,0.06440088603851737,0.6479313036690086,0.0830317573625899,0.6532258064516129,0.08812688202874432 +flat_mae,patch,logistic,adni_ad_vs_cn,9,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,21.54434690031882,test,0.7073170731707317,0.06963667075002176,0.6272727272727273,0.08317530532060659,0.6370967741935484,0.0885902480521867 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.3593813663804626,train,0.975609756097561,0.007996259067613283,0.9645665510802881,0.012170927504527827,0.9476744186046512,0.017154765092728445 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.3593813663804626,test,0.7317073170731707,0.04958293636748563,0.5512437810945273,0.08751101814900063,0.5516129032258065,0.06998248413230115 +flat_mae,patch,logistic,adni_ad_vs_cn,11,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,11,1291.5496650148827,test,0.7560975609756098,0.03252601576228226,0.5119047619047619,0.07685099605898597,0.5338709677419355,0.049493483946614396 +flat_mae,patch,logistic,adni_ad_vs_cn,12,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,12,166.81005372000556,test,0.6829268292682927,0.06933009952952143,0.5839188134270101,0.08390310322736193,0.5870967741935484,0.08753548743723365 +flat_mae,patch,logistic,adni_ad_vs_cn,13,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,13,2.782559402207126,test,0.7073170731707317,0.0663344867206715,0.603225806451613,0.08527354275051797,0.603225806451613,0.08586431495520316 +flat_mae,patch,logistic,adni_ad_vs_cn,14,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,14,1291.5496650148827,test,0.6829268292682927,0.07036852089462364,0.5839188134270101,0.08467733427372978,0.5870967741935484,0.08687533492849742 +flat_mae,patch,logistic,adni_ad_vs_cn,15,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,15,1291.5496650148827,test,0.6097560975609756,0.07965720728357803,0.5494505494505495,0.08238839228560041,0.5725806451612903,0.09294007244217486 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.005994842503189409,train,0.8346883468834688,0.01270177183788242,0.6944244579899811,0.030939496151896794,0.6615375133536034,0.02515413691355604 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.005994842503189409,test,0.7560975609756098,1.1102230246251565e-16,0.4305555555555556,1.6653345369377348e-16,0.5,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,17,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,17,2.782559402207126,test,0.6097560975609756,0.06897352258200687,0.47096774193548385,0.07742728674968488,0.47096774193548385,0.07671129274731055 +flat_mae,patch,logistic,adni_ad_vs_cn,18,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,21.54434690031882,test,0.7073170731707317,0.06627428449441666,0.603225806451613,0.08704167050948522,0.603225806451613,0.08679893273678875 +flat_mae,patch,logistic,adni_ad_vs_cn,19,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,166.81005372000556,test,0.6341463414634146,0.07594988798431805,0.5467943994104643,0.08214776070915095,0.5548387096774194,0.08947873528765625 +flat_mae,patch,logistic,adni_ad_vs_cn,20,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,20,166.81005372000556,test,0.7317073170731707,0.06876718432145013,0.6676492262343405,0.07862640047180426,0.6870967741935483,0.08555635703399367 +flat_mae,patch,logistic,adni_ad_vs_cn,21,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,166.81005372000556,test,0.7317073170731707,0.058921882363828525,0.5918552036199095,0.09243016941174909,0.5854838709677419,0.0821773971952232 +flat_mae,patch,logistic,adni_ad_vs_cn,22,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,22,166.81005372000556,test,0.8048780487804879,0.04883555428460247,0.6893939393939394,0.08874741757762356,0.667741935483871,0.07883644808276138 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,test,0.6097560975609756,0.0662566710636764,0.47096774193548385,0.07553831997439833,0.47096774193548385,0.07623672501763958 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.046415888336127774,train,0.8943089430894309,0.013634446644279924,0.8326335988835263,0.0242516151151637,0.79753882816994,0.026025536644366964 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.046415888336127774,test,0.6585365853658537,0.06782549256731432,0.5370967741935484,0.08107186049835596,0.5370967741935484,0.08015166288652747 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,train,0.986449864498645,0.005860570043996583,0.9806516564069758,0.008573612758272632,0.9709302325581395,0.01257296712927176 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,test,0.7317073170731707,0.057134509460803035,0.5918552036199095,0.08620603980808071,0.5854838709677419,0.07753287772288395 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.3593813663804626,train,0.975609756097561,0.007154315511150618,0.9645665510802881,0.010835169103837621,0.9476744186046512,0.015348502462875413 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.3593813663804626,test,0.7073170731707317,0.07269137289279834,0.6272727272727273,0.08627259461181669,0.6370967741935484,0.09147148757127305 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,train,0.975609756097561,0.007550234719211433,0.9645665510802881,0.011479338366934013,0.9476744186046512,0.01619788727551752 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,test,0.7073170731707317,0.057488068751528676,0.5729166666666666,0.08082299023517758,0.5693548387096774,0.07599797330119863 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,28,21.54434690031882,test,0.6829268292682927,0.0649913553030593,0.5547201336675021,0.08386149051486809,0.5532258064516129,0.0803197705817561 +flat_mae,patch,logistic,adni_ad_vs_cn,29,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,166.81005372000556,test,0.7560975609756098,0.06601820454503732,0.6893939393939394,0.08059528841200657,0.7032258064516128,0.08605121868546 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,test,0.7073170731707317,0.06559368739668378,0.603225806451613,0.08339410597209616,0.603225806451613,0.08323722577322286 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,train,0.9024390243902439,0.013361394447548466,0.8463069233617179,0.02360130261435228,0.8109335195989811,0.025702734309103983 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,test,0.6585365853658537,0.06159714449062689,0.5017361111111112,0.07879591021931896,0.5032258064516129,0.07306188637295516 +flat_mae,patch,logistic,adni_ad_vs_cn,32,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,32,2.782559402207126,test,0.6829268292682927,0.07341635197730922,0.6259649122807017,0.07954011799138176,0.6548387096774193,0.08843064967696032 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.3593813663804626,train,0.983739837398374,0.0066750140527340185,0.9766829555986183,0.009903555460545214,0.9651162790697674,0.014320233636388652 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.3593813663804626,test,0.6341463414634146,0.06854128965714291,0.5467943994104643,0.07789634634280497,0.5548387096774194,0.0845071960312339 +flat_mae,patch,logistic,adni_ad_vs_cn,34,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,34,2.782559402207126,test,0.7073170731707317,0.06856912689828491,0.603225806451613,0.09126008083311113,0.603225806451613,0.09159064535386323 +flat_mae,patch,logistic,adni_ad_vs_cn,35,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,35,21.54434690031882,test,0.7317073170731707,0.06922930885141937,0.6232247284878863,0.0963316358111418,0.6193548387096774,0.09412624086599312 +flat_mae,patch,logistic,adni_ad_vs_cn,36,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,2.782559402207126,test,0.7317073170731707,0.06565643456933166,0.6232247284878863,0.08657926207944053,0.6193548387096774,0.08437041464782541 +flat_mae,patch,logistic,adni_ad_vs_cn,37,0.046415888336127774,train,0.8970189701897019,0.013987582716017785,0.8377684191040355,0.0248210366075909,0.8033527816583121,0.026611620877183093 +flat_mae,patch,logistic,adni_ad_vs_cn,37,0.046415888336127774,test,0.7073170731707317,0.04517937019015755,0.4831932773109243,0.06996275933883866,0.5016129032258064,0.05350194984144624 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,train,0.991869918699187,0.004615277199816103,0.9884880564885973,0.006643408780230101,0.9825581395348837,0.009901379574024139 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,test,0.6341463414634146,0.07269929427758738,0.5467943994104643,0.08440104006647929,0.5548387096774194,0.09104437723676734 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,train,0.9051490514905149,0.013165225744196173,0.851341551849166,0.023210943285227888,0.8167474730873532,0.025860505821802555 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,test,0.5609756097560976,0.06564938507599771,0.40483870967741936,0.05870895566629248,0.40483870967741936,0.06163603847014703 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,test,0.6585365853658537,0.05997205377331521,0.5017361111111112,0.07695017249289807,0.5032258064516129,0.07120219768556968 +flat_mae,patch,logistic,adni_ad_vs_cn,41,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,41,2.782559402207126,test,0.7560975609756098,0.0637752411216992,0.6693548387096775,0.08257463549585953,0.6693548387096775,0.08378272597436866 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,train,0.8970189701897019,0.013616758706983539,0.8360690235690236,0.02443268234190293,0.7993056126222369,0.02612672751152475 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,test,0.7073170731707317,0.06683209892527603,0.603225806451613,0.08332155669572555,0.603225806451613,0.08298327477136218 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,test,0.7560975609756098,0.06531521518696613,0.6893939393939394,0.07884323278891814,0.7032258064516128,0.08562330709568558 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,test,0.7560975609756098,0.06650940006827605,0.6693548387096775,0.08791406025435267,0.6693548387096775,0.08794106317945076 +flat_mae,patch,logistic,adni_ad_vs_cn,45,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,1291.5496650148827,test,0.8048780487804879,0.06217022935620754,0.7354838709677419,0.08391637014323428,0.7354838709677419,0.08641365193286653 +flat_mae,patch,logistic,adni_ad_vs_cn,46,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,166.81005372000556,test,0.7073170731707317,0.05527205980315912,0.5340909090909092,0.08271171757622651,0.535483870967742,0.0700712391326856 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,test,0.7560975609756098,0.06163113006218866,0.6440972222222222,0.09356288792235583,0.635483870967742,0.08792905870782727 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.3593813663804626,train,0.981029810298103,0.0072760689885888645,0.9729123189697663,0.01063434288467619,0.9633494946174705,0.014506109973658206 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.3593813663804626,test,0.7317073170731707,0.06238982436594961,0.6479313036690086,0.07973429325561347,0.6532258064516129,0.08451904969097777 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.046415888336127774,train,0.8888888888888888,0.01360266889665109,0.8202715706190526,0.025387891449104527,0.7818637521571206,0.02642088549081554 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.046415888336127774,test,0.8048780487804879,0.04250390330957467,0.6554621848739496,0.09437074181266959,0.6338709677419355,0.07417268991509189 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,train,0.978319783197832,0.007388269502709441,0.9689106074648244,0.010863429490185409,0.9575355411290984,0.01470291801621068 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,test,0.7560975609756098,0.06255012980678717,0.6693548387096775,0.08437738075032508,0.6693548387096775,0.08324011948691344 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,train,0.975609756097561,0.0072211294851017875,0.9645665510802881,0.010951382874151254,0.9476744186046512,0.015491841744200891 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,test,0.6585365853658537,0.06894986927853933,0.5651515151515152,0.08277341097369258,0.5709677419354839,0.08810176454369571 +flat_mae,patch,logistic,adni_ad_vs_cn,52,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,52,21.54434690031882,test,0.7073170731707317,0.061708281106901304,0.5729166666666666,0.08752480235771185,0.5693548387096774,0.08104091867371513 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,test,0.6585365853658537,0.07190811441347139,0.5651515151515152,0.08501136764892162,0.5709677419354839,0.08995434476176928 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,train,0.8211382113821138,0.013250642519417146,0.6716019417475728,0.030169674918035235,0.6446092530199687,0.024081745935972728 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,test,0.7317073170731707,0.047140732509274884,0.5512437810945273,0.08279298890946533,0.5516129032258065,0.06574504179245279 +flat_mae,patch,logistic,adni_ad_vs_cn,55,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,55,1291.5496650148827,test,0.8048780487804879,0.06368281006238652,0.7515151515151515,0.07788088962264188,0.7693548387096774,0.08181761721654277 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.046415888336127774,train,0.8943089430894309,0.014005466200096814,0.8326335988835263,0.024735081059167024,0.79753882816994,0.026381917425557826 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.046415888336127774,test,0.6829268292682927,0.040680557923661376,0.4057971014492754,0.014588852671054897,0.45161290322580644,0.026901659272098654 +flat_mae,patch,logistic,adni_ad_vs_cn,57,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,57,21.54434690031882,test,0.7317073170731707,0.0684564412933572,0.6479313036690086,0.08648434388132373,0.6532258064516129,0.08738651990188138 +flat_mae,patch,logistic,adni_ad_vs_cn,58,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,21.54434690031882,test,0.7560975609756098,0.06650301348881416,0.6693548387096775,0.08934009521615008,0.6693548387096775,0.0906658208003125 +flat_mae,patch,logistic,adni_ad_vs_cn,59,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,2.782559402207126,test,0.7073170731707317,0.0680706320366247,0.6272727272727273,0.08213611005697752,0.6370967741935484,0.0879423632908865 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.046415888336127774,train,0.8997289972899729,0.013704255225697062,0.8412164912484736,0.024676269663423123,0.805119566110609,0.027234999789339666 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.046415888336127774,test,0.8048780487804879,0.0506670319449976,0.6893939393939394,0.09520012326628226,0.667741935483871,0.08390404428418487 +flat_mae,patch,logistic,adni_ad_vs_cn,61,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,61,166.81005372000556,test,0.6585365853658537,0.06960514111216567,0.5651515151515152,0.08100959752200344,0.5709677419354839,0.08557971279722651 +flat_mae,patch,logistic,adni_ad_vs_cn,62,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,62,166.81005372000556,test,0.6829268292682927,0.0723407233157872,0.5839188134270101,0.08777958460273834,0.5870967741935484,0.09139491847925323 +flat_mae,patch,logistic,adni_ad_vs_cn,63,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,21.54434690031882,test,0.8536585365853658,0.0516267204356303,0.7864583333333333,0.08027515338750701,0.7677419354838709,0.08090226397534958 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.005994842503189409,train,0.8265582655826558,0.0132583256043883,0.6815533980582524,0.031092304002357723,0.6521899909606377,0.025106333970993083 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.005994842503189409,test,0.7560975609756098,0.05405837724081965,0.6117424242424243,0.09331210241498446,0.6016129032258064,0.07972973827533018 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,65,2.782559402207126,test,0.7317073170731707,0.06710243264136595,0.6479313036690086,0.08478849515602527,0.6532258064516129,0.08877025937222131 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,train,0.8997289972899729,0.013938937915163216,0.8412164912484736,0.025049929994811936,0.805119566110609,0.027385612505389716 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,test,0.8048780487804879,0.04270594501285282,0.6554621848739496,0.09726883893894248,0.6338709677419355,0.07576424762037692 +flat_mae,patch,logistic,adni_ad_vs_cn,67,0.046415888336127774,train,0.8943089430894309,0.01313331363806157,0.8308632543926662,0.023897622304554413,0.7934916591338648,0.025143627597526346 +flat_mae,patch,logistic,adni_ad_vs_cn,67,0.046415888336127774,test,0.7317073170731707,0.0515675977010674,0.5512437810945273,0.08896555772166843,0.5516129032258065,0.07125472611523045 +flat_mae,patch,logistic,adni_ad_vs_cn,68,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,68,2.782559402207126,test,0.6585365853658537,0.06583240849557591,0.5370967741935484,0.08260428212281196,0.5370967741935484,0.08339774007074405 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.046415888336127774,train,0.8943089430894309,0.012912457567486352,0.8326335988835263,0.02320656760092595,0.79753882816994,0.024827317834562934 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.046415888336127774,test,0.8536585365853658,0.04698497994331429,0.7670454545454546,0.08702819627475249,0.7338709677419355,0.08146872863926266 +flat_mae,patch,logistic,adni_ad_vs_cn,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,70,2.782559402207126,test,0.7317073170731707,0.061904630071354654,0.6232247284878863,0.0852771502667445,0.6193548387096774,0.08284801726426642 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,train,0.8997289972899729,0.013218945510642453,0.8395369336545807,0.024430034134463357,0.8010723970745337,0.026640485332331775 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.046415888336127774,test,0.7073170731707317,0.0332279277331527,0.4142857142857143,0.011561705437815907,0.46774193548387094,0.021973307049342927 +flat_mae,patch,logistic,adni_ad_vs_cn,72,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,72,21.54434690031882,test,0.6585365853658537,0.062305955764564526,0.5017361111111112,0.08053912277465329,0.5032258064516129,0.07408709586124271 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.005994842503189409,train,0.8265582655826558,0.013379219080565342,0.6815533980582524,0.031069840320738444,0.6521899909606377,0.025072509288267184 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.005994842503189409,test,0.7804878048780488,0.047466598043294485,0.6328358208955224,0.09144187902117322,0.6177419354838709,0.07490463836849828 +flat_mae,patch,logistic,adni_ad_vs_cn,74,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,74,166.81005372000556,test,0.8292682926829268,0.057625511568932745,0.7602339181286549,0.08326180043528457,0.7516129032258064,0.08499081538184929 +flat_mae,patch,logistic,adni_ad_vs_cn,75,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,75,1291.5496650148827,test,0.7317073170731707,0.06489049944715244,0.6676492262343405,0.07784021400970226,0.6870967741935483,0.08660580682167986 +flat_mae,patch,logistic,adni_ad_vs_cn,76,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,2.782559402207126,test,0.7317073170731707,0.04988245552719876,0.5512437810945273,0.08660260062350651,0.5516129032258065,0.06872992424695895 +flat_mae,patch,logistic,adni_ad_vs_cn,77,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,77,166.81005372000556,test,0.7560975609756098,0.05329999949775272,0.6117424242424243,0.09285296883642837,0.6016129032258064,0.08066348765962945 +flat_mae,patch,logistic,adni_ad_vs_cn,78,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,21.54434690031882,test,0.6585365853658537,0.0619270934393351,0.5017361111111112,0.07736628964069585,0.5032258064516129,0.07300441699749846 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.046415888336127774,train,0.8915989159891599,0.014117359132608604,0.8274410774410774,0.02539427066763693,0.7917248746815679,0.027000551202209355 +flat_mae,patch,logistic,adni_ad_vs_cn,79,0.046415888336127774,test,0.8292682926829268,0.059711999656969356,0.7759562841530054,0.0780367635054407,0.7854838709677419,0.08182686037019693 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,166.81005372000556,test,0.5853658536585366,0.07313169690501527,0.4863669859985261,0.07745540345116567,0.4887096774193548,0.08393202393007665 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.3593813663804626,train,0.975609756097561,0.008139066399029594,0.9645665510802881,0.01240175229044168,0.9476744186046512,0.017461136635127408 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.3593813663804626,test,0.7073170731707317,0.05732359070365379,0.5340909090909092,0.08514043076716758,0.535483870967742,0.07217782238128603 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,test,0.6829268292682927,0.06301070228161726,0.5547201336675021,0.08344150413467971,0.5532258064516129,0.08085037239197428 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,test,0.6341463414634146,0.078371527809579,0.5684210526315789,0.08310727997914037,0.5887096774193548,0.09262657841081971 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,train,0.8970189701897019,0.012803604416090325,0.8325131390348781,0.02434446126184608,0.7912112745500863,0.02600497090409653 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,test,0.7560975609756098,0.05400993585194957,0.6117424242424243,0.09181779319265893,0.6016129032258064,0.07851866722644807 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,test,0.8048780487804879,0.0589008180355653,0.7354838709677419,0.07947229902837386,0.7354838709677419,0.08338529354760832 +flat_mae,patch,logistic,adni_ad_vs_cn,86,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,1291.5496650148827,test,0.6829268292682927,0.07220664047862084,0.6259649122807017,0.07980739712241197,0.6548387096774193,0.08953028403051566 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,test,0.7804878048780488,0.0657658674076713,0.7280766396462786,0.07745861843945868,0.7532258064516129,0.08430221210419553 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.3593813663804626,train,0.981029810298103,0.006602685022370121,0.9729123189697663,0.009634597939738788,0.9633494946174705,0.01287323659963581 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.3593813663804626,test,0.7560975609756098,0.063043472909589,0.6693548387096775,0.08420744751617514,0.6693548387096775,0.08513584486669203 +flat_mae,patch,logistic,adni_ad_vs_cn,89,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,89,166.81005372000556,test,0.7073170731707317,0.06998725132484586,0.6272727272727273,0.0867892902234142,0.6370967741935484,0.09227827795709195 +flat_mae,patch,logistic,adni_ad_vs_cn,90,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,90,1291.5496650148827,test,0.8048780487804879,0.06343046706662582,0.764367816091954,0.07088013871857227,0.8032258064516129,0.0733781167139563 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.3593813663804626,train,0.978319783197832,0.007399880724499489,0.9689106074648244,0.010884627031558333,0.9575355411290984,0.014627380588695775 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.3593813663804626,test,0.7073170731707317,0.06177179702577043,0.603225806451613,0.08353750274846851,0.603225806451613,0.08584872903424375 +flat_mae,patch,logistic,adni_ad_vs_cn,92,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,92,166.81005372000556,test,0.6585365853658537,0.07627714893942475,0.5876436781609196,0.08508048615690386,0.6048387096774194,0.09390499388856398 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,test,0.5365853658536586,0.07145399088785251,0.42593957258658804,0.07182742238943822,0.42258064516129035,0.07849380603492276 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.046415888336127774,train,0.907859078590786,0.012785626399160158,0.8563215758131013,0.02212266810177018,0.8225614265757252,0.024583894856153754 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.046415888336127774,test,0.7073170731707317,0.05523750030205272,0.5340909090909092,0.08224032310270869,0.535483870967742,0.06941780929746119 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.3593813663804626,train,0.978319783197832,0.007090657344830875,0.9686411149825784,0.010648845034939092,0.9534883720930232,0.015211933489782472 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.3593813663804626,test,0.7804878048780488,0.0574425404133332,0.6660633484162897,0.09429729736171491,0.6516129032258065,0.08616463659399748 +flat_mae,patch,logistic,adni_ad_vs_cn,96,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,2.782559402207126,test,0.7073170731707317,0.06071946198421547,0.5729166666666666,0.0868970649883493,0.5693548387096774,0.08119089381385973 +flat_mae,patch,logistic,adni_ad_vs_cn,97,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,97,21.54434690031882,test,0.6585365853658537,0.06530000323405534,0.5370967741935484,0.08127164757938399,0.5370967741935484,0.08253135493463387 +flat_mae,patch,logistic,adni_ad_vs_cn,98,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,98,21.54434690031882,test,0.6097560975609756,0.06927454454782182,0.5030303030303029,0.0789337203395345,0.5048387096774194,0.08386053601379073 +flat_mae,patch,logistic,adni_ad_vs_cn,99,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,99,21.54434690031882,test,0.7560975609756098,0.05209509448806485,0.6117424242424243,0.09009737408432235,0.6016129032258064,0.07655766939357589 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.005994842503189409,train,0.8238482384823849,0.01387962649705566,0.6787307285318189,0.03196207899705018,0.6504232065083408,0.02575760428261679 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.005994842503189409,test,0.7073170731707317,0.0447180605296918,0.4831932773109243,0.06971470103973369,0.5016129032258064,0.05348522071140164 diff --git a/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..09199ab790e339aff9f73299154cb0bff28a8c23 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:03 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:11:14 time: 4.1130 data: 3.2262 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:53 time: 0.1869 data: 0.0645 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:32 time: 0.1518 data: 0.0465 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:24 time: 0.1596 data: 0.0516 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:17 time: 0.1584 data: 0.0503 max mem: 2851 +extract (train) [100/164] eta: 0:00:13 time: 0.1768 data: 0.0607 max mem: 2851 +extract (train) [120/164] eta: 0:00:08 time: 0.1710 data: 0.0567 max mem: 2851 +extract (train) [140/164] eta: 0:00:04 time: 0.1526 data: 0.0478 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1450 data: 0.0437 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1437 data: 0.0426 max mem: 2851 +extract (train) Total time: 0:00:30 (0.1879 s / it) +extract (validation) [ 0/21] eta: 0:01:07 time: 3.2361 data: 3.1174 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1437 data: 0.0420 max mem: 2851 +extract (validation) Total time: 0:00:06 (0.3035 s / it) +extract (test) [ 0/21] eta: 0:01:18 time: 3.7563 data: 3.6475 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1304 data: 0.0326 max mem: 2851 +extract (test) Total time: 0:00:06 (0.3150 s / it) +feature extraction time: 0:00:43 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 21.544 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 21.544 | test | 0.68293 | 0.074693 | 0.60722 | 0.082651 | 0.63715 | 0.093377 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 1291.5496650148827, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060751960576271094, "f1": 0.6440972222222222, "f1_std": 0.09125746909128643, "bacc": 0.635483870967742, "bacc_std": 0.08729699185356012} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06090602859206814, "f1": 0.5729166666666666, "f1_std": 0.08975823331794162, "bacc": 0.5693548387096774, "bacc_std": 0.08352076582899495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06119095631740582, "f1": 0.5918552036199095, "f1_std": 0.09049414384878635, "bacc": 0.5854838709677419, "bacc_std": 0.08150229729338858} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 10000.0, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05903194871983235, "f1": 0.7354838709677419, "f1_std": 0.07963033723878764, "bacc": 0.7354838709677419, "bacc_std": 0.08261705163974395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05290852780127935, "f1": 0.4564393939393939, "f1_std": 0.06273681100932395, "bacc": 0.4693548387096774, "bacc_std": 0.055214656697800665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0632309534964592, "f1": 0.5017361111111112, "f1_std": 0.07693899033880729, "bacc": 0.5032258064516129, "bacc_std": 0.07181706809325292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06577626888991142, "f1": 0.6893939393939394, "f1_std": 0.08092340334662386, "bacc": 0.7032258064516128, "bacc_std": 0.08745007072568414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06440088603851737, "f1": 0.6479313036690086, "f1_std": 0.0830317573625899, "bacc": 0.6532258064516129, "bacc_std": 0.08812688202874432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06963667075002176, "f1": 0.6272727272727273, "f1_std": 0.08317530532060659, "bacc": 0.6370967741935484, "bacc_std": 0.0885902480521867} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04958293636748563, "f1": 0.5512437810945273, "f1_std": 0.08751101814900063, "bacc": 0.5516129032258065, "bacc_std": 0.06998248413230115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 1291.5496650148827, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.03252601576228226, "f1": 0.5119047619047619, "f1_std": 0.07685099605898597, "bacc": 0.5338709677419355, "bacc_std": 0.049493483946614396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06933009952952143, "f1": 0.5839188134270101, "f1_std": 0.08390310322736193, "bacc": 0.5870967741935484, "bacc_std": 0.08753548743723365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0663344867206715, "f1": 0.603225806451613, "f1_std": 0.08527354275051797, "bacc": 0.603225806451613, "bacc_std": 0.08586431495520316} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 1291.5496650148827, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07036852089462364, "f1": 0.5839188134270101, "f1_std": 0.08467733427372978, "bacc": 0.5870967741935484, "bacc_std": 0.08687533492849742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 1291.5496650148827, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07965720728357803, "f1": 0.5494505494505495, "f1_std": 0.08238839228560041, "bacc": 0.5725806451612903, "bacc_std": 0.09294007244217486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 1.1102230246251565e-16, "f1": 0.4305555555555556, "f1_std": 1.6653345369377348e-16, "bacc": 0.5, "bacc_std": 0.0} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06897352258200687, "f1": 0.47096774193548385, "f1_std": 0.07742728674968488, "bacc": 0.47096774193548385, "bacc_std": 0.07671129274731055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06627428449441666, "f1": 0.603225806451613, "f1_std": 0.08704167050948522, "bacc": 0.603225806451613, "bacc_std": 0.08679893273678875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07594988798431805, "f1": 0.5467943994104643, "f1_std": 0.08214776070915095, "bacc": 0.5548387096774194, "bacc_std": 0.08947873528765625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06876718432145013, "f1": 0.6676492262343405, "f1_std": 0.07862640047180426, "bacc": 0.6870967741935483, "bacc_std": 0.08555635703399367} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.058921882363828525, "f1": 0.5918552036199095, "f1_std": 0.09243016941174909, "bacc": 0.5854838709677419, "bacc_std": 0.0821773971952232} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04883555428460247, "f1": 0.6893939393939394, "f1_std": 0.08874741757762356, "bacc": 0.667741935483871, "bacc_std": 0.07883644808276138} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.0662566710636764, "f1": 0.47096774193548385, "f1_std": 0.07553831997439833, "bacc": 0.47096774193548385, "bacc_std": 0.07623672501763958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06782549256731432, "f1": 0.5370967741935484, "f1_std": 0.08107186049835596, "bacc": 0.5370967741935484, "bacc_std": 0.08015166288652747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.057134509460803035, "f1": 0.5918552036199095, "f1_std": 0.08620603980808071, "bacc": 0.5854838709677419, "bacc_std": 0.07753287772288395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07269137289279834, "f1": 0.6272727272727273, "f1_std": 0.08627259461181669, "bacc": 0.6370967741935484, "bacc_std": 0.09147148757127305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.057488068751528676, "f1": 0.5729166666666666, "f1_std": 0.08082299023517758, "bacc": 0.5693548387096774, "bacc_std": 0.07599797330119863} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0649913553030593, "f1": 0.5547201336675021, "f1_std": 0.08386149051486809, "bacc": 0.5532258064516129, "bacc_std": 0.0803197705817561} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06601820454503732, "f1": 0.6893939393939394, "f1_std": 0.08059528841200657, "bacc": 0.7032258064516128, "bacc_std": 0.08605121868546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06559368739668378, "f1": 0.603225806451613, "f1_std": 0.08339410597209616, "bacc": 0.603225806451613, "bacc_std": 0.08323722577322286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06159714449062689, "f1": 0.5017361111111112, "f1_std": 0.07879591021931896, "bacc": 0.5032258064516129, "bacc_std": 0.07306188637295516} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07341635197730922, "f1": 0.6259649122807017, "f1_std": 0.07954011799138176, "bacc": 0.6548387096774193, "bacc_std": 0.08843064967696032} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06854128965714291, "f1": 0.5467943994104643, "f1_std": 0.07789634634280497, "bacc": 0.5548387096774194, "bacc_std": 0.0845071960312339} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06856912689828491, "f1": 0.603225806451613, "f1_std": 0.09126008083311113, "bacc": 0.603225806451613, "bacc_std": 0.09159064535386323} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06922930885141937, "f1": 0.6232247284878863, "f1_std": 0.0963316358111418, "bacc": 0.6193548387096774, "bacc_std": 0.09412624086599312} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06565643456933166, "f1": 0.6232247284878863, "f1_std": 0.08657926207944053, "bacc": 0.6193548387096774, "bacc_std": 0.08437041464782541} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04517937019015755, "f1": 0.4831932773109243, "f1_std": 0.06996275933883866, "bacc": 0.5016129032258064, "bacc_std": 0.05350194984144624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07269929427758738, "f1": 0.5467943994104643, "f1_std": 0.08440104006647929, "bacc": 0.5548387096774194, "bacc_std": 0.09104437723676734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.5609756097560976, "acc_std": 0.06564938507599771, "f1": 0.40483870967741936, "f1_std": 0.05870895566629248, "bacc": 0.40483870967741936, "bacc_std": 0.06163603847014703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05997205377331521, "f1": 0.5017361111111112, "f1_std": 0.07695017249289807, "bacc": 0.5032258064516129, "bacc_std": 0.07120219768556968} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0637752411216992, "f1": 0.6693548387096775, "f1_std": 0.08257463549585953, "bacc": 0.6693548387096775, "bacc_std": 0.08378272597436866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06683209892527603, "f1": 0.603225806451613, "f1_std": 0.08332155669572555, "bacc": 0.603225806451613, "bacc_std": 0.08298327477136218} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06531521518696613, "f1": 0.6893939393939394, "f1_std": 0.07884323278891814, "bacc": 0.7032258064516128, "bacc_std": 0.08562330709568558} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06650940006827605, "f1": 0.6693548387096775, "f1_std": 0.08791406025435267, "bacc": 0.6693548387096775, "bacc_std": 0.08794106317945076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 1291.5496650148827, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06217022935620754, "f1": 0.7354838709677419, "f1_std": 0.08391637014323428, "bacc": 0.7354838709677419, "bacc_std": 0.08641365193286653} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05527205980315912, "f1": 0.5340909090909092, "f1_std": 0.08271171757622651, "bacc": 0.535483870967742, "bacc_std": 0.0700712391326856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06163113006218866, "f1": 0.6440972222222222, "f1_std": 0.09356288792235583, "bacc": 0.635483870967742, "bacc_std": 0.08792905870782727} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06238982436594961, "f1": 0.6479313036690086, "f1_std": 0.07973429325561347, "bacc": 0.6532258064516129, "bacc_std": 0.08451904969097777} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04250390330957467, "f1": 0.6554621848739496, "f1_std": 0.09437074181266959, "bacc": 0.6338709677419355, "bacc_std": 0.07417268991509189} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06255012980678717, "f1": 0.6693548387096775, "f1_std": 0.08437738075032508, "bacc": 0.6693548387096775, "bacc_std": 0.08324011948691344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06894986927853933, "f1": 0.5651515151515152, "f1_std": 0.08277341097369258, "bacc": 0.5709677419354839, "bacc_std": 0.08810176454369571} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.061708281106901304, "f1": 0.5729166666666666, "f1_std": 0.08752480235771185, "bacc": 0.5693548387096774, "bacc_std": 0.08104091867371513} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07190811441347139, "f1": 0.5651515151515152, "f1_std": 0.08501136764892162, "bacc": 0.5709677419354839, "bacc_std": 0.08995434476176928} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.047140732509274884, "f1": 0.5512437810945273, "f1_std": 0.08279298890946533, "bacc": 0.5516129032258065, "bacc_std": 0.06574504179245279} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 1291.5496650148827, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06368281006238652, "f1": 0.7515151515151515, "f1_std": 0.07788088962264188, "bacc": 0.7693548387096774, "bacc_std": 0.08181761721654277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.040680557923661376, "f1": 0.4057971014492754, "f1_std": 0.014588852671054897, "bacc": 0.45161290322580644, "bacc_std": 0.026901659272098654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0684564412933572, "f1": 0.6479313036690086, "f1_std": 0.08648434388132373, "bacc": 0.6532258064516129, "bacc_std": 0.08738651990188138} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06650301348881416, "f1": 0.6693548387096775, "f1_std": 0.08934009521615008, "bacc": 0.6693548387096775, "bacc_std": 0.0906658208003125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0680706320366247, "f1": 0.6272727272727273, "f1_std": 0.08213611005697752, "bacc": 0.6370967741935484, "bacc_std": 0.0879423632908865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0506670319449976, "f1": 0.6893939393939394, "f1_std": 0.09520012326628226, "bacc": 0.667741935483871, "bacc_std": 0.08390404428418487} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06960514111216567, "f1": 0.5651515151515152, "f1_std": 0.08100959752200344, "bacc": 0.5709677419354839, "bacc_std": 0.08557971279722651} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0723407233157872, "f1": 0.5839188134270101, "f1_std": 0.08777958460273834, "bacc": 0.5870967741935484, "bacc_std": 0.09139491847925323} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 21.54434690031882, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.0516267204356303, "f1": 0.7864583333333333, "f1_std": 0.08027515338750701, "bacc": 0.7677419354838709, "bacc_std": 0.08090226397534958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05405837724081965, "f1": 0.6117424242424243, "f1_std": 0.09331210241498446, "bacc": 0.6016129032258064, "bacc_std": 0.07972973827533018} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06710243264136595, "f1": 0.6479313036690086, "f1_std": 0.08478849515602527, "bacc": 0.6532258064516129, "bacc_std": 0.08877025937222131} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04270594501285282, "f1": 0.6554621848739496, "f1_std": 0.09726883893894248, "bacc": 0.6338709677419355, "bacc_std": 0.07576424762037692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0515675977010674, "f1": 0.5512437810945273, "f1_std": 0.08896555772166843, "bacc": 0.5516129032258065, "bacc_std": 0.07125472611523045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06583240849557591, "f1": 0.5370967741935484, "f1_std": 0.08260428212281196, "bacc": 0.5370967741935484, "bacc_std": 0.08339774007074405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04698497994331429, "f1": 0.7670454545454546, "f1_std": 0.08702819627475249, "bacc": 0.7338709677419355, "bacc_std": 0.08146872863926266} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.061904630071354654, "f1": 0.6232247284878863, "f1_std": 0.0852771502667445, "bacc": 0.6193548387096774, "bacc_std": 0.08284801726426642} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0332279277331527, "f1": 0.4142857142857143, "f1_std": 0.011561705437815907, "bacc": 0.46774193548387094, "bacc_std": 0.021973307049342927} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.062305955764564526, "f1": 0.5017361111111112, "f1_std": 0.08053912277465329, "bacc": 0.5032258064516129, "bacc_std": 0.07408709586124271} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.047466598043294485, "f1": 0.6328358208955224, "f1_std": 0.09144187902117322, "bacc": 0.6177419354838709, "bacc_std": 0.07490463836849828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.057625511568932745, "f1": 0.7602339181286549, "f1_std": 0.08326180043528457, "bacc": 0.7516129032258064, "bacc_std": 0.08499081538184929} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 1291.5496650148827, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06489049944715244, "f1": 0.6676492262343405, "f1_std": 0.07784021400970226, "bacc": 0.6870967741935483, "bacc_std": 0.08660580682167986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04988245552719876, "f1": 0.5512437810945273, "f1_std": 0.08660260062350651, "bacc": 0.5516129032258065, "bacc_std": 0.06872992424695895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05329999949775272, "f1": 0.6117424242424243, "f1_std": 0.09285296883642837, "bacc": 0.6016129032258064, "bacc_std": 0.08066348765962945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0619270934393351, "f1": 0.5017361111111112, "f1_std": 0.07736628964069585, "bacc": 0.5032258064516129, "bacc_std": 0.07300441699749846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.059711999656969356, "f1": 0.7759562841530054, "f1_std": 0.0780367635054407, "bacc": 0.7854838709677419, "bacc_std": 0.08182686037019693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07313169690501527, "f1": 0.4863669859985261, "f1_std": 0.07745540345116567, "bacc": 0.4887096774193548, "bacc_std": 0.08393202393007665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05732359070365379, "f1": 0.5340909090909092, "f1_std": 0.08514043076716758, "bacc": 0.535483870967742, "bacc_std": 0.07217782238128603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06301070228161726, "f1": 0.5547201336675021, "f1_std": 0.08344150413467971, "bacc": 0.5532258064516129, "bacc_std": 0.08085037239197428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.078371527809579, "f1": 0.5684210526315789, "f1_std": 0.08310727997914037, "bacc": 0.5887096774193548, "bacc_std": 0.09262657841081971} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05400993585194957, "f1": 0.6117424242424243, "f1_std": 0.09181779319265893, "bacc": 0.6016129032258064, "bacc_std": 0.07851866722644807} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0589008180355653, "f1": 0.7354838709677419, "f1_std": 0.07947229902837386, "bacc": 0.7354838709677419, "bacc_std": 0.08338529354760832} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 1291.5496650148827, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07220664047862084, "f1": 0.6259649122807017, "f1_std": 0.07980739712241197, "bacc": 0.6548387096774193, "bacc_std": 0.08953028403051566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0657658674076713, "f1": 0.7280766396462786, "f1_std": 0.07745861843945868, "bacc": 0.7532258064516129, "bacc_std": 0.08430221210419553} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.063043472909589, "f1": 0.6693548387096775, "f1_std": 0.08420744751617514, "bacc": 0.6693548387096775, "bacc_std": 0.08513584486669203} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06998725132484586, "f1": 0.6272727272727273, "f1_std": 0.0867892902234142, "bacc": 0.6370967741935484, "bacc_std": 0.09227827795709195} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 1291.5496650148827, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06343046706662582, "f1": 0.764367816091954, "f1_std": 0.07088013871857227, "bacc": 0.8032258064516129, "bacc_std": 0.0733781167139563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06177179702577043, "f1": 0.603225806451613, "f1_std": 0.08353750274846851, "bacc": 0.603225806451613, "bacc_std": 0.08584872903424375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07627714893942475, "f1": 0.5876436781609196, "f1_std": 0.08508048615690386, "bacc": 0.6048387096774194, "bacc_std": 0.09390499388856398} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 166.81005372000556, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.07145399088785251, "f1": 0.42593957258658804, "f1_std": 0.07182742238943822, "bacc": 0.42258064516129035, "bacc_std": 0.07849380603492276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05523750030205272, "f1": 0.5340909090909092, "f1_std": 0.08224032310270869, "bacc": 0.535483870967742, "bacc_std": 0.06941780929746119} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0574425404133332, "f1": 0.6660633484162897, "f1_std": 0.09429729736171491, "bacc": 0.6516129032258065, "bacc_std": 0.08616463659399748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06071946198421547, "f1": 0.5729166666666666, "f1_std": 0.0868970649883493, "bacc": 0.5693548387096774, "bacc_std": 0.08119089381385973} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06530000323405534, "f1": 0.5370967741935484, "f1_std": 0.08127164757938399, "bacc": 0.5370967741935484, "bacc_std": 0.08253135493463387} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 21.54434690031882, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06927454454782182, "f1": 0.5030303030303029, "f1_std": 0.0789337203395345, "bacc": 0.5048387096774194, "bacc_std": 0.08386053601379073} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05209509448806485, "f1": 0.6117424242424243, "f1_std": 0.09009737408432235, "bacc": 0.6016129032258064, "bacc_std": 0.07655766939357589} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0447180605296918, "f1": 0.4831932773109243, "f1_std": 0.06971470103973369, "bacc": 0.5016129032258064, "bacc_std": 0.05348522071140164} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 258.8 | 1049.1 | 0.97333 | 0.049521 | 0.95588 | 0.085444 | 0.94728 | 0.097808 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 258.8 | 1049.1 | 0.71585 | 0.061564 | 0.597 | 0.08595 | 0.60074 | 0.082762 | + + +done! total time: 0:04:40 diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3da629d5881df71eea5c8ed04007ea31dc228b64 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..14cf40718adf021d8b29b475e35f669b28bf017a --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 15, "eval/id_best": 34, "eval/lr_best": 0.0015299999999999997, "eval/wd_best": 0.05, "eval/train/loss": 0.00011796606122516096, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.11959633231163025, "eval/validation/acc": 0.9774305555555556, "eval/validation/acc_std": 0.0023442695422314095, "eval/validation/f1": 0.9725775024400131, "eval/validation/f1_std": 0.0031428599623646966, "eval/test/loss": 0.13511162996292114, "eval/test/acc": 0.9732142857142857, "eval/test/acc_std": 0.0022770634837648803, "eval/test/f1": 0.9701430673357256, "eval/test/f1_std": 0.002732238041106486} diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..ac2b4d3140030829b7d51822946a67854289270a --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 15, "eval/best/id_best": 34, "eval/best/lr_best": 0.0015299999999999997, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.00011796606122516096, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.11959633231163025, "eval/best/validation/acc": 0.9774305555555556, "eval/best/validation/acc_std": 0.0023442695422314095, "eval/best/validation/f1": 0.9725775024400131, "eval/best/validation/f1_std": 0.0031428599623646966, "eval/best/test/loss": 0.13511162996292114, "eval/best/test/acc": 0.9732142857142857, "eval/best/test/acc_std": 0.0022770634837648803, "eval/best/test/f1": 0.9701430673357256, "eval/best/test/f1_std": 0.002732238041106486} diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..95d0a139ff5af8fcc4e20091de508eafd559273c --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 41, "eval/last/lr_best": 0.0048, "eval/last/wd_best": 0.05, "eval/last/train/loss": 6.200446023285622e-06, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.37438637018203735, "eval/last/validation/acc": 0.9774305555555556, "eval/last/validation/acc_std": 0.0022321287477508143, "eval/last/validation/f1": 0.9735475315880806, "eval/last/validation/f1_std": 0.002928772423093447, "eval/last/test/loss": 0.402966171503067, "eval/last/test/acc": 0.973015873015873, "eval/last/test/acc_std": 0.0023169988600690032, "eval/last/test/f1": 0.9687786444188914, "eval/last/test/f1_std": 0.0029020579068110104} diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..b319e981a2ec8e3c35041892ced55db7163ababb --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",train,0.00011796606122516096,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",validation,0.11959633231163025,0.9774305555555556,0.0023442695422314095,0.9725775024400131,0.0031428599623646966 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",test,0.13511162996292114,0.9732142857142857,0.0022770634837648803,0.9701430673357256,0.002732238041106486 diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..b319e981a2ec8e3c35041892ced55db7163ababb --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",train,0.00011796606122516096,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",validation,0.11959633231163025,0.9774305555555556,0.0023442695422314095,0.9725775024400131,0.0031428599623646966 +flat_mae,patch,attn,hcpya_task21,best,15,0.0015299999999999997,0.05,34,"[5.1, 1.0]",test,0.13511162996292114,0.9732142857142857,0.0022770634837648803,0.9701430673357256,0.002732238041106486 diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..ab752f9615eea78b337e84dc99f66099d0e7a8b1 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0048,0.05,41,"[16, 1.0]",train,6.200446023285622e-06,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.0048,0.05,41,"[16, 1.0]",validation,0.37438637018203735,0.9774305555555556,0.0022321287477508143,0.9735475315880806,0.002928772423093447 +flat_mae,patch,attn,hcpya_task21,last,19,0.0048,0.05,41,"[16, 1.0]",test,0.402966171503067,0.973015873015873,0.0023169988600690032,0.9687786444188914,0.0029020579068110104 diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..60e8f32b92655fdbd2ea0f3a20e388182bfae52e --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,888 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:19:39 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:24:25 lr: nan time: 3.6632 data: 3.0468 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:53 lr: 0.000003 loss: 3.0244 (3.0287) grad: 0.2826 (0.2989) time: 0.4612 data: 0.0022 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:09 lr: 0.000006 loss: 2.9991 (2.9881) grad: 0.2958 (0.2978) time: 0.4353 data: 0.0033 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:51 lr: 0.000009 loss: 2.8820 (2.9373) grad: 0.2958 (0.2936) time: 0.4545 data: 0.0034 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:36 lr: 0.000012 loss: 2.7619 (2.8830) grad: 0.2741 (0.2849) time: 0.4493 data: 0.0033 max mem: 22446 +train: [0] [100/400] eta: 0:02:24 lr: 0.000015 loss: 2.6512 (2.8248) grad: 0.2551 (0.2806) time: 0.4492 data: 0.0035 max mem: 22446 +train: [0] [120/400] eta: 0:02:13 lr: 0.000018 loss: 2.5483 (2.7670) grad: 0.2575 (0.2759) time: 0.4511 data: 0.0032 max mem: 22446 +train: [0] [140/400] eta: 0:02:03 lr: 0.000021 loss: 2.4255 (2.7105) grad: 0.2585 (0.2749) time: 0.4689 data: 0.0034 max mem: 22446 +train: [0] [160/400] eta: 0:01:53 lr: 0.000024 loss: 2.3483 (2.6628) grad: 0.2460 (0.2700) time: 0.4476 data: 0.0032 max mem: 22446 +train: [0] [180/400] eta: 0:01:43 lr: 0.000027 loss: 2.2416 (2.6118) grad: 0.2283 (0.2658) time: 0.4687 data: 0.0033 max mem: 22446 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 2.1979 (2.5634) grad: 0.2374 (0.2628) time: 0.4665 data: 0.0031 max mem: 22446 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 2.0728 (2.5171) grad: 0.2265 (0.2594) time: 0.4374 data: 0.0033 max mem: 22446 +train: [0] [240/400] eta: 0:01:14 lr: 0.000036 loss: 1.9864 (2.4681) grad: 0.2294 (0.2578) time: 0.4667 data: 0.0036 max mem: 22446 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 1.9047 (2.4250) grad: 0.2304 (0.2556) time: 0.4647 data: 0.0034 max mem: 22446 +train: [0] [280/400] eta: 0:00:56 lr: 0.000042 loss: 1.8840 (2.3855) grad: 0.2142 (0.2523) time: 0.4573 data: 0.0034 max mem: 22446 +train: [0] [300/400] eta: 0:00:47 lr: 0.000045 loss: 1.8288 (2.3472) grad: 0.2003 (0.2488) time: 0.6154 data: 0.1771 max mem: 22446 +train: [0] [320/400] eta: 0:00:38 lr: 0.000048 loss: 1.7772 (2.3105) grad: 0.1978 (0.2460) time: 0.4505 data: 0.0032 max mem: 22446 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 1.7139 (2.2744) grad: 0.2061 (0.2442) time: 0.4486 data: 0.0034 max mem: 22446 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 1.6870 (2.2417) grad: 0.2047 (0.2420) time: 0.4545 data: 0.0033 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.6647 (2.2106) grad: 0.2023 (0.2398) time: 0.4623 data: 0.0033 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.6004 (2.1785) grad: 0.1970 (0.2381) time: 0.4511 data: 0.0034 max mem: 22446 +train: [0] Total time: 0:03:08 (0.4714 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.6004 (2.1785) grad: 0.1970 (0.2381) +eval (validation): [0] [ 0/63] eta: 0:03:27 time: 3.2896 data: 3.0072 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:21 time: 0.3514 data: 0.0045 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3333 data: 0.0029 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3218 data: 0.0033 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3147 data: 0.0033 max mem: 22446 +eval (validation): [0] Total time: 0:00:24 (0.3865 s / it) +cv: [0] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.186 acc: 0.937 f1: 0.916 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:48 lr: nan time: 3.4205 data: 3.0753 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:42 lr: 0.000063 loss: 1.5533 (1.5585) grad: 0.1910 (0.1973) time: 0.4427 data: 0.0027 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:04 lr: 0.000066 loss: 1.5303 (1.5326) grad: 0.1957 (0.1976) time: 0.4383 data: 0.0032 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:49 lr: 0.000069 loss: 1.4796 (1.5114) grad: 0.1942 (0.1947) time: 0.4660 data: 0.0034 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:35 lr: 0.000072 loss: 1.4505 (1.4975) grad: 0.1907 (0.1948) time: 0.4515 data: 0.0034 max mem: 22446 +train: [1] [100/400] eta: 0:02:23 lr: 0.000075 loss: 1.4398 (1.4886) grad: 0.1974 (0.1952) time: 0.4468 data: 0.0034 max mem: 22446 +train: [1] [120/400] eta: 0:02:13 lr: 0.000078 loss: 1.4173 (1.4696) grad: 0.1847 (0.1940) time: 0.4648 data: 0.0034 max mem: 22446 +train: [1] [140/400] eta: 0:02:03 lr: 0.000081 loss: 1.3550 (1.4546) grad: 0.1824 (0.1920) time: 0.4662 data: 0.0035 max mem: 22446 +train: [1] [160/400] eta: 0:01:53 lr: 0.000084 loss: 1.3390 (1.4367) grad: 0.1779 (0.1901) time: 0.4555 data: 0.0034 max mem: 22446 +train: [1] [180/400] eta: 0:01:44 lr: 0.000087 loss: 1.3087 (1.4225) grad: 0.1779 (0.1888) time: 0.4832 data: 0.0034 max mem: 22446 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 1.2998 (1.4090) grad: 0.1695 (0.1872) time: 0.4568 data: 0.0034 max mem: 22446 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 1.2602 (1.3937) grad: 0.1718 (0.1868) time: 0.4510 data: 0.0033 max mem: 22446 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 1.2335 (1.3802) grad: 0.1775 (0.1857) time: 0.4630 data: 0.0034 max mem: 22446 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 1.2279 (1.3687) grad: 0.1734 (0.1845) time: 0.4585 data: 0.0035 max mem: 22446 +train: [1] [280/400] eta: 0:00:56 lr: 0.000102 loss: 1.1962 (1.3545) grad: 0.1686 (0.1842) time: 0.4549 data: 0.0035 max mem: 22446 +train: [1] [300/400] eta: 0:00:47 lr: 0.000105 loss: 1.1564 (1.3417) grad: 0.1653 (0.1825) time: 0.6344 data: 0.1780 max mem: 22446 +train: [1] [320/400] eta: 0:00:38 lr: 0.000108 loss: 1.1463 (1.3292) grad: 0.1609 (0.1812) time: 0.4344 data: 0.0032 max mem: 22446 +train: [1] [340/400] eta: 0:00:28 lr: 0.000111 loss: 1.1211 (1.3170) grad: 0.1584 (0.1798) time: 0.4523 data: 0.0034 max mem: 22446 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 1.1250 (1.3063) grad: 0.1573 (0.1785) time: 0.4525 data: 0.0034 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.1166 (1.2960) grad: 0.1597 (0.1780) time: 0.4458 data: 0.0037 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.1033 (1.2865) grad: 0.1614 (0.1775) time: 0.4452 data: 0.0036 max mem: 22446 +train: [1] Total time: 0:03:08 (0.4709 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.1033 (1.2865) grad: 0.1614 (0.1775) +eval (validation): [1] [ 0/63] eta: 0:03:26 time: 3.2807 data: 3.0323 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:20 time: 0.3374 data: 0.0028 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3302 data: 0.0027 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3232 data: 0.0083 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3189 data: 0.0081 max mem: 22446 +eval (validation): [1] Total time: 0:00:24 (0.3829 s / it) +cv: [1] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.124 acc: 0.963 f1: 0.958 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:22:23 lr: nan time: 3.3593 data: 3.0178 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:39 lr: 0.000123 loss: 1.0161 (1.0535) grad: 0.1731 (0.1758) time: 0.4385 data: 0.0029 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:04 lr: 0.000126 loss: 1.0380 (1.0517) grad: 0.1731 (0.1782) time: 0.4434 data: 0.0033 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:49 lr: 0.000129 loss: 1.0312 (1.0467) grad: 0.1774 (0.1798) time: 0.4670 data: 0.0034 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:35 lr: 0.000132 loss: 1.0444 (1.0483) grad: 0.1861 (0.1821) time: 0.4547 data: 0.0036 max mem: 22446 +train: [2] [100/400] eta: 0:02:23 lr: 0.000135 loss: 1.0551 (1.0489) grad: 0.1912 (0.1851) time: 0.4415 data: 0.0033 max mem: 22446 +train: [2] [120/400] eta: 0:02:12 lr: 0.000138 loss: 1.0380 (1.0511) grad: 0.1923 (0.1898) time: 0.4488 data: 0.0034 max mem: 22446 +train: [2] [140/400] eta: 0:02:02 lr: 0.000141 loss: 0.9784 (1.0403) grad: 0.1901 (0.1902) time: 0.4659 data: 0.0035 max mem: 22446 +train: [2] [160/400] eta: 0:01:52 lr: 0.000144 loss: 1.0262 (1.0423) grad: 0.2027 (0.1945) time: 0.4364 data: 0.0032 max mem: 22446 +train: [2] [180/400] eta: 0:01:43 lr: 0.000147 loss: 1.0319 (1.0403) grad: 0.2222 (0.1989) time: 0.4758 data: 0.0034 max mem: 22446 +train: [2] [200/400] eta: 0:01:33 lr: 0.000150 loss: 0.9599 (1.0302) grad: 0.2101 (0.1991) time: 0.4546 data: 0.0035 max mem: 22446 +train: [2] [220/400] eta: 0:01:23 lr: 0.000153 loss: 0.9872 (1.0339) grad: 0.2056 (0.2009) time: 0.4363 data: 0.0033 max mem: 22446 +train: [2] [240/400] eta: 0:01:14 lr: 0.000156 loss: 1.0017 (1.0259) grad: 0.2127 (0.2020) time: 0.4591 data: 0.0034 max mem: 22446 +train: [2] [260/400] eta: 0:01:04 lr: 0.000159 loss: 0.9716 (1.0231) grad: 0.2246 (0.2052) time: 0.4628 data: 0.0035 max mem: 22446 +train: [2] [280/400] eta: 0:00:55 lr: 0.000162 loss: 0.9716 (1.0207) grad: 0.2292 (0.2082) time: 0.4631 data: 0.0033 max mem: 22446 +train: [2] [300/400] eta: 0:00:47 lr: 0.000165 loss: 0.9564 (1.0169) grad: 0.2428 (0.2113) time: 0.6938 data: 0.2322 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 0.9398 (1.0137) grad: 0.2417 (0.2135) time: 0.4480 data: 0.0031 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 0.9659 (1.0118) grad: 0.2368 (0.2157) time: 0.4470 data: 0.0033 max mem: 22446 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 0.9659 (1.0102) grad: 0.2368 (0.2179) time: 0.4511 data: 0.0034 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.9245 (1.0053) grad: 0.2572 (0.2205) time: 0.4441 data: 0.0035 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.8698 (0.9974) grad: 0.2522 (0.2218) time: 0.4546 data: 0.0036 max mem: 22446 +train: [2] Total time: 0:03:08 (0.4719 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.8698 (0.9974) grad: 0.2522 (0.2218) +eval (validation): [2] [ 0/63] eta: 0:03:23 time: 3.2272 data: 2.9949 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:20 time: 0.3485 data: 0.0035 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:10 time: 0.3948 data: 0.0498 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3274 data: 0.0032 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3282 data: 0.0032 max mem: 22446 +eval (validation): [2] Total time: 0:00:25 (0.4063 s / it) +cv: [2] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.133 acc: 0.967 f1: 0.961 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:48 lr: nan time: 3.4214 data: 3.0390 max mem: 22446 +train: [3] [ 20/400] eta: 0:04:00 lr: 0.000183 loss: 0.8492 (0.8764) grad: 0.2706 (0.2807) time: 0.4940 data: 0.0513 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:16 lr: 0.000186 loss: 0.8517 (0.8762) grad: 0.2524 (0.2612) time: 0.4546 data: 0.0032 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:55 lr: 0.000189 loss: 0.8738 (0.8838) grad: 0.2524 (0.2659) time: 0.4589 data: 0.0034 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:40 lr: 0.000192 loss: 0.8844 (0.8896) grad: 0.2884 (0.2731) time: 0.4474 data: 0.0037 max mem: 22446 +train: [3] [100/400] eta: 0:02:27 lr: 0.000195 loss: 0.8943 (0.8914) grad: 0.2891 (0.2745) time: 0.4541 data: 0.0034 max mem: 22446 +train: [3] [120/400] eta: 0:02:16 lr: 0.000198 loss: 0.8988 (0.8950) grad: 0.2857 (0.2731) time: 0.4732 data: 0.0034 max mem: 22446 +train: [3] [140/400] eta: 0:02:06 lr: 0.000201 loss: 0.9201 (0.9053) grad: 0.2819 (0.2773) time: 0.4635 data: 0.0034 max mem: 22446 +train: [3] [160/400] eta: 0:01:55 lr: 0.000204 loss: 0.9143 (0.9038) grad: 0.2869 (0.2804) time: 0.4710 data: 0.0032 max mem: 22446 +train: [3] [180/400] eta: 0:01:45 lr: 0.000207 loss: 0.8746 (0.9047) grad: 0.2849 (0.2831) time: 0.4622 data: 0.0032 max mem: 22446 +train: [3] [200/400] eta: 0:01:35 lr: 0.000210 loss: 0.9035 (0.9077) grad: 0.2849 (0.2854) time: 0.4398 data: 0.0032 max mem: 22446 +train: [3] [220/400] eta: 0:01:25 lr: 0.000213 loss: 0.8937 (0.9046) grad: 0.3179 (0.2897) time: 0.4629 data: 0.0033 max mem: 22446 +train: [3] [240/400] eta: 0:01:15 lr: 0.000216 loss: 0.8848 (0.9089) grad: 0.3188 (0.2928) time: 0.4664 data: 0.0034 max mem: 22446 +train: [3] [260/400] eta: 0:01:06 lr: 0.000219 loss: 0.8177 (0.9038) grad: 0.3188 (0.2953) time: 0.4607 data: 0.0035 max mem: 22446 +train: [3] [280/400] eta: 0:00:56 lr: 0.000222 loss: 0.8674 (0.9064) grad: 0.3234 (0.2977) time: 0.4480 data: 0.0034 max mem: 22446 +train: [3] [300/400] eta: 0:00:48 lr: 0.000225 loss: 0.9203 (0.9083) grad: 0.3480 (0.3026) time: 0.6207 data: 0.1767 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 0.8904 (0.9051) grad: 0.3486 (0.3037) time: 0.4780 data: 0.0157 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.8054 (0.9002) grad: 0.3237 (0.3090) time: 0.4612 data: 0.0034 max mem: 22446 +train: [3] [360/400] eta: 0:00:19 lr: 0.000234 loss: 0.8183 (0.8972) grad: 0.3692 (0.3145) time: 0.4434 data: 0.0034 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.8305 (0.8980) grad: 0.3717 (0.3173) time: 0.4486 data: 0.0035 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.9014 (0.8995) grad: 0.3800 (0.3201) time: 0.4496 data: 0.0034 max mem: 22446 +train: [3] Total time: 0:03:10 (0.4756 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.9014 (0.8995) grad: 0.3800 (0.3201) +eval (validation): [3] [ 0/63] eta: 0:03:27 time: 3.2936 data: 3.0117 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:24 time: 0.4221 data: 0.0750 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:10 time: 0.3385 data: 0.0030 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3477 data: 0.0032 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3468 data: 0.0031 max mem: 22446 +eval (validation): [3] Total time: 0:00:26 (0.4191 s / it) +cv: [3] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.115 acc: 0.967 f1: 0.962 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:22:14 lr: nan time: 3.3363 data: 2.9891 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:48 lr: 0.000243 loss: 1.0173 (1.0184) grad: 0.4460 (0.4479) time: 0.4646 data: 0.0024 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:11 lr: 0.000246 loss: 1.0173 (0.9765) grad: 0.4057 (0.4235) time: 0.4581 data: 0.0034 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:54 lr: 0.000249 loss: 0.9934 (0.9723) grad: 0.4171 (0.4480) time: 0.4741 data: 0.0036 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:39 lr: 0.000252 loss: 0.9376 (0.9762) grad: 0.4974 (0.4563) time: 0.4583 data: 0.0034 max mem: 22446 +train: [4] [100/400] eta: 0:02:27 lr: 0.000255 loss: 0.9614 (0.9805) grad: 0.4852 (0.4604) time: 0.4620 data: 0.0033 max mem: 22446 +train: [4] [120/400] eta: 0:02:16 lr: 0.000258 loss: 1.0735 (1.0096) grad: 0.5014 (0.4694) time: 0.4707 data: 0.0032 max mem: 22446 +train: [4] [140/400] eta: 0:02:05 lr: 0.000261 loss: 1.0779 (1.0133) grad: 0.5222 (0.4790) time: 0.4586 data: 0.0032 max mem: 22446 +train: [4] [160/400] eta: 0:01:55 lr: 0.000264 loss: 1.0441 (1.0281) grad: 0.5497 (0.4890) time: 0.4646 data: 0.0034 max mem: 22446 +train: [4] [180/400] eta: 0:01:45 lr: 0.000267 loss: 1.0969 (1.0339) grad: 0.5686 (0.4972) time: 0.4708 data: 0.0036 max mem: 22446 +train: [4] [200/400] eta: 0:01:35 lr: 0.000270 loss: 1.0969 (1.0392) grad: 0.6004 (0.5128) time: 0.4468 data: 0.0034 max mem: 22446 +train: [4] [220/400] eta: 0:01:25 lr: 0.000273 loss: 1.0791 (1.0446) grad: 0.5746 (0.5170) time: 0.4580 data: 0.0035 max mem: 22446 +train: [4] [240/400] eta: 0:01:15 lr: 0.000276 loss: 1.0851 (1.0470) grad: 0.5709 (0.5188) time: 0.4568 data: 0.0035 max mem: 22446 +train: [4] [260/400] eta: 0:01:06 lr: 0.000279 loss: 1.1379 (1.0644) grad: 0.5861 (0.5262) time: 0.4625 data: 0.0034 max mem: 22446 +train: [4] [280/400] eta: 0:00:56 lr: 0.000282 loss: 1.1786 (1.0826) grad: 0.6361 (0.5455) time: 0.4461 data: 0.0035 max mem: 22446 +train: [4] [300/400] eta: 0:00:48 lr: 0.000285 loss: 1.1778 (1.1037) grad: 0.7293 (0.5591) time: 0.6660 data: 0.2312 max mem: 22446 +train: [4] [320/400] eta: 0:00:38 lr: 0.000288 loss: 1.0855 (1.0970) grad: 0.6391 (0.5633) time: 0.4361 data: 0.0035 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 1.0630 (1.0990) grad: 0.5803 (0.5645) time: 0.4508 data: 0.0035 max mem: 22446 +train: [4] [360/400] eta: 0:00:19 lr: 0.000294 loss: 1.0978 (1.1106) grad: 0.6324 (0.5792) time: 0.4502 data: 0.0037 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.1728 (1.1208) grad: 0.6631 (0.5813) time: 0.4550 data: 0.0034 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.1876 (1.1242) grad: 0.5920 (0.5836) time: 0.4531 data: 0.0034 max mem: 22446 +train: [4] Total time: 0:03:10 (0.4755 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.1876 (1.1242) grad: 0.5920 (0.5836) +eval (validation): [4] [ 0/63] eta: 0:06:58 time: 6.6448 data: 6.3590 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:27 time: 0.3345 data: 0.0030 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:11 time: 0.3251 data: 0.0034 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3437 data: 0.0032 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3458 data: 0.0032 max mem: 22446 +eval (validation): [4] Total time: 0:00:27 (0.4407 s / it) +cv: [4] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.120 acc: 0.966 f1: 0.954 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:22:24 lr: nan time: 3.3615 data: 3.0195 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:39 lr: 0.000300 loss: 1.0772 (1.0770) grad: 0.5609 (0.5953) time: 0.4381 data: 0.0028 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:07 lr: 0.000300 loss: 1.0772 (1.0650) grad: 0.5838 (0.5956) time: 0.4643 data: 0.0032 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:49 lr: 0.000300 loss: 1.1640 (1.1093) grad: 0.6465 (0.6234) time: 0.4489 data: 0.0035 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:36 lr: 0.000300 loss: 1.1640 (1.1128) grad: 0.6677 (0.6388) time: 0.4592 data: 0.0035 max mem: 22446 +train: [5] [100/400] eta: 0:02:24 lr: 0.000300 loss: 1.1470 (1.1596) grad: 0.6436 (0.6362) time: 0.4561 data: 0.0035 max mem: 22446 +train: [5] [120/400] eta: 0:02:14 lr: 0.000300 loss: 1.5092 (1.2398) grad: 0.6558 (0.6512) time: 0.4719 data: 0.0035 max mem: 22446 +train: [5] [140/400] eta: 0:02:04 lr: 0.000300 loss: 1.5412 (1.2681) grad: 0.7066 (0.6587) time: 0.4675 data: 0.0035 max mem: 22446 +train: [5] [160/400] eta: 0:01:54 lr: 0.000299 loss: 1.3756 (1.3016) grad: 0.7012 (0.6664) time: 0.4734 data: 0.0035 max mem: 22446 +train: [5] [180/400] eta: 0:01:44 lr: 0.000299 loss: 1.3756 (1.3156) grad: 0.7260 (0.6844) time: 0.4584 data: 0.0036 max mem: 22446 +train: [5] [200/400] eta: 0:01:34 lr: 0.000299 loss: 1.3510 (1.3363) grad: 0.8054 (0.6993) time: 0.4461 data: 0.0035 max mem: 22446 +train: [5] [220/400] eta: 0:01:24 lr: 0.000299 loss: 1.3238 (1.3517) grad: 0.7930 (0.7076) time: 0.4651 data: 0.0037 max mem: 22446 +train: [5] [240/400] eta: 0:01:15 lr: 0.000299 loss: 1.3112 (1.3501) grad: 0.7255 (0.7107) time: 0.4719 data: 0.0035 max mem: 22446 +train: [5] [260/400] eta: 0:01:06 lr: 0.000299 loss: 1.3008 (1.3646) grad: 0.7532 (0.7149) time: 0.4685 data: 0.0034 max mem: 22446 +train: [5] [280/400] eta: 0:00:56 lr: 0.000298 loss: 1.3740 (1.3729) grad: 0.7091 (0.7179) time: 0.4529 data: 0.0034 max mem: 22446 +train: [5] [300/400] eta: 0:00:48 lr: 0.000298 loss: 1.1948 (1.3622) grad: 0.6921 (0.7167) time: 0.6218 data: 0.1763 max mem: 22446 +train: [5] [320/400] eta: 0:00:38 lr: 0.000298 loss: 1.0843 (1.3380) grad: 0.6921 (0.7172) time: 0.4788 data: 0.0035 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 1.1320 (1.3387) grad: 0.6900 (0.7165) time: 0.4659 data: 0.0035 max mem: 22446 +train: [5] [360/400] eta: 0:00:19 lr: 0.000297 loss: 1.1733 (1.3262) grad: 0.6946 (0.7136) time: 0.4539 data: 0.0034 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.1388 (1.3250) grad: 0.6577 (0.7101) time: 0.4619 data: 0.0036 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 1.1336 (1.3059) grad: 0.5947 (0.7036) time: 0.4583 data: 0.0034 max mem: 22446 +train: [5] Total time: 0:03:10 (0.4767 s / it) +train: [5] Summary: lr: 0.000297 loss: 1.1336 (1.3059) grad: 0.5947 (0.7036) +eval (validation): [5] [ 0/63] eta: 0:03:25 time: 3.2545 data: 2.9713 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:22 time: 0.3792 data: 0.0047 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3338 data: 0.0031 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3369 data: 0.0032 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3369 data: 0.0032 max mem: 22446 +eval (validation): [5] Total time: 0:00:25 (0.4003 s / it) +cv: [5] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.110 acc: 0.970 f1: 0.964 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:22:47 lr: nan time: 3.4197 data: 3.0400 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:42 lr: 0.000296 loss: 0.9157 (0.9798) grad: 0.5962 (0.5820) time: 0.4452 data: 0.0027 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:11 lr: 0.000296 loss: 0.9106 (0.9278) grad: 0.5905 (0.5660) time: 0.4715 data: 0.0032 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:52 lr: 0.000296 loss: 0.9153 (0.9562) grad: 0.5982 (0.5917) time: 0.4560 data: 0.0035 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:37 lr: 0.000295 loss: 0.9171 (0.9988) grad: 0.6319 (0.5985) time: 0.4543 data: 0.0031 max mem: 22446 +train: [6] [100/400] eta: 0:02:26 lr: 0.000295 loss: 0.9187 (1.0071) grad: 0.5966 (0.5951) time: 0.4654 data: 0.0034 max mem: 22446 +train: [6] [120/400] eta: 0:02:15 lr: 0.000295 loss: 0.9187 (0.9838) grad: 0.5694 (0.5921) time: 0.4737 data: 0.0033 max mem: 22446 +train: [6] [140/400] eta: 0:02:04 lr: 0.000294 loss: 0.8822 (0.9787) grad: 0.5739 (0.6264) time: 0.4519 data: 0.0033 max mem: 22446 +train: [6] [160/400] eta: 0:01:55 lr: 0.000294 loss: 0.8822 (0.9748) grad: 0.5986 (0.6342) time: 0.4734 data: 0.0034 max mem: 22446 +train: [6] [180/400] eta: 0:01:45 lr: 0.000293 loss: 0.8385 (0.9761) grad: 0.5943 (0.6283) time: 0.4570 data: 0.0035 max mem: 22446 +train: [6] [200/400] eta: 0:01:34 lr: 0.000293 loss: 0.9287 (0.9931) grad: 0.5786 (0.6259) time: 0.4519 data: 0.0033 max mem: 22446 +train: [6] [220/400] eta: 0:01:25 lr: 0.000292 loss: 0.8579 (0.9776) grad: 0.5617 (0.6211) time: 0.4663 data: 0.0033 max mem: 22446 +train: [6] [240/400] eta: 0:01:15 lr: 0.000292 loss: 0.8012 (0.9638) grad: 0.5542 (0.6167) time: 0.4709 data: 0.0034 max mem: 22446 +train: [6] [260/400] eta: 0:01:06 lr: 0.000291 loss: 0.7530 (0.9508) grad: 0.5517 (0.6102) time: 0.4519 data: 0.0034 max mem: 22446 +train: [6] [280/400] eta: 0:00:56 lr: 0.000291 loss: 0.8731 (0.9578) grad: 0.5474 (0.6079) time: 0.4513 data: 0.0035 max mem: 22446 +train: [6] [300/400] eta: 0:00:48 lr: 0.000290 loss: 0.8862 (0.9556) grad: 0.5996 (0.6073) time: 0.6303 data: 0.1780 max mem: 22446 +train: [6] [320/400] eta: 0:00:38 lr: 0.000290 loss: 0.7748 (0.9401) grad: 0.5578 (0.6009) time: 0.4575 data: 0.0033 max mem: 22446 +train: [6] [340/400] eta: 0:00:28 lr: 0.000289 loss: 0.7086 (0.9298) grad: 0.4812 (0.5937) time: 0.4581 data: 0.0032 max mem: 22446 +train: [6] [360/400] eta: 0:00:19 lr: 0.000288 loss: 0.7086 (0.9190) grad: 0.4812 (0.5883) time: 0.4588 data: 0.0035 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.6946 (0.9057) grad: 0.4926 (0.5830) time: 0.4565 data: 0.0034 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.5921 (0.8900) grad: 0.4665 (0.5775) time: 0.4540 data: 0.0033 max mem: 22446 +train: [6] Total time: 0:03:10 (0.4755 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.5921 (0.8900) grad: 0.4665 (0.5775) +eval (validation): [6] [ 0/63] eta: 0:03:21 time: 3.1993 data: 2.9621 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3383 data: 0.0034 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3401 data: 0.0030 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3396 data: 0.0032 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3390 data: 0.0032 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3891 s / it) +cv: [6] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.107 acc: 0.973 f1: 0.968 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:22:08 lr: nan time: 3.3216 data: 2.9752 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:37 lr: 0.000286 loss: 0.8190 (0.7433) grad: 0.4598 (0.4746) time: 0.4350 data: 0.0026 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:07 lr: 0.000286 loss: 0.7736 (0.7395) grad: 0.4598 (0.4720) time: 0.4642 data: 0.0035 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:49 lr: 0.000285 loss: 0.7250 (0.7221) grad: 0.4375 (0.4507) time: 0.4559 data: 0.0034 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:36 lr: 0.000284 loss: 0.5829 (0.6946) grad: 0.4294 (0.4502) time: 0.4560 data: 0.0033 max mem: 22446 +train: [7] [100/400] eta: 0:02:24 lr: 0.000284 loss: 0.5328 (0.6900) grad: 0.4573 (0.4518) time: 0.4584 data: 0.0032 max mem: 22446 +train: [7] [120/400] eta: 0:02:14 lr: 0.000283 loss: 0.6280 (0.6851) grad: 0.4679 (0.4556) time: 0.4772 data: 0.0033 max mem: 22446 +train: [7] [140/400] eta: 0:02:03 lr: 0.000282 loss: 0.5966 (0.6732) grad: 0.3960 (0.4484) time: 0.4488 data: 0.0032 max mem: 22446 +train: [7] [160/400] eta: 0:01:54 lr: 0.000282 loss: 0.5590 (0.6580) grad: 0.4097 (0.4468) time: 0.4735 data: 0.0034 max mem: 22446 +train: [7] [180/400] eta: 0:01:44 lr: 0.000281 loss: 0.5342 (0.6507) grad: 0.3991 (0.4399) time: 0.4512 data: 0.0033 max mem: 22446 +train: [7] [200/400] eta: 0:01:34 lr: 0.000280 loss: 0.5233 (0.6380) grad: 0.3650 (0.4384) time: 0.4513 data: 0.0033 max mem: 22446 +train: [7] [220/400] eta: 0:01:24 lr: 0.000279 loss: 0.5341 (0.6325) grad: 0.3734 (0.4347) time: 0.4598 data: 0.0034 max mem: 22446 +train: [7] [240/400] eta: 0:01:15 lr: 0.000278 loss: 0.4995 (0.6254) grad: 0.3803 (0.4324) time: 0.4674 data: 0.0034 max mem: 22446 +train: [7] [260/400] eta: 0:01:05 lr: 0.000278 loss: 0.4995 (0.6188) grad: 0.4038 (0.4326) time: 0.4671 data: 0.0033 max mem: 22446 +train: [7] [280/400] eta: 0:00:56 lr: 0.000277 loss: 0.5395 (0.6212) grad: 0.4160 (0.4319) time: 0.4512 data: 0.0034 max mem: 22446 +train: [7] [300/400] eta: 0:00:47 lr: 0.000276 loss: 0.5525 (0.6222) grad: 0.4160 (0.4344) time: 0.6315 data: 0.1753 max mem: 22446 +train: [7] [320/400] eta: 0:00:38 lr: 0.000275 loss: 0.5536 (0.6203) grad: 0.4439 (0.4323) time: 0.4473 data: 0.0030 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.5217 (0.6134) grad: 0.3989 (0.4292) time: 0.4518 data: 0.0036 max mem: 22446 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 0.4669 (0.6066) grad: 0.3457 (0.4254) time: 0.4538 data: 0.0034 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.4326 (0.5959) grad: 0.3408 (0.4203) time: 0.4569 data: 0.0034 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.4202 (0.5896) grad: 0.3187 (0.4166) time: 0.4559 data: 0.0032 max mem: 22446 +train: [7] Total time: 0:03:09 (0.4731 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.4202 (0.5896) grad: 0.3187 (0.4166) +eval (validation): [7] [ 0/63] eta: 0:03:22 time: 3.2158 data: 2.9833 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:21 time: 0.3674 data: 0.0175 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3393 data: 0.0031 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3433 data: 0.0032 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3376 data: 0.0032 max mem: 22446 +eval (validation): [7] Total time: 0:00:25 (0.3993 s / it) +cv: [7] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.099 acc: 0.975 f1: 0.969 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:21:44 lr: nan time: 3.2611 data: 2.8869 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:44 lr: 0.000270 loss: 0.3926 (0.3830) grad: 0.2966 (0.3186) time: 0.4567 data: 0.0032 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:10 lr: 0.000270 loss: 0.3904 (0.3802) grad: 0.3069 (0.3185) time: 0.4657 data: 0.0031 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:51 lr: 0.000269 loss: 0.3904 (0.3927) grad: 0.3128 (0.3215) time: 0.4538 data: 0.0036 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:37 lr: 0.000268 loss: 0.3954 (0.3951) grad: 0.3359 (0.3163) time: 0.4531 data: 0.0034 max mem: 22446 +train: [8] [100/400] eta: 0:02:25 lr: 0.000267 loss: 0.3432 (0.3869) grad: 0.3032 (0.3120) time: 0.4556 data: 0.0033 max mem: 22446 +train: [8] [120/400] eta: 0:02:15 lr: 0.000266 loss: 0.4011 (0.4001) grad: 0.3032 (0.3127) time: 0.4721 data: 0.0037 max mem: 22446 +train: [8] [140/400] eta: 0:02:04 lr: 0.000265 loss: 0.4169 (0.4065) grad: 0.3193 (0.3173) time: 0.4569 data: 0.0033 max mem: 22446 +train: [8] [160/400] eta: 0:01:55 lr: 0.000264 loss: 0.4041 (0.4051) grad: 0.3237 (0.3180) time: 0.4917 data: 0.0035 max mem: 22446 +train: [8] [180/400] eta: 0:01:45 lr: 0.000263 loss: 0.3993 (0.4081) grad: 0.3120 (0.3184) time: 0.4567 data: 0.0034 max mem: 22446 +train: [8] [200/400] eta: 0:01:35 lr: 0.000262 loss: 0.3814 (0.4055) grad: 0.2776 (0.3197) time: 0.4675 data: 0.0034 max mem: 22446 +train: [8] [220/400] eta: 0:01:25 lr: 0.000260 loss: 0.3814 (0.4047) grad: 0.3071 (0.3188) time: 0.4616 data: 0.0034 max mem: 22446 +train: [8] [240/400] eta: 0:01:16 lr: 0.000259 loss: 0.4241 (0.4094) grad: 0.3059 (0.3182) time: 0.4703 data: 0.0034 max mem: 22446 +train: [8] [260/400] eta: 0:01:06 lr: 0.000258 loss: 0.4199 (0.4084) grad: 0.3015 (0.3181) time: 0.4674 data: 0.0034 max mem: 22446 +train: [8] [280/400] eta: 0:00:56 lr: 0.000257 loss: 0.3547 (0.4068) grad: 0.2848 (0.3187) time: 0.4481 data: 0.0034 max mem: 22446 +train: [8] [300/400] eta: 0:00:48 lr: 0.000256 loss: 0.3663 (0.4098) grad: 0.3005 (0.3184) time: 0.6241 data: 0.1778 max mem: 22446 +train: [8] [320/400] eta: 0:00:38 lr: 0.000255 loss: 0.3595 (0.4041) grad: 0.2582 (0.3124) time: 0.4554 data: 0.0032 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.3058 (0.3997) grad: 0.2232 (0.3075) time: 0.4535 data: 0.0037 max mem: 22446 +train: [8] [360/400] eta: 0:00:19 lr: 0.000253 loss: 0.3184 (0.3954) grad: 0.2344 (0.3046) time: 0.4553 data: 0.0035 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.3066 (0.3923) grad: 0.2539 (0.3026) time: 0.4564 data: 0.0035 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.2958 (0.3911) grad: 0.2519 (0.2999) time: 0.4623 data: 0.0035 max mem: 22446 +train: [8] Total time: 0:03:10 (0.4765 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.2958 (0.3911) grad: 0.2519 (0.2999) +eval (validation): [8] [ 0/63] eta: 0:03:29 time: 3.3195 data: 3.0378 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:20 time: 0.3363 data: 0.0040 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3262 data: 0.0027 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3426 data: 0.0032 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3412 data: 0.0032 max mem: 22446 +eval (validation): [8] Total time: 0:00:24 (0.3867 s / it) +cv: [8] best hparam: (2.3, 1.0) (029) ('029_lr2.3e+00_wd1.0e+00') loss: 0.100 acc: 0.974 f1: 0.969 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:16 lr: nan time: 3.3402 data: 3.0073 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:41 lr: 0.000249 loss: 0.3059 (0.3359) grad: 0.2322 (0.2317) time: 0.4455 data: 0.0024 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:06 lr: 0.000248 loss: 0.3059 (0.3324) grad: 0.2322 (0.2432) time: 0.4507 data: 0.0031 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:48 lr: 0.000247 loss: 0.3321 (0.3344) grad: 0.2465 (0.2424) time: 0.4482 data: 0.0035 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:35 lr: 0.000246 loss: 0.3321 (0.3357) grad: 0.2414 (0.2363) time: 0.4536 data: 0.0035 max mem: 22446 +train: [9] [100/400] eta: 0:02:24 lr: 0.000244 loss: 0.3199 (0.3291) grad: 0.2207 (0.2316) time: 0.4694 data: 0.0035 max mem: 22446 +train: [9] [120/400] eta: 0:02:14 lr: 0.000243 loss: 0.3322 (0.3331) grad: 0.2288 (0.2333) time: 0.4721 data: 0.0034 max mem: 22446 +train: [9] [140/400] eta: 0:02:03 lr: 0.000242 loss: 0.3067 (0.3282) grad: 0.2292 (0.2335) time: 0.4553 data: 0.0033 max mem: 22446 +train: [9] [160/400] eta: 0:01:54 lr: 0.000241 loss: 0.2905 (0.3259) grad: 0.2287 (0.2343) time: 0.4766 data: 0.0034 max mem: 22446 +train: [9] [180/400] eta: 0:01:44 lr: 0.000240 loss: 0.2905 (0.3211) grad: 0.2264 (0.2343) time: 0.4557 data: 0.0033 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.2801 (0.3198) grad: 0.2339 (0.2340) time: 0.4594 data: 0.0033 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.2804 (0.3180) grad: 0.2298 (0.2329) time: 0.4619 data: 0.0034 max mem: 22446 +train: [9] [240/400] eta: 0:01:15 lr: 0.000236 loss: 0.2759 (0.3147) grad: 0.2107 (0.2300) time: 0.4770 data: 0.0038 max mem: 22446 +train: [9] [260/400] eta: 0:01:06 lr: 0.000234 loss: 0.2759 (0.3151) grad: 0.2236 (0.2302) time: 0.4655 data: 0.0035 max mem: 22446 +train: [9] [280/400] eta: 0:00:56 lr: 0.000233 loss: 0.2683 (0.3134) grad: 0.2244 (0.2297) time: 0.4498 data: 0.0033 max mem: 22446 +train: [9] [300/400] eta: 0:00:48 lr: 0.000232 loss: 0.2775 (0.3131) grad: 0.2172 (0.2300) time: 0.6316 data: 0.1821 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.2646 (0.3084) grad: 0.2291 (0.2293) time: 0.4586 data: 0.0032 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.2616 (0.3079) grad: 0.2291 (0.2292) time: 0.4634 data: 0.0031 max mem: 22446 +train: [9] [360/400] eta: 0:00:19 lr: 0.000228 loss: 0.2834 (0.3056) grad: 0.1984 (0.2271) time: 0.4669 data: 0.0034 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.2781 (0.3041) grad: 0.1979 (0.2262) time: 0.4654 data: 0.0035 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.2608 (0.3013) grad: 0.1961 (0.2243) time: 0.4627 data: 0.0035 max mem: 22446 +train: [9] Total time: 0:03:10 (0.4769 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.2608 (0.3013) grad: 0.1961 (0.2243) +eval (validation): [9] [ 0/63] eta: 0:03:23 time: 3.2237 data: 2.9491 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:20 time: 0.3333 data: 0.0030 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3318 data: 0.0027 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3419 data: 0.0034 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3417 data: 0.0034 max mem: 22446 +eval (validation): [9] Total time: 0:00:24 (0.3864 s / it) +cv: [9] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.102 acc: 0.974 f1: 0.969 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:21:42 lr: nan time: 3.2566 data: 2.9139 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:41 lr: 0.000224 loss: 0.2975 (0.3051) grad: 0.1685 (0.1916) time: 0.4480 data: 0.0038 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:10 lr: 0.000222 loss: 0.2741 (0.2828) grad: 0.1685 (0.1859) time: 0.4723 data: 0.0031 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:52 lr: 0.000221 loss: 0.2590 (0.2735) grad: 0.1709 (0.1823) time: 0.4620 data: 0.0035 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:37 lr: 0.000220 loss: 0.2489 (0.2672) grad: 0.1892 (0.1859) time: 0.4534 data: 0.0033 max mem: 22446 +train: [10] [100/400] eta: 0:02:27 lr: 0.000218 loss: 0.2481 (0.2632) grad: 0.1892 (0.1871) time: 0.4764 data: 0.0033 max mem: 22446 +train: [10] [120/400] eta: 0:02:16 lr: 0.000217 loss: 0.2481 (0.2612) grad: 0.1731 (0.1882) time: 0.4746 data: 0.0033 max mem: 22446 +train: [10] [140/400] eta: 0:02:05 lr: 0.000215 loss: 0.2651 (0.2651) grad: 0.1814 (0.1901) time: 0.4643 data: 0.0034 max mem: 22446 +train: [10] [160/400] eta: 0:01:55 lr: 0.000214 loss: 0.2406 (0.2622) grad: 0.1803 (0.1909) time: 0.4729 data: 0.0033 max mem: 22446 +train: [10] [180/400] eta: 0:01:45 lr: 0.000213 loss: 0.2344 (0.2618) grad: 0.1623 (0.1885) time: 0.4600 data: 0.0032 max mem: 22446 +train: [10] [200/400] eta: 0:01:35 lr: 0.000211 loss: 0.2410 (0.2612) grad: 0.1664 (0.1874) time: 0.4603 data: 0.0034 max mem: 22446 +train: [10] [220/400] eta: 0:01:25 lr: 0.000210 loss: 0.2329 (0.2596) grad: 0.1644 (0.1838) time: 0.4636 data: 0.0036 max mem: 22446 +train: [10] [240/400] eta: 0:01:16 lr: 0.000208 loss: 0.2577 (0.2597) grad: 0.1622 (0.1828) time: 0.4836 data: 0.0036 max mem: 22446 +train: [10] [260/400] eta: 0:01:06 lr: 0.000207 loss: 0.2660 (0.2589) grad: 0.1670 (0.1836) time: 0.4758 data: 0.0034 max mem: 22446 +train: [10] [280/400] eta: 0:00:57 lr: 0.000205 loss: 0.2416 (0.2579) grad: 0.1623 (0.1829) time: 0.4602 data: 0.0035 max mem: 22446 +train: [10] [300/400] eta: 0:00:48 lr: 0.000204 loss: 0.2367 (0.2570) grad: 0.1683 (0.1828) time: 0.6270 data: 0.1766 max mem: 22446 +train: [10] [320/400] eta: 0:00:38 lr: 0.000202 loss: 0.2262 (0.2548) grad: 0.1566 (0.1804) time: 0.4587 data: 0.0037 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.2110 (0.2524) grad: 0.1458 (0.1779) time: 0.4618 data: 0.0035 max mem: 22446 +train: [10] [360/400] eta: 0:00:19 lr: 0.000199 loss: 0.2049 (0.2500) grad: 0.1244 (0.1741) time: 0.4531 data: 0.0034 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.2034 (0.2488) grad: 0.1244 (0.1723) time: 0.4599 data: 0.0034 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.2265 (0.2478) grad: 0.1382 (0.1705) time: 0.4635 data: 0.0034 max mem: 22446 +train: [10] Total time: 0:03:11 (0.4798 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.2265 (0.2478) grad: 0.1382 (0.1705) +eval (validation): [10] [ 0/63] eta: 0:03:26 time: 3.2813 data: 2.9948 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:20 time: 0.3361 data: 0.0034 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3271 data: 0.0028 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3380 data: 0.0031 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3370 data: 0.0031 max mem: 22446 +eval (validation): [10] Total time: 0:00:24 (0.3845 s / it) +cv: [10] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.128 acc: 0.976 f1: 0.971 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [11] [ 0/400] eta: 0:21:35 lr: nan time: 3.2389 data: 2.8354 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:43 lr: 0.000195 loss: 0.2332 (0.2307) grad: 0.1322 (0.1455) time: 0.4567 data: 0.0028 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:07 lr: 0.000193 loss: 0.2252 (0.2268) grad: 0.1303 (0.1398) time: 0.4491 data: 0.0030 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:49 lr: 0.000192 loss: 0.2201 (0.2252) grad: 0.1252 (0.1373) time: 0.4562 data: 0.0033 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:35 lr: 0.000190 loss: 0.2199 (0.2227) grad: 0.1150 (0.1310) time: 0.4474 data: 0.0031 max mem: 22446 +train: [11] [100/400] eta: 0:02:24 lr: 0.000189 loss: 0.2093 (0.2189) grad: 0.1087 (0.1303) time: 0.4556 data: 0.0033 max mem: 22446 +train: [11] [120/400] eta: 0:02:13 lr: 0.000187 loss: 0.2093 (0.2183) grad: 0.1386 (0.1310) time: 0.4633 data: 0.0034 max mem: 22446 +train: [11] [140/400] eta: 0:02:03 lr: 0.000186 loss: 0.2136 (0.2183) grad: 0.1369 (0.1314) time: 0.4606 data: 0.0037 max mem: 22446 +train: [11] [160/400] eta: 0:01:53 lr: 0.000184 loss: 0.2136 (0.2199) grad: 0.1321 (0.1322) time: 0.4707 data: 0.0035 max mem: 22446 +train: [11] [180/400] eta: 0:01:44 lr: 0.000183 loss: 0.2106 (0.2172) grad: 0.1183 (0.1306) time: 0.4623 data: 0.0034 max mem: 22446 +train: [11] [200/400] eta: 0:01:34 lr: 0.000181 loss: 0.1841 (0.2143) grad: 0.1112 (0.1286) time: 0.4690 data: 0.0035 max mem: 22446 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 0.1820 (0.2134) grad: 0.1134 (0.1288) time: 0.4611 data: 0.0036 max mem: 22446 +train: [11] [240/400] eta: 0:01:15 lr: 0.000178 loss: 0.1938 (0.2124) grad: 0.1320 (0.1292) time: 0.4740 data: 0.0036 max mem: 22446 +train: [11] [260/400] eta: 0:01:06 lr: 0.000177 loss: 0.2004 (0.2125) grad: 0.1283 (0.1282) time: 0.4757 data: 0.0036 max mem: 22446 +train: [11] [280/400] eta: 0:00:56 lr: 0.000175 loss: 0.2006 (0.2128) grad: 0.1335 (0.1290) time: 0.4696 data: 0.0037 max mem: 22446 +train: [11] [300/400] eta: 0:00:48 lr: 0.000174 loss: 0.2107 (0.2128) grad: 0.1135 (0.1273) time: 0.6329 data: 0.1851 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.1970 (0.2117) grad: 0.0943 (0.1256) time: 0.4609 data: 0.0032 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.1957 (0.2109) grad: 0.0974 (0.1245) time: 0.4652 data: 0.0034 max mem: 22446 +train: [11] [360/400] eta: 0:00:19 lr: 0.000169 loss: 0.1993 (0.2099) grad: 0.1076 (0.1235) time: 0.4750 data: 0.0036 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.1953 (0.2095) grad: 0.1002 (0.1221) time: 0.4616 data: 0.0034 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.2035 (0.2094) grad: 0.0903 (0.1205) time: 0.4708 data: 0.0036 max mem: 22446 +train: [11] Total time: 0:03:11 (0.4791 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.2035 (0.2094) grad: 0.0903 (0.1205) +eval (validation): [11] [ 0/63] eta: 0:03:51 time: 3.6794 data: 3.4067 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:21 time: 0.3427 data: 0.0032 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3531 data: 0.0026 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3474 data: 0.0031 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3467 data: 0.0030 max mem: 22446 +eval (validation): [11] Total time: 0:00:25 (0.4048 s / it) +cv: [11] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.123 acc: 0.977 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [12] [ 0/400] eta: 0:28:44 lr: nan time: 4.3115 data: 3.8951 max mem: 22446 +train: [12] [ 20/400] eta: 0:04:09 lr: 0.000164 loss: 0.1781 (0.1836) grad: 0.0839 (0.0933) time: 0.4734 data: 0.0023 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:22 lr: 0.000163 loss: 0.1807 (0.1859) grad: 0.0950 (0.0948) time: 0.4614 data: 0.0033 max mem: 22446 +train: [12] [ 60/400] eta: 0:03:00 lr: 0.000161 loss: 0.1770 (0.1829) grad: 0.0950 (0.0937) time: 0.4730 data: 0.0035 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:45 lr: 0.000160 loss: 0.1770 (0.1834) grad: 0.0827 (0.0910) time: 0.4654 data: 0.0035 max mem: 22446 +train: [12] [100/400] eta: 0:02:31 lr: 0.000158 loss: 0.1914 (0.1866) grad: 0.0807 (0.0897) time: 0.4536 data: 0.0034 max mem: 22446 +train: [12] [120/400] eta: 0:02:19 lr: 0.000156 loss: 0.1914 (0.1863) grad: 0.0865 (0.0901) time: 0.4658 data: 0.0035 max mem: 22446 +train: [12] [140/400] eta: 0:02:07 lr: 0.000155 loss: 0.1726 (0.1857) grad: 0.0887 (0.0890) time: 0.4574 data: 0.0035 max mem: 22446 +train: [12] [160/400] eta: 0:01:57 lr: 0.000153 loss: 0.1861 (0.1856) grad: 0.0809 (0.0892) time: 0.4633 data: 0.0035 max mem: 22446 +train: [12] [180/400] eta: 0:01:46 lr: 0.000152 loss: 0.1821 (0.1863) grad: 0.0808 (0.0889) time: 0.4618 data: 0.0034 max mem: 22446 +train: [12] [200/400] eta: 0:01:36 lr: 0.000150 loss: 0.1756 (0.1860) grad: 0.0794 (0.0885) time: 0.4699 data: 0.0036 max mem: 22446 +train: [12] [220/400] eta: 0:01:26 lr: 0.000149 loss: 0.1768 (0.1863) grad: 0.0848 (0.0885) time: 0.4687 data: 0.0035 max mem: 22446 +train: [12] [240/400] eta: 0:01:17 lr: 0.000147 loss: 0.1942 (0.1872) grad: 0.0817 (0.0879) time: 0.4793 data: 0.0034 max mem: 22446 +train: [12] [260/400] eta: 0:01:07 lr: 0.000145 loss: 0.1863 (0.1872) grad: 0.0797 (0.0879) time: 0.4657 data: 0.0034 max mem: 22446 +train: [12] [280/400] eta: 0:00:57 lr: 0.000144 loss: 0.1758 (0.1869) grad: 0.0825 (0.0877) time: 0.4604 data: 0.0033 max mem: 22446 +train: [12] [300/400] eta: 0:00:49 lr: 0.000142 loss: 0.1759 (0.1870) grad: 0.0838 (0.0874) time: 0.6609 data: 0.1899 max mem: 22446 +train: [12] [320/400] eta: 0:00:39 lr: 0.000141 loss: 0.1806 (0.1864) grad: 0.0763 (0.0870) time: 0.4483 data: 0.0032 max mem: 22446 +train: [12] [340/400] eta: 0:00:29 lr: 0.000139 loss: 0.1806 (0.1867) grad: 0.0776 (0.0869) time: 0.4693 data: 0.0034 max mem: 22446 +train: [12] [360/400] eta: 0:00:19 lr: 0.000138 loss: 0.1856 (0.1864) grad: 0.0756 (0.0862) time: 0.4734 data: 0.0035 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.1863 (0.1869) grad: 0.0725 (0.0861) time: 0.4713 data: 0.0034 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.1851 (0.1862) grad: 0.0795 (0.0856) time: 0.4483 data: 0.0034 max mem: 22446 +train: [12] Total time: 0:03:13 (0.4843 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.1851 (0.1862) grad: 0.0795 (0.0856) +eval (validation): [12] [ 0/63] eta: 0:03:19 time: 3.1630 data: 2.8783 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:20 time: 0.3485 data: 0.0040 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3339 data: 0.0031 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3434 data: 0.0032 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3426 data: 0.0032 max mem: 22446 +eval (validation): [12] Total time: 0:00:24 (0.3900 s / it) +cv: [12] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.122 acc: 0.977 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:42 lr: nan time: 3.4051 data: 3.0196 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:43 lr: 0.000133 loss: 0.1920 (0.1940) grad: 0.0756 (0.0831) time: 0.4486 data: 0.0032 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:09 lr: 0.000131 loss: 0.1790 (0.1804) grad: 0.0742 (0.0803) time: 0.4579 data: 0.0029 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:51 lr: 0.000130 loss: 0.1666 (0.1777) grad: 0.0756 (0.0808) time: 0.4628 data: 0.0035 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:39 lr: 0.000128 loss: 0.1734 (0.1796) grad: 0.0786 (0.0815) time: 0.4752 data: 0.0035 max mem: 22446 +train: [13] [100/400] eta: 0:02:27 lr: 0.000127 loss: 0.1761 (0.1789) grad: 0.0779 (0.0803) time: 0.4648 data: 0.0033 max mem: 22446 +train: [13] [120/400] eta: 0:02:16 lr: 0.000125 loss: 0.1781 (0.1788) grad: 0.0725 (0.0789) time: 0.4638 data: 0.0034 max mem: 22446 +train: [13] [140/400] eta: 0:02:06 lr: 0.000124 loss: 0.1656 (0.1788) grad: 0.0725 (0.0790) time: 0.4795 data: 0.0034 max mem: 22446 +train: [13] [160/400] eta: 0:01:55 lr: 0.000122 loss: 0.1656 (0.1777) grad: 0.0728 (0.0781) time: 0.4667 data: 0.0034 max mem: 22446 +train: [13] [180/400] eta: 0:01:45 lr: 0.000120 loss: 0.1812 (0.1793) grad: 0.0742 (0.0779) time: 0.4683 data: 0.0033 max mem: 22446 +train: [13] [200/400] eta: 0:01:36 lr: 0.000119 loss: 0.1812 (0.1798) grad: 0.0780 (0.0787) time: 0.4798 data: 0.0034 max mem: 22446 +train: [13] [220/400] eta: 0:01:26 lr: 0.000117 loss: 0.1683 (0.1786) grad: 0.0822 (0.0789) time: 0.4656 data: 0.0039 max mem: 22446 +train: [13] [240/400] eta: 0:01:16 lr: 0.000116 loss: 0.1781 (0.1790) grad: 0.0770 (0.0786) time: 0.4640 data: 0.0034 max mem: 22446 +train: [13] [260/400] eta: 0:01:07 lr: 0.000114 loss: 0.1810 (0.1792) grad: 0.0764 (0.0784) time: 0.4869 data: 0.0035 max mem: 22446 +train: [13] [280/400] eta: 0:00:57 lr: 0.000113 loss: 0.1627 (0.1785) grad: 0.0763 (0.0785) time: 0.4654 data: 0.0035 max mem: 22446 +train: [13] [300/400] eta: 0:00:49 lr: 0.000111 loss: 0.1621 (0.1782) grad: 0.0763 (0.0783) time: 0.6589 data: 0.1842 max mem: 22446 +train: [13] [320/400] eta: 0:00:39 lr: 0.000110 loss: 0.1686 (0.1779) grad: 0.0719 (0.0780) time: 0.4730 data: 0.0027 max mem: 22446 +train: [13] [340/400] eta: 0:00:29 lr: 0.000108 loss: 0.1713 (0.1773) grad: 0.0719 (0.0775) time: 0.4646 data: 0.0035 max mem: 22446 +train: [13] [360/400] eta: 0:00:19 lr: 0.000107 loss: 0.1489 (0.1759) grad: 0.0636 (0.0768) time: 0.4707 data: 0.0034 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.1560 (0.1763) grad: 0.0661 (0.0766) time: 0.4772 data: 0.0035 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.1752 (0.1767) grad: 0.0733 (0.0765) time: 0.4682 data: 0.0035 max mem: 22446 +train: [13] Total time: 0:03:14 (0.4857 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.1752 (0.1767) grad: 0.0733 (0.0765) +eval (validation): [13] [ 0/63] eta: 0:03:22 time: 3.2193 data: 2.9695 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:21 time: 0.3681 data: 0.0040 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3438 data: 0.0031 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3356 data: 0.0033 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3367 data: 0.0032 max mem: 22446 +eval (validation): [13] Total time: 0:00:25 (0.3988 s / it) +cv: [13] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.121 acc: 0.977 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:23:19 lr: nan time: 3.4995 data: 3.0950 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:54 lr: 0.000102 loss: 0.1621 (0.1747) grad: 0.0692 (0.0719) time: 0.4734 data: 0.0033 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:15 lr: 0.000101 loss: 0.1709 (0.1704) grad: 0.0742 (0.0740) time: 0.4658 data: 0.0034 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:56 lr: 0.000099 loss: 0.1622 (0.1664) grad: 0.0737 (0.0733) time: 0.4659 data: 0.0034 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:42 lr: 0.000098 loss: 0.1596 (0.1657) grad: 0.0679 (0.0725) time: 0.4801 data: 0.0035 max mem: 22446 +train: [14] [100/400] eta: 0:02:30 lr: 0.000096 loss: 0.1632 (0.1657) grad: 0.0679 (0.0721) time: 0.4669 data: 0.0035 max mem: 22446 +train: [14] [120/400] eta: 0:02:17 lr: 0.000095 loss: 0.1742 (0.1693) grad: 0.0666 (0.0715) time: 0.4490 data: 0.0035 max mem: 22446 +train: [14] [140/400] eta: 0:02:07 lr: 0.000093 loss: 0.1850 (0.1724) grad: 0.0668 (0.0721) time: 0.4848 data: 0.0034 max mem: 22446 +train: [14] [160/400] eta: 0:01:57 lr: 0.000092 loss: 0.1721 (0.1708) grad: 0.0661 (0.0710) time: 0.4661 data: 0.0035 max mem: 22446 +train: [14] [180/400] eta: 0:01:46 lr: 0.000090 loss: 0.1555 (0.1703) grad: 0.0594 (0.0704) time: 0.4637 data: 0.0035 max mem: 22446 +train: [14] [200/400] eta: 0:01:36 lr: 0.000089 loss: 0.1704 (0.1705) grad: 0.0646 (0.0708) time: 0.4677 data: 0.0033 max mem: 22446 +train: [14] [220/400] eta: 0:01:26 lr: 0.000088 loss: 0.1667 (0.1701) grad: 0.0699 (0.0703) time: 0.4636 data: 0.0035 max mem: 22446 +train: [14] [240/400] eta: 0:01:16 lr: 0.000086 loss: 0.1600 (0.1685) grad: 0.0634 (0.0702) time: 0.4629 data: 0.0034 max mem: 22446 +train: [14] [260/400] eta: 0:01:07 lr: 0.000085 loss: 0.1512 (0.1679) grad: 0.0645 (0.0699) time: 0.5023 data: 0.0036 max mem: 22446 +train: [14] [280/400] eta: 0:00:57 lr: 0.000083 loss: 0.1720 (0.1694) grad: 0.0654 (0.0701) time: 0.4831 data: 0.0035 max mem: 22446 +train: [14] [300/400] eta: 0:00:49 lr: 0.000082 loss: 0.1771 (0.1689) grad: 0.0644 (0.0699) time: 0.6134 data: 0.1655 max mem: 22446 +train: [14] [320/400] eta: 0:00:39 lr: 0.000081 loss: 0.1655 (0.1686) grad: 0.0644 (0.0694) time: 0.4807 data: 0.0035 max mem: 22446 +train: [14] [340/400] eta: 0:00:29 lr: 0.000079 loss: 0.1711 (0.1690) grad: 0.0647 (0.0691) time: 0.4629 data: 0.0036 max mem: 22446 +train: [14] [360/400] eta: 0:00:19 lr: 0.000078 loss: 0.1666 (0.1691) grad: 0.0673 (0.0692) time: 0.4649 data: 0.0036 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.1639 (0.1693) grad: 0.0702 (0.0691) time: 0.4724 data: 0.0036 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.1664 (0.1692) grad: 0.0617 (0.0689) time: 0.4754 data: 0.0036 max mem: 22446 +train: [14] Total time: 0:03:14 (0.4861 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.1664 (0.1692) grad: 0.0617 (0.0689) +eval (validation): [14] [ 0/63] eta: 0:03:34 time: 3.3989 data: 3.1133 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:22 time: 0.3751 data: 0.0037 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:10 time: 0.3967 data: 0.0034 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3364 data: 0.0032 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3358 data: 0.0032 max mem: 22446 +eval (validation): [14] Total time: 0:00:26 (0.4211 s / it) +cv: [14] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.120 acc: 0.977 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:22:20 lr: nan time: 3.3504 data: 3.0146 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:50 lr: 0.000074 loss: 0.1635 (0.1704) grad: 0.0605 (0.0635) time: 0.4691 data: 0.0039 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:14 lr: 0.000072 loss: 0.1695 (0.1683) grad: 0.0635 (0.0691) time: 0.4724 data: 0.0030 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:55 lr: 0.000071 loss: 0.1634 (0.1651) grad: 0.0695 (0.0670) time: 0.4651 data: 0.0036 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:40 lr: 0.000070 loss: 0.1596 (0.1642) grad: 0.0640 (0.0665) time: 0.4610 data: 0.0035 max mem: 22446 +train: [15] [100/400] eta: 0:02:28 lr: 0.000068 loss: 0.1596 (0.1624) grad: 0.0667 (0.0666) time: 0.4601 data: 0.0036 max mem: 22446 +train: [15] [120/400] eta: 0:02:16 lr: 0.000067 loss: 0.1563 (0.1609) grad: 0.0667 (0.0664) time: 0.4573 data: 0.0035 max mem: 22446 +train: [15] [140/400] eta: 0:02:06 lr: 0.000066 loss: 0.1606 (0.1612) grad: 0.0649 (0.0663) time: 0.4859 data: 0.0036 max mem: 22446 +train: [15] [160/400] eta: 0:01:56 lr: 0.000064 loss: 0.1644 (0.1608) grad: 0.0677 (0.0667) time: 0.4722 data: 0.0037 max mem: 22446 +train: [15] [180/400] eta: 0:01:46 lr: 0.000063 loss: 0.1546 (0.1618) grad: 0.0675 (0.0668) time: 0.4683 data: 0.0034 max mem: 22446 +train: [15] [200/400] eta: 0:01:36 lr: 0.000062 loss: 0.1546 (0.1616) grad: 0.0617 (0.0664) time: 0.4613 data: 0.0034 max mem: 22446 +train: [15] [220/400] eta: 0:01:26 lr: 0.000061 loss: 0.1492 (0.1613) grad: 0.0617 (0.0662) time: 0.4711 data: 0.0035 max mem: 22446 +train: [15] [240/400] eta: 0:01:16 lr: 0.000059 loss: 0.1543 (0.1605) grad: 0.0627 (0.0662) time: 0.4682 data: 0.0036 max mem: 22446 +train: [15] [260/400] eta: 0:01:07 lr: 0.000058 loss: 0.1601 (0.1613) grad: 0.0627 (0.0657) time: 0.4775 data: 0.0035 max mem: 22446 +train: [15] [280/400] eta: 0:00:57 lr: 0.000057 loss: 0.1682 (0.1615) grad: 0.0600 (0.0655) time: 0.4732 data: 0.0036 max mem: 22446 +train: [15] [300/400] eta: 0:00:49 lr: 0.000056 loss: 0.1589 (0.1612) grad: 0.0642 (0.0655) time: 0.6533 data: 0.2064 max mem: 22446 +train: [15] [320/400] eta: 0:00:39 lr: 0.000054 loss: 0.1589 (0.1613) grad: 0.0646 (0.0657) time: 0.4784 data: 0.0030 max mem: 22446 +train: [15] [340/400] eta: 0:00:29 lr: 0.000053 loss: 0.1628 (0.1620) grad: 0.0727 (0.0660) time: 0.4762 data: 0.0037 max mem: 22446 +train: [15] [360/400] eta: 0:00:19 lr: 0.000052 loss: 0.1628 (0.1617) grad: 0.0685 (0.0661) time: 0.4685 data: 0.0036 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.1455 (0.1610) grad: 0.0669 (0.0659) time: 0.4805 data: 0.0036 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.1626 (0.1618) grad: 0.0601 (0.0657) time: 0.4722 data: 0.0036 max mem: 22446 +train: [15] Total time: 0:03:14 (0.4870 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.1626 (0.1618) grad: 0.0601 (0.0657) +eval (validation): [15] [ 0/63] eta: 0:03:30 time: 3.3387 data: 3.0798 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:22 time: 0.3716 data: 0.0044 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3520 data: 0.0030 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3492 data: 0.0033 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3476 data: 0.0033 max mem: 22446 +eval (validation): [15] Total time: 0:00:25 (0.4086 s / it) +cv: [15] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.120 acc: 0.977 f1: 0.973 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [16] [ 0/400] eta: 0:23:05 lr: nan time: 3.4634 data: 3.0690 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:56 lr: 0.000048 loss: 0.1611 (0.1626) grad: 0.0653 (0.0662) time: 0.4798 data: 0.0039 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:17 lr: 0.000047 loss: 0.1642 (0.1640) grad: 0.0659 (0.0671) time: 0.4718 data: 0.0035 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:58 lr: 0.000046 loss: 0.1668 (0.1636) grad: 0.0659 (0.0654) time: 0.4732 data: 0.0037 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:42 lr: 0.000045 loss: 0.1619 (0.1647) grad: 0.0616 (0.0651) time: 0.4611 data: 0.0034 max mem: 22446 +train: [16] [100/400] eta: 0:02:31 lr: 0.000044 loss: 0.1670 (0.1656) grad: 0.0616 (0.0646) time: 0.4946 data: 0.0037 max mem: 22446 +train: [16] [120/400] eta: 0:02:19 lr: 0.000043 loss: 0.1598 (0.1639) grad: 0.0594 (0.0639) time: 0.4692 data: 0.0034 max mem: 22446 +train: [16] [140/400] eta: 0:02:09 lr: 0.000042 loss: 0.1553 (0.1630) grad: 0.0619 (0.0640) time: 0.4761 data: 0.0035 max mem: 22446 +train: [16] [160/400] eta: 0:01:58 lr: 0.000041 loss: 0.1546 (0.1618) grad: 0.0651 (0.0642) time: 0.4661 data: 0.0035 max mem: 22446 +train: [16] [180/400] eta: 0:01:47 lr: 0.000040 loss: 0.1582 (0.1622) grad: 0.0618 (0.0640) time: 0.4659 data: 0.0035 max mem: 22446 +train: [16] [200/400] eta: 0:01:37 lr: 0.000039 loss: 0.1595 (0.1623) grad: 0.0613 (0.0644) time: 0.4732 data: 0.0035 max mem: 22446 +train: [16] [220/400] eta: 0:01:27 lr: 0.000038 loss: 0.1595 (0.1620) grad: 0.0655 (0.0640) time: 0.4742 data: 0.0036 max mem: 22446 +train: [16] [240/400] eta: 0:01:17 lr: 0.000036 loss: 0.1612 (0.1619) grad: 0.0640 (0.0640) time: 0.4728 data: 0.0035 max mem: 22446 +train: [16] [260/400] eta: 0:01:07 lr: 0.000035 loss: 0.1612 (0.1625) grad: 0.0646 (0.0640) time: 0.4716 data: 0.0034 max mem: 22446 +train: [16] [280/400] eta: 0:00:58 lr: 0.000034 loss: 0.1570 (0.1620) grad: 0.0662 (0.0643) time: 0.4856 data: 0.0035 max mem: 22446 +train: [16] [300/400] eta: 0:00:49 lr: 0.000033 loss: 0.1570 (0.1621) grad: 0.0656 (0.0645) time: 0.6294 data: 0.1810 max mem: 22446 +train: [16] [320/400] eta: 0:00:39 lr: 0.000032 loss: 0.1564 (0.1618) grad: 0.0653 (0.0646) time: 0.4932 data: 0.0037 max mem: 22446 +train: [16] [340/400] eta: 0:00:29 lr: 0.000031 loss: 0.1633 (0.1627) grad: 0.0670 (0.0650) time: 0.4782 data: 0.0032 max mem: 22446 +train: [16] [360/400] eta: 0:00:19 lr: 0.000031 loss: 0.1643 (0.1626) grad: 0.0637 (0.0649) time: 0.4676 data: 0.0036 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.1567 (0.1615) grad: 0.0616 (0.0648) time: 0.4859 data: 0.0038 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.1427 (0.1613) grad: 0.0624 (0.0649) time: 0.4827 data: 0.0037 max mem: 22446 +train: [16] Total time: 0:03:16 (0.4914 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.1427 (0.1613) grad: 0.0624 (0.0649) +eval (validation): [16] [ 0/63] eta: 0:03:28 time: 3.3128 data: 3.0594 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:21 time: 0.3515 data: 0.0043 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3709 data: 0.0031 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3509 data: 0.0034 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3507 data: 0.0034 max mem: 22446 +eval (validation): [16] Total time: 0:00:25 (0.4082 s / it) +cv: [16] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.119 acc: 0.977 f1: 0.973 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:23:15 lr: nan time: 3.4878 data: 3.0881 max mem: 22446 +train: [17] [ 20/400] eta: 0:04:02 lr: 0.000028 loss: 0.1434 (0.1500) grad: 0.0663 (0.0652) time: 0.4960 data: 0.0035 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:20 lr: 0.000027 loss: 0.1550 (0.1574) grad: 0.0627 (0.0653) time: 0.4691 data: 0.0036 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:58 lr: 0.000026 loss: 0.1601 (0.1606) grad: 0.0597 (0.0637) time: 0.4647 data: 0.0036 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:44 lr: 0.000025 loss: 0.1709 (0.1635) grad: 0.0599 (0.0638) time: 0.4717 data: 0.0036 max mem: 22446 +train: [17] [100/400] eta: 0:02:31 lr: 0.000024 loss: 0.1619 (0.1627) grad: 0.0628 (0.0640) time: 0.4761 data: 0.0035 max mem: 22446 +train: [17] [120/400] eta: 0:02:19 lr: 0.000023 loss: 0.1586 (0.1618) grad: 0.0572 (0.0630) time: 0.4703 data: 0.0034 max mem: 22446 +train: [17] [140/400] eta: 0:02:08 lr: 0.000023 loss: 0.1628 (0.1605) grad: 0.0572 (0.0630) time: 0.4621 data: 0.0034 max mem: 22446 +train: [17] [160/400] eta: 0:01:57 lr: 0.000022 loss: 0.1689 (0.1606) grad: 0.0616 (0.0632) time: 0.4698 data: 0.0036 max mem: 22446 +train: [17] [180/400] eta: 0:01:47 lr: 0.000021 loss: 0.1648 (0.1609) grad: 0.0633 (0.0631) time: 0.4827 data: 0.0034 max mem: 22446 +train: [17] [200/400] eta: 0:01:37 lr: 0.000020 loss: 0.1577 (0.1603) grad: 0.0640 (0.0634) time: 0.4647 data: 0.0034 max mem: 22446 +train: [17] [220/400] eta: 0:01:27 lr: 0.000019 loss: 0.1577 (0.1602) grad: 0.0646 (0.0634) time: 0.4552 data: 0.0035 max mem: 22446 +train: [17] [240/400] eta: 0:01:17 lr: 0.000019 loss: 0.1567 (0.1607) grad: 0.0614 (0.0634) time: 0.4655 data: 0.0033 max mem: 22446 +train: [17] [260/400] eta: 0:01:07 lr: 0.000018 loss: 0.1577 (0.1611) grad: 0.0648 (0.0635) time: 0.4766 data: 0.0034 max mem: 22446 +train: [17] [280/400] eta: 0:00:57 lr: 0.000017 loss: 0.1539 (0.1601) grad: 0.0623 (0.0633) time: 0.4799 data: 0.0035 max mem: 22446 +train: [17] [300/400] eta: 0:00:49 lr: 0.000016 loss: 0.1419 (0.1601) grad: 0.0623 (0.0634) time: 0.6907 data: 0.2160 max mem: 22446 +train: [17] [320/400] eta: 0:00:39 lr: 0.000016 loss: 0.1539 (0.1601) grad: 0.0626 (0.0635) time: 0.4604 data: 0.0041 max mem: 22446 +train: [17] [340/400] eta: 0:00:29 lr: 0.000015 loss: 0.1545 (0.1601) grad: 0.0604 (0.0633) time: 0.4825 data: 0.0036 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.1523 (0.1594) grad: 0.0604 (0.0632) time: 0.4767 data: 0.0034 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.1523 (0.1598) grad: 0.0608 (0.0631) time: 0.4687 data: 0.0036 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.1644 (0.1597) grad: 0.0622 (0.0630) time: 0.4655 data: 0.0035 max mem: 22446 +train: [17] Total time: 0:03:16 (0.4903 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.1644 (0.1597) grad: 0.0622 (0.0630) +eval (validation): [17] [ 0/63] eta: 0:03:33 time: 3.3919 data: 3.0735 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:22 time: 0.3850 data: 0.0042 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:10 time: 0.3497 data: 0.0035 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3513 data: 0.0031 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3491 data: 0.0031 max mem: 22446 +eval (validation): [17] Total time: 0:00:26 (0.4139 s / it) +cv: [17] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.376 acc: 0.977 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:58 lr: nan time: 3.4469 data: 3.0413 max mem: 22446 +train: [18] [ 20/400] eta: 0:04:07 lr: 0.000012 loss: 0.1533 (0.1596) grad: 0.0613 (0.0606) time: 0.5126 data: 0.0040 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:22 lr: 0.000012 loss: 0.1528 (0.1580) grad: 0.0613 (0.0614) time: 0.4684 data: 0.0036 max mem: 22446 +train: [18] [ 60/400] eta: 0:03:00 lr: 0.000011 loss: 0.1520 (0.1568) grad: 0.0627 (0.0613) time: 0.4666 data: 0.0035 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:44 lr: 0.000011 loss: 0.1533 (0.1567) grad: 0.0613 (0.0616) time: 0.4623 data: 0.0035 max mem: 22446 +train: [18] [100/400] eta: 0:02:31 lr: 0.000010 loss: 0.1597 (0.1604) grad: 0.0647 (0.0628) time: 0.4668 data: 0.0035 max mem: 22446 +train: [18] [120/400] eta: 0:02:19 lr: 0.000009 loss: 0.1597 (0.1589) grad: 0.0648 (0.0634) time: 0.4732 data: 0.0035 max mem: 22446 +train: [18] [140/400] eta: 0:02:08 lr: 0.000009 loss: 0.1508 (0.1588) grad: 0.0633 (0.0632) time: 0.4693 data: 0.0034 max mem: 22446 +train: [18] [160/400] eta: 0:01:58 lr: 0.000008 loss: 0.1619 (0.1582) grad: 0.0586 (0.0631) time: 0.4818 data: 0.0035 max mem: 22446 +train: [18] [180/400] eta: 0:01:48 lr: 0.000008 loss: 0.1481 (0.1575) grad: 0.0640 (0.0635) time: 0.4703 data: 0.0036 max mem: 22446 +train: [18] [200/400] eta: 0:01:37 lr: 0.000007 loss: 0.1489 (0.1571) grad: 0.0654 (0.0635) time: 0.4667 data: 0.0035 max mem: 22446 +train: [18] [220/400] eta: 0:01:27 lr: 0.000007 loss: 0.1489 (0.1565) grad: 0.0616 (0.0633) time: 0.4806 data: 0.0034 max mem: 22446 +train: [18] [240/400] eta: 0:01:18 lr: 0.000006 loss: 0.1512 (0.1562) grad: 0.0616 (0.0634) time: 0.4927 data: 0.0034 max mem: 22446 +train: [18] [260/400] eta: 0:01:08 lr: 0.000006 loss: 0.1538 (0.1566) grad: 0.0604 (0.0631) time: 0.4759 data: 0.0035 max mem: 22446 +train: [18] [280/400] eta: 0:00:58 lr: 0.000006 loss: 0.1542 (0.1568) grad: 0.0636 (0.0635) time: 0.4567 data: 0.0036 max mem: 22446 +train: [18] [300/400] eta: 0:00:49 lr: 0.000005 loss: 0.1542 (0.1567) grad: 0.0670 (0.0638) time: 0.6520 data: 0.1906 max mem: 22446 +train: [18] [320/400] eta: 0:00:39 lr: 0.000005 loss: 0.1590 (0.1569) grad: 0.0638 (0.0638) time: 0.4791 data: 0.0030 max mem: 22446 +train: [18] [340/400] eta: 0:00:29 lr: 0.000004 loss: 0.1573 (0.1573) grad: 0.0617 (0.0637) time: 0.4726 data: 0.0035 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.1568 (0.1570) grad: 0.0613 (0.0639) time: 0.4784 data: 0.0036 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.1566 (0.1566) grad: 0.0619 (0.0638) time: 0.4807 data: 0.0036 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.1520 (0.1561) grad: 0.0619 (0.0637) time: 0.4637 data: 0.0035 max mem: 22446 +train: [18] Total time: 0:03:16 (0.4913 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.1520 (0.1561) grad: 0.0619 (0.0637) +eval (validation): [18] [ 0/63] eta: 0:03:39 time: 3.4810 data: 3.2180 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:22 time: 0.3733 data: 0.0322 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:10 time: 0.3557 data: 0.0036 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3404 data: 0.0026 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3390 data: 0.0025 max mem: 22446 +eval (validation): [18] Total time: 0:00:25 (0.4103 s / it) +cv: [18] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.374 acc: 0.977 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:24:09 lr: nan time: 3.6227 data: 3.2029 max mem: 22446 +train: [19] [ 20/400] eta: 0:04:02 lr: 0.000003 loss: 0.1646 (0.1570) grad: 0.0601 (0.0587) time: 0.4902 data: 0.0029 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:21 lr: 0.000003 loss: 0.1590 (0.1589) grad: 0.0633 (0.0638) time: 0.4736 data: 0.0037 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:59 lr: 0.000002 loss: 0.1606 (0.1582) grad: 0.0638 (0.0630) time: 0.4684 data: 0.0034 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:45 lr: 0.000002 loss: 0.1617 (0.1575) grad: 0.0610 (0.0627) time: 0.4853 data: 0.0035 max mem: 22446 +train: [19] [100/400] eta: 0:02:32 lr: 0.000002 loss: 0.1546 (0.1599) grad: 0.0610 (0.0624) time: 0.4749 data: 0.0033 max mem: 22446 +train: [19] [120/400] eta: 0:02:20 lr: 0.000002 loss: 0.1575 (0.1590) grad: 0.0593 (0.0621) time: 0.4721 data: 0.0034 max mem: 22446 +train: [19] [140/400] eta: 0:02:10 lr: 0.000001 loss: 0.1510 (0.1576) grad: 0.0610 (0.0621) time: 0.4924 data: 0.0035 max mem: 22446 +train: [19] [160/400] eta: 0:01:59 lr: 0.000001 loss: 0.1510 (0.1578) grad: 0.0616 (0.0620) time: 0.4838 data: 0.0037 max mem: 22446 +train: [19] [180/400] eta: 0:01:49 lr: 0.000001 loss: 0.1561 (0.1575) grad: 0.0635 (0.0624) time: 0.4721 data: 0.0035 max mem: 22446 +train: [19] [200/400] eta: 0:01:38 lr: 0.000001 loss: 0.1512 (0.1573) grad: 0.0641 (0.0623) time: 0.4772 data: 0.0035 max mem: 22446 +train: [19] [220/400] eta: 0:01:28 lr: 0.000001 loss: 0.1557 (0.1581) grad: 0.0614 (0.0627) time: 0.4788 data: 0.0034 max mem: 22446 +train: [19] [240/400] eta: 0:01:18 lr: 0.000001 loss: 0.1583 (0.1574) grad: 0.0598 (0.0623) time: 0.4940 data: 0.0034 max mem: 22446 +train: [19] [260/400] eta: 0:01:08 lr: 0.000000 loss: 0.1508 (0.1573) grad: 0.0604 (0.0626) time: 0.4798 data: 0.0036 max mem: 22446 +train: [19] [280/400] eta: 0:00:58 lr: 0.000000 loss: 0.1598 (0.1579) grad: 0.0640 (0.0624) time: 0.4741 data: 0.0036 max mem: 22446 +train: [19] [300/400] eta: 0:00:50 lr: 0.000000 loss: 0.1687 (0.1584) grad: 0.0594 (0.0625) time: 0.6465 data: 0.1840 max mem: 22446 +train: [19] [320/400] eta: 0:00:39 lr: 0.000000 loss: 0.1602 (0.1583) grad: 0.0594 (0.0626) time: 0.4663 data: 0.0030 max mem: 22446 +train: [19] [340/400] eta: 0:00:29 lr: 0.000000 loss: 0.1454 (0.1579) grad: 0.0636 (0.0626) time: 0.4630 data: 0.0035 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.1565 (0.1579) grad: 0.0641 (0.0627) time: 0.4732 data: 0.0035 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.1619 (0.1581) grad: 0.0641 (0.0627) time: 0.4751 data: 0.0036 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.1632 (0.1583) grad: 0.0605 (0.0628) time: 0.4820 data: 0.0035 max mem: 22446 +train: [19] Total time: 0:03:17 (0.4942 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.1632 (0.1583) grad: 0.0605 (0.0628) +eval (validation): [19] [ 0/63] eta: 0:03:33 time: 3.3937 data: 3.1271 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:22 time: 0.3751 data: 0.0030 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:10 time: 0.3644 data: 0.0033 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3611 data: 0.0033 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3629 data: 0.0033 max mem: 22446 +eval (validation): [19] Total time: 0:00:26 (0.4193 s / it) +cv: [19] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.374 acc: 0.977 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9774305555555556, "hparam": [16, 1.0], "hparam_id": 41, "epoch": 19, "is_best": false, "best_score": 0.9774305555555556} +eval (train): [20] [ 0/297] eta: 0:15:29 time: 3.1295 data: 2.8845 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:21 time: 0.3795 data: 0.0045 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:53 time: 0.3711 data: 0.0032 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:39 time: 0.3689 data: 0.0035 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:27 time: 0.3600 data: 0.0034 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:18 time: 0.3653 data: 0.0035 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:09 time: 0.3581 data: 0.0035 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:00 time: 0.3493 data: 0.0032 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:52 time: 0.3546 data: 0.0034 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:44 time: 0.3891 data: 0.0035 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:36 time: 0.3604 data: 0.0032 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3566 data: 0.0031 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3367 data: 0.0032 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3683 data: 0.0034 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3763 data: 0.0035 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3694 data: 0.0033 max mem: 22446 +eval (train): [20] Total time: 0:01:51 (0.3742 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:13 time: 3.0674 data: 2.7707 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:22 time: 0.3905 data: 0.0053 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:10 time: 0.3490 data: 0.0030 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3581 data: 0.0033 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3563 data: 0.0033 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4126 s / it) +eval (test): [20] [ 0/79] eta: 0:04:06 time: 3.1239 data: 2.8699 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:31 time: 0.4061 data: 0.0153 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3613 data: 0.0032 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:08 time: 0.3925 data: 0.0035 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3681 data: 0.0034 max mem: 22446 +eval (test): [20] Total time: 0:00:33 (0.4195 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9774305555555556, "hparam": [5.1, 1.0], "hparam_id": 34, "epoch": 15, "is_best": true, "best_score": 0.9774305555555556} +eval (train): [20] [ 0/297] eta: 0:15:30 time: 3.1323 data: 2.8255 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:28 time: 0.4047 data: 0.0047 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:58 time: 0.3869 data: 0.0034 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:42 time: 0.3656 data: 0.0036 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:29 time: 0.3511 data: 0.0034 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:19 time: 0.3728 data: 0.0036 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:11 time: 0.3932 data: 0.0036 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:02 time: 0.3599 data: 0.0034 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:54 time: 0.4071 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:46 time: 0.3698 data: 0.0035 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:37 time: 0.3650 data: 0.0035 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3630 data: 0.0033 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:22 time: 0.3896 data: 0.0038 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:14 time: 0.3634 data: 0.0034 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3852 data: 0.0036 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3512 data: 0.0032 max mem: 22446 +eval (train): [20] Total time: 0:01:54 (0.3864 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:13 time: 3.0663 data: 2.8239 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3537 data: 0.0115 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3636 data: 0.0032 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3608 data: 0.0032 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3569 data: 0.0033 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4058 s / it) +eval (test): [20] [ 0/79] eta: 0:04:14 time: 3.2235 data: 2.9206 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:30 time: 0.3855 data: 0.0080 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3620 data: 0.0031 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3747 data: 0.0037 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3444 data: 0.0034 max mem: 22446 +eval (test): [20] Total time: 0:00:32 (0.4068 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|-----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.00153 | 0.05 | 34 | [5.1, 1.0] | train | 0.00011797 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.00153 | 0.05 | 34 | [5.1, 1.0] | validation | 0.1196 | 0.97743 | 0.0023443 | 0.97258 | 0.0031429 | +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.00153 | 0.05 | 34 | [5.1, 1.0] | test | 0.13511 | 0.97321 | 0.0022771 | 0.97014 | 0.0027322 | + + +done! total time: 1:20:39 diff --git a/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..36ede0735c24757a5771e380eefbe8d5e3291187 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.1785051012039185, "train/grad": 0.2380917750298977, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.021131591796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.017987060546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.0128076171875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.00771728515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.00275634765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.9956103515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.987650146484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.9787744140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.96708740234375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.95455078125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.942225341796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9237261962890626, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9056842041015627, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.8796282958984376, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.854505920410156, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.8305758666992187, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8001629638671877, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.7644273376464845, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.7244371032714843, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.6872096252441406, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6410638427734376, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.593242340087891, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.539608154296875, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.482239761352539, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.4226164627075195, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.352080841064453, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2900527000427244, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2348310852050783, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.161925973892212, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0788190364837646, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0070489954948427, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9428795444965363, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8555413964390755, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7756639960408211, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6805511114001275, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5874607557058333, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4928645650297403, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.408155433535576, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.3233613410964609, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2273520325869323, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.1579279416427015, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1012480894103647, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0335535848885775, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.9790138603746891, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.9225490050576627, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.866080023124814, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.8198572699632496, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.7735838499385863, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.7369232406839729, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.043616762906312945, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04355354022234678, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04344896188005805, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.043344538090750576, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.04323913538828492, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04308971473015845, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.04292218098416924, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.042732438258826735, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.04247938902117312, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04220326224341989, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.04192770067602396, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.041506126401945946, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04108363540843129, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.040459039211273196, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03984712175093591, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03925800464116037, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.038521184949204326, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037689796844497324, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03682242532260716, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.036077738581225274, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03522939858026802, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.034424222046509384, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03359880911186337, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03277679816819727, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.031987308394163846, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.031113612074404953, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.030396715076640247, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.029795558797195552, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.029054483054205774, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.028281053169630466, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02767301766667515, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.027188622998073696, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.026637209095060826, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02625355409923941, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.025916881980374454, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.025548811526969074, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02516447342466563, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.024754393864423036, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02427109284559265, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02394306532572955, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.023672773784492163, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.023602459949906916, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0230818216688931, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02272280602250248, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.022829638319090007, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02282293020747602, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.022468065838329495, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02266303960699588, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.022896747414488345, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9797849655151367, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.970625638961792, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.9555766582489014, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.9405946731567383, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.925680637359619, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.905061960220337, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.882112979888916, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.856748104095459, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.823554277420044, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7884645462036133, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.754610061645508, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.704545021057129, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.656942844390869, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.590061664581299, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5282015800476074, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.471071243286133, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4011805057525635, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3216280937194824, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.235386371612549, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.156846284866333, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.0619699954986572, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.9668965339660645, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.865034580230713, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.7617595195770264, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.6613394021987915, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.5515503883361816, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.4618053436279297, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.3862919807434082, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.2912685871124268, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.182983636856079, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.0806152820587158, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.9706708192825317, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.7885920405387878, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.6269670724868774, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.4796197712421417, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.3625893294811249, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2919313311576843, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.254534512758255, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2319023460149765, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2138213813304901, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2053128033876419, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.19879767298698425, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.19534321129322052, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.18621666729450226, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.1994095742702484, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.24248062074184418, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.27053385972976685, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.25702083110809326, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.23680175840854645, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.1259920634920635, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.14756944444444445, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.17584325396825398, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.18675595238095238, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.1939484126984127, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.19370039682539683, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.19246031746031747, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.18948412698412698, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.19072420634920634, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1902281746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.18998015873015872, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2013888888888889, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21577380952380953, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25148809523809523, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.27901785714285715, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.3003472222222222, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.33110119047619047, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.35912698412698413, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.3841765873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.40625, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.44146825396825395, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.46924603174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.4913194444444444, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.5205853174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.5443948412698413, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.5570436507936508, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.5652281746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.5753968253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.591765873015873, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.628968253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.6607142857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.6961805555555556, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.7467757936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.7953869047619048, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.84375, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8938492063492064, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9112103174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9211309523809523, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9255952380952381, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9310515873015873, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9305555555555556, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9337797619047619, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9337797619047619, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9372519841269841, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9327876984126984, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9250992063492064, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9206349206349206, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9241071428571429, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9285714285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.02891274086861629, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.03508882287552811, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.04112968698535939, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.04251342848488659, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.042692260411895507, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.041735176149515975, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.04068429295729681, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03930291207733125, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03764782082276101, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.036267549016985555, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.036730789121644854, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05016704517398084, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06354564235339291, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.08806221188235337, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10210732173641184, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.11325529096036786, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.131988564842612, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1458104385391882, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1579857987735863, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.17216852651728168, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19722144604703432, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21620083603304302, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.23189838317662317, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2624965447619611, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.29312942371602063, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.31144060633494386, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.32765793808142973, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.34418389682061074, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.37664260436547536, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.44907126016710164, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.50375711883977, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.5594674196985258, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.6467321255646246, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.7382242775102833, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.800028211854549, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.864278708389355, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.8826952551936484, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9026221965311476, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.907249675760339, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9109146365441293, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9084188319734645, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9116379832926172, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9073847752971647, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9156534002881541, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9112625260799481, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.907545214354897, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9042369751796853, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9095845519670333, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9200334562075833, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.9790138603746891, "validation/loss_best": 0.18621666729450226, "validation/acc_best": 0.9372519841269841, "validation/f1_best": 0.9156534002881541} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.2865473824739455, "train/grad": 0.17753793187439443, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.8994110107421873, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.8795831298828123, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.8473016357421876, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8159967041015626, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.7855316162109376, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.7446661376953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.7000765991210938, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.652593688964844, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.593437042236328, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.5337416076660157, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.4782901000976563, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.3999007415771483, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.3283836364746096, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.23170783996582, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.145677375793457, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.068554039001465, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.97705228805542, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.8776901435852051, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.775987606048584, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.6889643096923828, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.589946084022522, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.4948293733596802, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.392861955165863, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.2810371267795562, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.1593801611661911, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.0136414113640786, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.8874557200074196, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.7818498227745294, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.6562164480984211, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.5313395065814257, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.44026751663535835, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.37224851284176114, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.29990456560626627, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.2539796914719045, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.21738100539892913, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.19370019874535502, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.1770588923804462, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.16436010866425932, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.15286601169034839, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.14292765583842992, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.13458163041621446, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.13055930664762855, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.13120888182893395, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.13617845945060253, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.14222369390539824, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.14580758889205753, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.1670682366285473, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.16853491713292898, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.256861158888787, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04071254212409258, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.04027789415791631, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.039561544302850964, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03884940067306161, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03815073559060693, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0371904087997973, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0361389257479459, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0350278726965189, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03369500597938895, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03244055391289294, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03136605331674218, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.030000557955354452, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02889878914691508, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027604658538475633, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02661021552979946, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.025819825818762184, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02497852884232998, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02416363143362105, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.023409103993326425, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02282274033408612, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02223205966874957, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02175200484227389, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021357074831612408, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02107287076301873, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02090109231881797, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02073887745384127, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.020161503544077277, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.019812533683143556, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.019048539721406996, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.01850642162375152, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.018123670765198767, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.017787326527759433, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.017178174555301667, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.016586184767074883, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.01592982126865536, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.015485826688818633, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.015193949101958424, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.014865344678983093, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014456026789266616, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.014254501136019827, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.013748821923509241, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013613022784702479, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.014105043006129563, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014681438953848555, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01541972413077019, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01568328947061673, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.018921133113326504, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01931503099622205, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02689680553972721, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.807727575302124, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.77582049369812, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.725027084350586, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.6765520572662354, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.630570411682129, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.5698537826538086, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.5060343742370605, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.439683437347412, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.359369993209839, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.2803196907043457, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.2078988552093506, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.107229709625244, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.017162561416626, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.8990687131881714, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.7982158660888672, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.7114163637161255, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.613482117652893, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.5123021602630615, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.4128954410552979, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.3292431831359863, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.2314144372940063, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.127070426940918, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.9870020747184753, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.7989204525947571, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.6110977530479431, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.4452746510505676, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.35425707697868347, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.3034150302410126, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.2615172863006592, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.22965498268604279, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.21035976707935333, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.19725896418094635, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.18229933083057404, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.16969124972820282, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.156540647149086, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14645881950855255, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13928525149822235, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1335422247648239, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12963919341564178, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.12412505596876144, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1341768205165863, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1277536153793335, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.13330641388893127, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.15747739374637604, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.14202861487865448, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.16218382120132446, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.2342873215675354, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3116445243358612, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.5713974237442017, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.18725198412698413, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.1884920634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.19047619047619047, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.20337301587301587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.21850198412698413, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.25124007936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.2854662698412698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.3226686507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.3603670634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.390625, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.4176587301587302, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.455109126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.4880952380952381, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5106646825396826, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5260416666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.5379464285714286, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.5463789682539683, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.5567956349206349, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.5744047619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.5984623015873016, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.6309523809523809, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.667906746031746, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7093253968253969, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.7566964285714286, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.8204365079365079, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8687996031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.902281746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9114583333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9233630952380952, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9320436507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9350198412698413, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9375, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9429563492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9464285714285714, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9508928571428571, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9561011904761905, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9578373015873016, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9595734126984127, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9615575396825397, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9556051587301587, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9583333333333334, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9461805555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9379960317460317, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9317956349206349, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.034874922364277784, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0355538252520989, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0417312574865194, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.05708817581185514, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.07102277080991849, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.09341959572113312, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.11326095289625575, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.13675243400369125, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.15283272966504, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.1656930086525838, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.185145466157512, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.2094914018187475, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.2301464583875465, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.2504466073854153, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.27210336657676737, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2875626505693988, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2976165998896151, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.3191190379265857, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.35448104972501426, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.4040480734848328, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.4642276839690692, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.5224613499569055, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.5800410985103109, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.6549387976556091, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.7687794583363112, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.8391521953756228, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8744575019174439, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8866039352708971, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9052637111606443, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9163279224015946, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9205412081590804, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9247973212438685, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9317916303998087, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.936846286186094, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9428219800447182, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9476420344827714, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9490778221763698, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9521449302137531, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.954412088308278, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9580026168463538, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9514354173846012, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9556793593251828, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9569762269886959, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.947502742073809, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9581614957887344, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9523519684359544, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9383785116143647, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9197864537293802, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9154239505145654, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.14292765583842992, "validation/loss_best": 0.12412505596876144, "validation/acc_best": 0.9630456349206349, "validation/f1_best": 0.9580026168463538} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.997420611679554, "train/grad": 0.221843975558877, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.686680908203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.6427761840820314, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5742596435546874, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5112158203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.452891540527344, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.3777996826171877, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.2998914337158203, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.2201891326904297, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.124700889587402, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.0328010940551757, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.9510355377197266, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.841620578765869, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.7484349822998047, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.6320882034301758, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.536221408843994, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.4545026063919066, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.3598120903968811, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.2518981838226317, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.1235334569215774, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.9962967331707477, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.8373245891928672, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.6823887878656387, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.5339081795513629, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.4128115963190794, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.3262958996370435, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.26083839509636164, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.22500846706330777, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.20247847257182003, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.1799379713088274, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.1600527145061642, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.14667990593239666, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.13689663785509765, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.12638746911659837, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.11844146412797273, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.10966388604603708, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.10222434828989208, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0960686663351953, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09200381984002888, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.091073534861207, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.09110948098823428, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.09522338582202793, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.09898852517828345, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.12378893436864019, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1820966214220971, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.20120039987377822, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.42086622522212563, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.777215884514153, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.0369048894941806, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.187081689955667, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03636032243259251, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03527441693469882, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03360275086946785, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03214456032961607, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.030903337048366664, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02949554833583534, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.028261762345209718, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02720694293268025, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.026147535098716618, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.025263260966166854, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024549389276653528, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023656634772196414, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022946387967094778, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022131503205746413, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02154396259225905, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02112055561039597, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02073125460650772, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02042128651868552, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0201916345115751, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020042302114889027, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019939890317618846, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01963998531457037, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01884785035159439, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.017938192342408, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.017135685328394174, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.016179815577343108, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.015307321299333125, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014713259746786207, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014151668443810194, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013585662690456957, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.013268888571765274, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.013038225751370192, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.012839620113372802, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.012599272150546313, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.012170361777534708, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011752754824701696, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011503488577436655, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011677224023733288, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012037299273069948, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.012335874112322927, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.012913975543924608, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013926005191751755, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01647041437216103, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.023016600434784776, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02483976449817419, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0394353862456046, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.06015552082564682, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07095072869211436, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.12033285125158727, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.567998170852661, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5137219429016113, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.4308011531829834, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.3559627532958984, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.2873494625091553, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.1997947692871094, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.10945463180542, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.018188953399658, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.911051869392395, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.8111876249313354, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.7250417470932007, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.613659381866455, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.5217574834823608, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.408846139907837, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.315107822418213, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.2315665483474731, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.1232761144638062, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9709876775741577, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.7664167284965515, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.5974353551864624, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.43979620933532715, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.344400554895401, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.286382794380188, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2483639121055603, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.22196638584136963, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.19892102479934692, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.18237225711345673, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.170551598072052, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.15808239579200745, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.14715708792209625, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.13943468034267426, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.132331982254982, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12532401084899902, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12300796061754227, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11717633157968521, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11740104854106903, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1167825311422348, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1149866133928299, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.123389333486557, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13326983153820038, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1663113236427307, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.15417973697185516, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.28281739354133606, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.42389047145843506, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4445948004722595, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.1196945905685425, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.5766406059265137, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.82515549659729, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1502583026885986, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.2527281746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.28125, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.32068452380952384, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.35367063492063494, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3767361111111111, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.40848214285714285, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.4417162698412698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.47544642857142855, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5034722222222222, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5257936507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5394345238095238, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5560515873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5701884920634921, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5865575396825397, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6103670634920635, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6388888888888888, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6674107142857143, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7125496031746031, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7740575396825397, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8415178571428571, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.886656746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9037698412698413, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9184027777777778, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9288194444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9352678571428571, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9422123015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.949156746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9528769841269841, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.957093253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9598214285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9603174603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.966765873015873, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9580853174603174, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9637896825396826, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9422123015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.933531746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9360119047619048, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9154265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9186507936507936, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8794642857142857, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9117063492063492, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.09196421927473597, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.10858980170738441, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.13232900833383066, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1485332185501242, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.16116983502361962, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.18186376755535252, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.20617224758719743, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.22834059330543507, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.25312657646103615, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.27568794687066217, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.28889193952860176, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.30689242545079676, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.32953137582310216, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.363187820792018, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.40963211664444393, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.4620869266497986, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.5145898485231878, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.5824451749124577, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.6894189406672981, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.8013500888146972, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8555586793233829, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.881314515903791, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9007841725275691, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.91424352053519, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9225482333318091, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9317278992673359, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9390458353695392, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9426534848864853, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9460272204890849, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9474236360839954, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9510748288135424, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9508035590319537, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9507465566804221, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9501600677133762, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9542596988121514, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9543244148131371, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9568084583816837, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9595770941617138, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9612445308638365, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9611568664041885, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9450906906171229, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.953223329889687, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9209338174089868, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9088824160684775, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9201670486473361, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8946337705184533, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8966214790400349, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8462602795330965, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.886974743574674, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.09110948098823428, "validation/loss_best": 0.13326983153820038, "validation/acc_best": 0.966765873015873, "validation/f1_best": 0.9611568664041885} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.8994612494111061, "train/grad": 0.32007717452943324, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.4435829162597655, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.3809617614746093, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.286301727294922, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.2010021209716797, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.1233695220947264, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.025787239074707, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.927533073425293, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.8315142822265624, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.7227542877197266, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.6243439292907715, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.541201219558716, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.434213457107544, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3434394931793212, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.2211286330223083, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.0980650633573532, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.9694886320829391, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.802511625289917, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.6199689967930317, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.4619202037155628, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.36310771413147447, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.28875298477709294, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2443133944272995, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.21248821791261435, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.1890064107812941, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.170760397631675, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.15304340770468117, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.13931806363165378, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.12841709394939244, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.11575905829668046, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.10364266145043075, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.09444962562061847, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.08663611808791756, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.07700246918015181, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.07055807017721236, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06428575243800878, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06086717800237238, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06427671108394861, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0659299795795232, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07541034629568458, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08563331301324069, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.12148175277747214, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.14984378462657333, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.308489838950336, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.5272857471369207, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.8795606840867549, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.9597533554118126, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.4687691869027912, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.458546122983098, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.287124536857009, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.030209414483979343, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.029070506282150747, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0276490284409374, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.026625383682549, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.025829723151400686, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.024938046652823686, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.024107620287686586, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023326214542612432, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022487118849530818, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021777505027130245, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021235195547342302, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02064356787595898, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020253287139348687, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019895418449304998, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019702183124609293, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0195902603212744, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019437243174761535, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018803647137247026, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.017597470465116203, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.016531615843996407, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015531826179940254, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014895678260363638, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014326308714225888, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013821681893896312, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013341562494169922, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012824459949042648, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012374915322288871, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012027580321300775, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01168372661108151, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011394875347614289, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011000098364893346, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.010601390881929547, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01005008008563891, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.009712992251152172, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009451342613901943, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.009351191850146278, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010352453769883141, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.011252006753347815, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012657901517522987, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01472245152166579, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.019038625463144852, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.020490284738771153, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03400788155850023, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04637962146662176, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06589819567510859, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07195470653474331, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09876011880114674, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.17052632896229625, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1721968354843557, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.3198063373565674, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.249558687210083, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.143629312515259, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.0493111610412598, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.9651172161102295, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.8612929582595825, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.7597754001617432, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.6632956266403198, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.5566043853759766, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.461737036705017, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.3811376094818115, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.2748684883117676, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.1779371500015259, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.0232259035110474, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.8417132496833801, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.6737285852432251, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.5047438740730286, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3747587502002716, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.3053053617477417, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.26724863052368164, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.2360326200723648, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.21327340602874756, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.19427868723869324, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.17862509191036224, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1663672924041748, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.155047208070755, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1456870138645172, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1378699392080307, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12979261577129364, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12623989582061768, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12422359734773636, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11977285146713257, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11330569535493851, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11104587465524673, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11523494124412537, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11775769293308258, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1455041468143463, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1574527472257614, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2900756895542145, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2745826840400696, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3136569857597351, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.2564573287963867, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.9544174075126648, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.9112122654914856, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.7615947723388672, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.4484446048736572, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.979325294494629, "validation/loss_047_lr4.3e+01_wd1.0e+00": 7.662059783935547, "validation/loss_048_lr5.0e+01_wd1.0e+00": 7.572178363800049, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.3695436507936508, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.39533730158730157, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.43774801587301587, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.4694940476190476, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.49528769841269843, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5094246031746031, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5238095238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5354662698412699, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.546875, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.564484126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.580109126984127, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6165674603174603, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6530257936507936, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6951884920634921, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7420634920634921, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8035714285714286, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8740079365079365, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8993055555555556, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9134424603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9243551587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9322916666666666, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9471726190476191, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.955109126984127, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9580853174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9595734126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9595734126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9608134920634921, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9588293650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9417162698412699, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9375, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9434523809523809, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.8824404761904762, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9327876984126984, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.939484126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9228670634920635, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9122023809523809, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8826884920634921, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8861607142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.904265873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.15644879637537656, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.16885936807868762, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.19785307702323873, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.2189555687452848, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.23510067851880997, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.24623921770432158, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2627587236109104, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.2803194733028157, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.2981252246152817, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.33022911609741495, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.36552634938070705, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4318713315731196, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.49302274955056724, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.5601177208593624, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6381660692304436, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.7531742085130141, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8450057955071366, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8751067861925541, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.8920858200447358, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9064700509661544, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9170860252288845, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9248439206227714, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.929379784138889, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9343687897277594, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9393327501025994, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9460815580824903, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9472912197396817, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9507054218041546, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9524862825237703, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.953641100473299, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9544873313913262, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9546263770693586, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9577639037928419, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9593954119434885, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9618139039718168, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9599927138706803, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9466728348251037, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9467530661402729, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9139631431234224, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9251143387084472, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9288839242220578, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8412861199461458, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9235283735501896, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.921561558933196, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9073998831304234, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8770666593876095, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8499819723922983, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8554916600321976, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8664337629022685, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.06428575243800878, "validation/loss_best": 0.11523494124412537, "validation/acc_best": 0.9665178571428571, "validation/f1_best": 0.9618139039718168} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.1242293015122413, "train/grad": 0.5836309938132763, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.199742431640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.1229410552978516, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.0088973236083985, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.909887237548828, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.823228759765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.7192512512207032, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.6197219848632813, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.5263173484802246, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.422894787788391, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.3279589319229126, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.2418851041793824, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.10956352353096, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.966105564236641, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.7536446659266949, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.5759650003910065, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.4496170374751091, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.34498187474906444, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.27692481443285943, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.23425370521843433, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.20820898588746786, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.1848963194526732, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.1666515571437776, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.15013759148307146, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.1352007489372045, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.12224163442850113, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.10846164704300464, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.09767807119525969, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.08906827748753131, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0784241664595902, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.06749674297869206, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.05911321572959423, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.05312397493980825, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.04881553269922733, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.045746181355789305, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.042149427002295854, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0401528196875006, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06157085156999528, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07193641770631075, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09828922471031547, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.21020147217437624, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.3556033223494887, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.6410294109396637, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8794321362581105, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.177030080864206, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.3243789120670408, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.9113012364134192, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.4435333797335623, "train/loss_047_lr4.3e+01_wd1.0e+00": 8.688295060098172, "train/loss_048_lr5.0e+01_wd1.0e+00": 10.89328624010086, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.026632738392800094, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02585796846076846, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.024838774157688023, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.024021167121827603, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02333750907331705, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022557716388255357, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02187074457295239, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021301858443766832, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02079069988336414, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0204358886834234, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020216394648887217, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020014723404310642, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01985964910592884, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01952601275406778, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0186716179177165, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01753909501247108, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.016021545049734413, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.014874585140496492, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.014144935458898545, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.013628655367065222, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.013095414033159614, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01261101726675406, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.012156408485025168, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0117228421731852, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.011313560340786353, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.010803204564144835, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.01037127148360014, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.009987461351556703, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.009509141378803179, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.009013986919308081, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.008617914113565349, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.008419300272362307, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0085999468783848, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.008690222338191233, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.008830725937150419, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008504759800853207, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011733182000461966, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01315477328520501, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.016391949714161455, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.026885734198149294, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03667739521479234, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.052986298999749125, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06636455962434411, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.08489594114129431, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.09012883294906715, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.11455968622118234, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1894964300096035, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.30295196682214737, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.37632268510758876, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.0804710388183594, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9978971481323242, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.877616047859192, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.775586724281311, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.6883792877197266, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.5856072902679443, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.4887675046920776, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.397948145866394, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.2951542139053345, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.195658564567566, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.095660924911499, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9170514345169067, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.7316069006919861, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.5210259556770325, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.38836202025413513, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3218286633491516, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2733754813671112, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.23861686885356903, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.21283547580242157, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.19535106420516968, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.17868100106716156, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.16466446220874786, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.1519721895456314, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.141924649477005, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.13415540754795074, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.12744955718517303, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.12371447682380676, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12116886675357819, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11913755536079407, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11939362436532974, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12095589190721512, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12047716230154037, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12097888439893723, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12957951426506042, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1507534682750702, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.18441241979599, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.19021978974342346, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.35753878951072693, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3215457499027252, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5858928561210632, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.8530864119529724, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.8096973299980164, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.0947314500808716, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.8109946250915527, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.5730366706848145, "validation/loss_045_lr3.1e+01_wd1.0e+00": 4.541351795196533, "validation/loss_046_lr3.6e+01_wd1.0e+00": 5.453577518463135, "validation/loss_047_lr4.3e+01_wd1.0e+00": 11.949044227600098, "validation/loss_048_lr5.0e+01_wd1.0e+00": 19.68372344970703, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.45982142857142855, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.4878472222222222, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5096726190476191, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5277777777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5399305555555556, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5540674603174603, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5659722222222222, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5823412698412699, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6121031746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6460813492063492, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6728670634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7341269841269841, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7931547619047619, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8742559523809523, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8988095238095238, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9134424603174603, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9236111111111112, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9332837301587301, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9513888888888888, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9556051587301587, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9583333333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.964781746031746, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9573412698412699, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9352678571428571, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9449404761904762, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9402281746031746, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9270833333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9528769841269841, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9434523809523809, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9340277777777778, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9285714285714286, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9191468253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9149305555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9159226190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8988095238095238, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2149172078750104, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.23060155043281066, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2487893565566735, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.26843403806198796, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.28495937007768607, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.30271449925265087, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3231702611855714, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.3553187519620849, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.4119634837200127, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.47854578684689375, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5262389891021874, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.6277671182870663, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.729176930440326, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8460775711947137, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.875672894202326, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8946829746795415, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9079887337991989, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9205324600891002, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9263448898741098, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.932376823646616, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9362780391368217, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.942165349953194, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9483595586614678, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9515482830818295, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9517694462539543, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9525091738527512, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9514325991112458, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9506877822677341, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9510259712122181, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9519447538576813, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9523235631075414, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9542781959129426, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9524602258370781, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9529676209913392, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9426527736389817, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9340673879157658, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9505989422423169, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9198972553577702, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9250445238088043, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9257875879135091, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.910437270076305, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9451848700092944, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9354379851229668, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9276058712390979, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9167643103332379, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9044793954811757, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.890417039710435, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8975880850691208, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8805482851588101, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.05312397493980825, "validation/loss_best": 0.12047716230154037, "validation/acc_best": 0.9657738095238095, "validation/f1_best": 0.9542781959129426} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 1.3059186567366123, "train/grad": 0.7035699425637723, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9772986221313475, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.8916423416137695, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7696240615844727, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.6683333587646485, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.5831085777282714, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.483314218521118, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.3884924602508546, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.2967589712142944, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.1846247029304504, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.0574704861640931, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9164792570471764, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.7047760947048665, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5358840683102608, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.37605565167963506, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.2971558527648449, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.25493435710668566, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.22076872948557139, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.1942438510619104, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.17296393059194087, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.15786905819550157, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.14265713155269621, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.12945913210511206, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.11635333901271223, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.10380943531170488, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.09234824990853667, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0796591765806079, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.06954685733653605, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.061451426986604926, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.051751911761239174, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.041667900867760184, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.03501728804782033, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.030096741430461406, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.026396319679915906, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.02943633021786809, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.02603541632182896, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0474115576967597, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05380206736736, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.14018266819417477, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1873027602303773, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.3237299049925059, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.3875438569765538, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.5086709493864328, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.9159944466035813, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2074488317593932, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.9911828783806413, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.999909440269693, "train/loss_046_lr3.6e+01_wd1.0e+00": 6.817470049122348, "train/loss_047_lr4.3e+01_wd1.0e+00": 9.023370483480393, "train/loss_048_lr5.0e+01_wd1.0e+00": 16.21850974857807, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024439394446089863, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023749856268987058, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022809556461870672, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022073974600061774, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0215085709374398, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020933650452643632, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020495245703496038, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020184054970741272, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019932416882365942, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019775569839403033, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019633484990336, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019230655124410988, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.018314666105434297, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.016308621661737563, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01493802467128262, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.014160730831790716, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.013478626438882202, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.012898614909499884, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.012385107011068612, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.011971292735543102, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.011531819913070649, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.011143548770342022, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.010763420308940112, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.010366505759302527, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.009957374832592904, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.009438007100252435, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.009000319112092257, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00868617904256098, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00826685906853527, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.007542608380317688, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.007074407007894479, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006624870656814892, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006255745352827944, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.007032383386103902, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006805269586184295, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.01105417498270981, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011628893109736964, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.021178558400133624, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0261215957661625, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.035487228693673385, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04051888657850213, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05101127078560239, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07066627234437646, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0873941918573837, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.12330388212576508, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.24893811041489244, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.28898352194577454, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.3119185024499893, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.4161786030977964, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.8777402639389038, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.790929913520813, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.6692456007003784, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.5697742700576782, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.486390471458435, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.3885918855667114, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.2939108610153198, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.1983834505081177, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.069566011428833, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9063024520874023, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.738629162311554, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5378226637840271, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.40391215682029724, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.31022047996520996, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.2651623785495758, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.23754611611366272, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.21327659487724304, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1928315907716751, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1761597841978073, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.16407257318496704, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.15271928906440735, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.14318157732486725, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.13407598435878754, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.12595602869987488, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1191856637597084, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11303390562534332, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1094374880194664, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10722286999225616, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10571489483118057, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10384097695350647, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10514608025550842, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11115481704473495, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11026863008737564, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12496568262577057, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.125050351023674, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.18022389709949493, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.15589068830013275, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.34114140272140503, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.47753679752349854, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.546355664730072, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.8490002751350403, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.092004418373108, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.547223448753357, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.0343735218048096, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.7432522773742676, "validation/loss_045_lr3.1e+01_wd1.0e+00": 10.29796028137207, "validation/loss_046_lr3.6e+01_wd1.0e+00": 7.763178825378418, "validation/loss_047_lr4.3e+01_wd1.0e+00": 13.31202507019043, "validation/loss_048_lr5.0e+01_wd1.0e+00": 19.45853042602539, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5114087301587301, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5300099206349206, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5443948412698413, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5538194444444444, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5677083333333334, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5855654761904762, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6098710317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6470734126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6850198412698413, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7341269841269841, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7881944444444444, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8692956349206349, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8983134920634921, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9149305555555556, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.925843253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9312996031746031, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9379960317460317, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9451884920634921, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9494047619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.953125, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9538690476190477, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9583333333333334, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9608134920634921, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9444444444444444, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9565972222222222, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9489087301587301, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9469246031746031, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9441964285714286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9422123015873016, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9466765873015873, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9047619047619048, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9375, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9241071428571429, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9151785714285714, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2536971477834558, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2754838534002017, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2899915814975077, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.30239829023138265, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.32729294309740403, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3658934386844515, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.4137395540423047, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.47949720410375135, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.5461904137562011, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.6330164944494132, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7324312501997451, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8399701098309086, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8757948468259025, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8975896657498383, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9100538652719815, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9159405578100265, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9248590067923155, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9347027591157416, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9390801459513293, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9425535685109094, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9436093080189318, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9471914524922624, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9500351248648913, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9529351441809647, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9538671836877926, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9568985085977866, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9583777137711255, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9577321290901692, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9610202810699567, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9629186173888675, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9644657450978287, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9607043881927372, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.964388632045136, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9589761590634102, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9603166221130726, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9475420064579743, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9619549039768687, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9481688886386499, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9374977013670854, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9544918238681052, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.937609887944299, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9439992727982168, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9311737945305497, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9305825823727696, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9377107663987768, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8888942291881224, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9223760673394659, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9123678891566355, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8996390346051042, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.026396319679915906, "validation/loss_best": 0.11026863008737564, "validation/acc_best": 0.9704861111111112, "validation/f1_best": 0.964388632045136} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.8899668756127358, "train/grad": 0.5774915958940983, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.796864013671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.710039348602295, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.5901223373413087, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.492797269821167, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.4109774875640868, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.3134902000427247, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.214312195777893, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.104007077217102, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9384195417165756, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7507696464657784, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5926277422904969, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.4202799490094185, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.3261212828755379, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.25821017354726794, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.2213163299113512, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.1974978405609727, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.17546218115836382, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.1562975136935711, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.13950730913318693, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.12670811255462466, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.1131762440688908, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.10112554562278092, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.08904713725671172, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.07756515730172396, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.06694137633778155, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.05512656539678574, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04521380956284702, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.037127436390146616, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.02761225818656385, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.019431592095643282, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.015672981329262255, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.01092535849660635, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.008171340692788362, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.006778559237718582, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.017747525423765183, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.04417877420783043, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.043192519303411246, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09594117016531527, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.16056309282779693, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.206509328186512, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.29794071342796086, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.39228524655103686, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.5459779170993716, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.6513342528324574, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.1544843345787377, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.79692738942802, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.180141934873536, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.970269494187087, "train/loss_048_lr5.0e+01_wd1.0e+00": 9.441141221262514, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023014423372223973, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022342777363955974, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021485420344397425, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020881209396757184, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020458628716878594, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02006805202923715, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019799768710508942, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019614560957998038, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019433403215371074, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01910876637324691, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01848545196931809, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01681900782044977, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.015369288814254105, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.014279487919993698, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013615262028761208, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01312525377376005, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012618989227339625, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0121162837324664, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.011616478017531335, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.011220332742668688, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.010793744432739914, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01040134041570127, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01000229150056839, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.009616416285280139, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00920980481780134, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.008627215726301074, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.007924529078882187, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00720355938130524, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006148684305371716, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0050778846599860115, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004557630569470348, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0036427469400223344, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0030094657725567233, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.002774084897901048, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006172211311677529, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.011177351864753292, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.011054966308256553, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01833311433383642, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.024434770663647215, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.028883977217537903, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03959729934580689, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04485080496764567, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05698359560867164, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06540756774862072, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.09875018606836347, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.22688662108033897, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.26780191677156834, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.22588502923026682, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.31359090708196163, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.7241703271865845, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6383131742477417, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5205776691436768, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.4252105951309204, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.3443011045455933, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.245568871498108, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.1402924060821533, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.012966275215149, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.8168543577194214, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6276126503944397, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.48540955781936646, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.3542802035808563, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2944709360599518, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.24652817845344543, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.2188907265663147, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.2002400904893875, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1826888620853424, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.16701845824718475, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.15355856716632843, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.14341263473033905, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.13328950107097626, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.12475505471229553, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11671212315559387, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11032328754663467, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.10571464896202087, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1022663563489914, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10110286623239517, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.100994773209095, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.09978598356246948, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.09996849298477173, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10176662355661392, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1089601069688797, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10774963349103928, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10681311786174774, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.15282492339611053, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1609162837266922, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.19159026443958282, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.3309508264064789, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.48428428173065186, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5193541049957275, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.7584381103515625, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.8297098278999329, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.8504907488822937, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.6072884798049927, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.3083879947662354, "validation/loss_045_lr3.1e+01_wd1.0e+00": 6.187016487121582, "validation/loss_046_lr3.6e+01_wd1.0e+00": 5.3855156898498535, "validation/loss_047_lr4.3e+01_wd1.0e+00": 10.660481452941895, "validation/loss_048_lr5.0e+01_wd1.0e+00": 13.169854164123535, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5362103174603174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5436507936507936, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5572916666666666, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5744047619047619, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5954861111111112, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6297123015873016, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6669146825396826, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7065972222222222, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7735615079365079, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8363095238095238, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.878968253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9060019841269841, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.919890873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9295634920634921, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9347718253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.941468253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9526289682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9565972222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9623015873015873, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9637896825396826, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9699900793650794, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9699900793650794, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9575892857142857, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9652777777777778, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9563492063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9580853174603174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9613095238095238, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9516369047619048, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9513888888888888, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9441964285714286, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9451884920634921, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9270833333333334, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9288194444444444, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.28498455675337925, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2967160815309194, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.31728393324157106, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3516287187964516, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.39176557270470175, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.4586949141639617, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5224082278599226, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.5845061784011539, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7044824707251974, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7984120928335182, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8521799302933715, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8853871444700235, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.905728661172492, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9161347651740671, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9220923003887491, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9295607238767593, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9368416330540635, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9420194664574486, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9486099032533708, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9513039607938917, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9538873524180866, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9553444891444116, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9576745676137571, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9574250994666106, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9598489648738481, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9609167497024145, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9628674574781413, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.963693947332928, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9656259450923738, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9663694519001021, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9668760254932571, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9636441386003577, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9652565262474572, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.967868772713887, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9653223259145097, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9623889045314691, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9609782112297286, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9508085133905573, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9460531331053021, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9581593221277449, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9471614855409045, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9543216908602439, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9550609101238947, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9442380160791366, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9436314348713359, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9340309520636135, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.933604071488605, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9205992598254144, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9038882785445692, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.006778559237718582, "validation/loss_best": 0.10681311786174774, "validation/acc_best": 0.9734623015873016, "validation/f1_best": 0.967868772713887} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.5895645137131215, "train/grad": 0.41658146552741526, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.6587369537353516, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5745533752441405, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4592475700378418, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.3652265691757202, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.2840357446670532, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.18049222946167, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.0597112441062928, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.9049695092439651, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6978353038430214, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.5216028900444507, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.3978259063512087, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.30040722228586675, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.2525279524177313, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.21168810706585645, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.18668647287413478, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.16917047964408993, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.1519759512320161, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.13590993616729974, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.12120986718684434, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.10951341213658453, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.09680467269383371, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.08498861463740467, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.07267449649050832, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.06068770833313465, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.049187218165025116, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.03659523444250226, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.026898568468168377, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.019831204963847996, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.012645893711596728, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.007521965568885207, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.005020933793857694, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.003907893998548389, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0030686170049011707, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0022728976886719466, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.017493484551087023, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.02733617558144033, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.061864689402282236, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05202904852107167, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.08614944620989264, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.13837067916989326, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.29665889037773013, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.1939812647551298, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.3091436668206006, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.4107784735225141, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6751386979594827, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.957377534136176, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.254081932557747, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.6560237150080503, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.526801519878209, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022202170621603727, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021624851087108255, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02094876785762608, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020521046053618194, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020245466311462224, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020003804871812463, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019825218198820948, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01965241685509682, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019257126320153476, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.018314153356477618, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.016608830727636813, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014849962652660906, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014017508924007415, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013203375241719186, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012654705001041293, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012236162042245268, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011802926494274288, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011373722197022288, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.010968872676603496, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010609920336864889, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.010202279498334974, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00977412554435432, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.009248094209469855, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008649634176399558, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007931381651433185, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006888163217809051, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005832417548517697, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004863920303760097, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0036775647196918724, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0025630527948669623, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.001862854199425783, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.001572030171228107, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.001220821209208225, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0009722989638794389, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006756057279999368, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008906357095902422, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.014289769990027708, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.013106825577433484, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.017166621486007897, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024952906017497015, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03585963179791147, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03191994977632504, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04176995758336772, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05064986800226642, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07538121126839933, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.15058515913019618, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.17992297464225168, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1684176939151859, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.2304714737832546, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.6067090034484863, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.523647665977478, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.409792423248291, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.31618332862854, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.2339284420013428, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.1245579719543457, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.9887844920158386, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8151503801345825, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6112081408500671, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.45049431920051575, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.3550080358982086, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2854302227497101, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.24829736351966858, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.21527335047721863, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.19482815265655518, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.18038176000118256, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.16625964641571045, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.15308865904808044, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1414995789527893, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.1321175992488861, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.12255020439624786, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11362990736961365, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10587114095687866, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10024872422218323, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09657368063926697, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0943983793258667, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09359873086214066, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.09374094754457474, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.09410165995359421, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.09685609489679337, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.09885190427303314, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1048758327960968, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10275522619485855, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10625004023313522, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.16048263013362885, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.17742972075939178, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1927202045917511, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2285103052854538, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.33720672130584717, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5094228386878967, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.8026823997497559, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7843706607818604, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.724269449710846, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.335758924484253, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.1602673530578613, "validation/loss_045_lr3.1e+01_wd1.0e+00": 4.730711936950684, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.711392402648926, "validation/loss_047_lr4.3e+01_wd1.0e+00": 8.132010459899902, "validation/loss_048_lr5.0e+01_wd1.0e+00": 9.438281059265137, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5456349206349206, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5595238095238095, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5803571428571429, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6056547619047619, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6369047619047619, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6693948412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7123015873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7715773809523809, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.845734126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8896329365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9072420634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.921875, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9298115079365079, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9474206349206349, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.951140873015873, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9546130952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9598214285714286, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9704861111111112, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.970734126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.972718253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9623015873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9503968253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9508928571428571, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9375, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9280753968253969, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.29617375098429044, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3175770455215665, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.36009396643857494, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.40927394619687696, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.46787769250431005, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5235137849733665, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5940889915241063, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7034319823052484, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8122261474632664, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.865589506288084, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8861368205793593, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9060314630894272, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9150227847588537, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9253641418158739, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.93234235636703, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9368310438510585, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9406326918587737, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9449376526496167, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.948475317761627, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9526202609430438, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9550225188127557, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9593605142575162, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9617834788569446, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9649065527231481, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9647753920869472, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9658513504487348, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.965785109207221, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9663606342680888, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9668168598360785, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9689366135824855, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.969450315396442, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9672346424727787, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9680553171207009, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9702030067848885, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9580933655837475, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9618419868511187, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9656384701912072, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9679702001710249, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.961254710687298, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9524218512642951, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9496865756882433, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9550662335358384, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9619640533853774, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9466427661873675, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9415429439653038, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9444645005470447, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9422289092230106, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9320214938994883, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.912138582275097, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.005020933793857694, "validation/loss_best": 0.09885190427303314, "validation/acc_best": 0.9749503968253969, "validation/f1_best": 0.969450315396442} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.3910605128854513, "train/grad": 0.29991268254816533, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5386609649658203, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4572345161437987, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.345371127128601, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.2515805339813233, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1660078287124633, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.0446378886699677, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8867909181118011, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7056594696640969, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5122619378566742, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.36982865288853645, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2956604851782322, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.23869816087186335, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.20678420215845109, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.17726457690820097, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.15774038640782237, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.14339451583102344, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.12849544577300548, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.11418900166638196, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.10037690699100495, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.08912152409553528, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.07667492670938372, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.0650565982144326, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.05312043759971857, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.04166517277248204, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03138125464320183, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.02137980652973056, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.014759422643110157, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.01033732458949089, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.006260862750932575, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0036292019486427306, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0024026198964565994, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0018066684994846583, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.001219884417951107, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0008613904565572738, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.012180296201258899, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.011345065655186773, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.02130422631278634, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.027284011403098704, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04706797652877867, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07658348777331411, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.12575492247007788, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.14285605090670286, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.14044799313880504, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.14654259536415337, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.3077253990527242, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.8462932176236063, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.9578921045269817, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.943987329583615, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.094386246316135, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021207274654880166, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02072696980088949, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02019085376523435, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01986213552299887, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019640108561143278, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01940709105692804, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019150937134400012, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018713219822384417, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017698973421938716, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015784410410560668, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014569470481947065, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0136520762485452, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01306394841754809, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01239757239818573, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011880447908770292, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01144818349275738, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010971334010828287, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010449660154990851, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009897244779858738, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009412670366000384, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008849070193246007, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008274123982992022, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007582136173732578, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006774274463532492, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005837973961606621, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004638242150831502, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0036182367493165658, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.002792261508584488, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0018923005831311458, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.001157844994449988, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0008116468678053934, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0006505284248123644, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0004637942518093041, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0003725726168886467, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005152272879931843, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004932494569775372, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007446876683470691, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008513942757128063, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01255075948382947, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.017351128458340054, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.026472923831602876, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.026643867398548234, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.026472387049996975, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02929160369049021, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04722974839459867, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.09800791748702092, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.11151583214915578, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.14559069720199144, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.14652478211825837, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5165431499481201, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.43560791015625, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.323988437652588, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2289258241653442, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.1400123834609985, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.00832200050354, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8358398675918579, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.654489278793335, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.4720432162284851, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3542731702327728, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.29697391390800476, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2494438886642456, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2216629534959793, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.19577428698539734, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.17838172614574432, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1656147688627243, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.15273238718509674, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1406967043876648, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12962216138839722, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.12120087444782257, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11271249502897263, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.10572690516710281, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10020869225263596, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09648925811052322, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09465889632701874, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.09409705549478531, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09482965618371964, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.09549923241138458, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0969013199210167, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.0995108112692833, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10071306675672531, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10512813180685043, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10487676411867142, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10882388055324554, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1536979079246521, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1674228012561798, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.19619795680046082, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2532949447631836, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3550548851490021, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5965714454650879, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.7664433121681213, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.8473289012908936, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.8074460625648499, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0157837867736816, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.5479027032852173, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.7252233028411865, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.169175148010254, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.8427228927612305, "validation/loss_048_lr5.0e+01_wd1.0e+00": 5.2963643074035645, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5617559523809523, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5766369047619048, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6063988095238095, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6393849206349206, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6676587301587301, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7061011904761905, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7579365079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8229166666666666, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8829365079365079, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9050099206349206, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9174107142857143, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9283234126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.935515873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.951140873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.955109126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9655257936507936, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9675099206349206, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9712301587301587, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.972718253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.972718253968254, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9590773809523809, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9469246031746031, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9486607142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9459325396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.31897871695629465, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.34868552281825127, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.40623343297299447, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.4697248373948944, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5176089008355491, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5790428093784707, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6709720605199074, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.776762015869498, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8546657415628836, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8833327910956748, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9008641328057119, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9136717166783868, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9217832843172747, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.929605974053888, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9375466226509388, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9405309390599379, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9455995375554264, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.949412789414525, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9537034096424817, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9594201374663788, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9598493918401716, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9620221634163161, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.963400073897728, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9659132065095793, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9663733055003478, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9655029772462461, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9652022005963226, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9665071330314681, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9667464699577889, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9685360353256204, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9687864112635834, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9685629863060653, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9686712142482938, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.968730190818003, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9626798358678906, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9656872135148676, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9640110838629141, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9617409731786918, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9642517625542527, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9512161795209141, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9576188901874184, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9469713563272971, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9572821921104858, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9565031925212895, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9486330704257068, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9457233096045832, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9351558990518731, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9411211923333883, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9367913599333149, "id_best": 29, "lr_best": 0.0006899999999999999, "wd_best": 0.05, "train/loss_best": 0.0036292019486427306, "validation/loss_best": 0.0995108112692833, "validation/acc_best": 0.9739583333333334, "validation/f1_best": 0.9685360353256204} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.3012711402773857, "train/grad": 0.22429522074759006, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.476109209060669, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3965692615509033, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2855428600311278, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1883622002601624, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.093331320285797, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9454679191112518, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7619851821660996, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.5841474437713623, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.41003979444503785, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.31030386827886103, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.259988645426929, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.21617544565349817, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1895920741558075, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.16367600675672292, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.1459022942185402, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.13228306584060193, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11781732274219393, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.10357342635281384, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.08949382835999131, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.07796241230331362, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0652384483627975, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.053534313505515456, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04171165152452886, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.030787473544478416, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.021498874966055154, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.013508133124560118, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.00880477943457663, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.005986664658412338, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0037059333641082048, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0021214874740689994, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0013993898220360278, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0009958131238818168, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0007023112196475268, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00047868250869214534, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.004929716726765037, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.003940205276012421, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.011960239550098777, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.018119331346824766, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.018269488075748085, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07043682129122317, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07112235472537577, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06838073200546205, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07233462806791068, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.11998335248790681, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.14112779864110053, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.4589071634784341, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6348831142019481, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.9573658571112901, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.9117279110755772, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0209891245001927, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020608644848689437, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020212707645259797, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01997462163679302, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019808601066470147, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019598124385811388, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019212271138094365, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018452128781937062, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01643633807078004, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01478649833239615, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013987087779678404, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013185974047519266, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012618720182217658, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012013524456415325, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011530497772619129, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011140065561048686, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010715299672447144, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010269530174555256, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009782585779903456, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009346227655187249, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008761115373345093, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008120572242187336, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007303536322433501, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006318244931171648, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005158250154927373, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.003788555376813747, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0027729603860643694, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0020209664179128596, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0013284495934203734, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.000725049082248006, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0004685233965574298, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0003361420666260528, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0002453109747511917, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0001726883069932228, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0025736858927575667, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0021144538219527932, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005064515683661739, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0069555666223202195, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008800807837662959, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.016469100253569274, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.016460515983898628, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.021778670901397342, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.018863383776150332, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0263279870057901, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.035147129211400466, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05845982793512739, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.08620152095344587, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1051662524021183, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09941243961964513, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4461749792099, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.366838812828064, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2552436590194702, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1559501886367798, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.05564284324646, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.8965092897415161, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7098140120506287, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5405288338661194, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3848925232887268, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3061002194881439, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2642902731895447, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.22705338895320892, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2039320468902588, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18091584742069244, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1652008295059204, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1534002423286438, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14150388538837433, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1302480399608612, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12026865035295486, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11266373842954636, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.10587189346551895, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.1011759340763092, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09805114567279816, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09621348977088928, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09557383507490158, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0954030305147171, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09634360671043396, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.09741967171430588, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.09912899136543274, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10150925815105438, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1021929532289505, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10624178498983383, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10579446703195572, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10786186903715134, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1405358761548996, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1584196537733078, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1744789332151413, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.25946369767189026, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.338894248008728, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5125152468681335, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.6360265016555786, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5906869173049927, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6680373549461365, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.8490719795227051, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.248108983039856, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.8729312419891357, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.037700891494751, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.5893940925598145, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.912346363067627, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.576140873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5920138888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6294642857142857, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6599702380952381, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6909722222222222, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7450396825396826, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8050595238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8687996031746031, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9015376984126984, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.917906746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.925843253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.935515873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9432043650793651, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9533730158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9565972222222222, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9637896825396826, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9702380952380952, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9657738095238095, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.96875, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9657738095238095, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.964781746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9461805555555556, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9441964285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3449709490641787, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3820383340248303, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4518971168530849, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.506959286474674, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5570228061957343, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6464055291730334, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7532032268397517, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8385360686755916, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8786241175124309, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9015057130693057, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9108265608487358, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9228023497458947, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9307741176408083, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9368992004417656, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9430298438779084, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9476116068111903, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9489476748469304, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9531730022386091, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9539898561904203, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9565519333608771, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9585610860275258, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9618011975238338, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9629109466790856, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9642454041499294, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9651406194110516, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9662697567043078, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9657493602165484, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9663520017802292, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9683973179786727, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9684795920139512, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9691879642098226, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9678139421979239, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9677741160130464, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9694605155374438, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9667759040457462, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9661022122631453, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9673956941072083, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9642811639522137, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9618654718736679, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9599292706110609, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.95991523042993, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9619114654655244, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.959846755087068, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9572350875094168, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9601591937418783, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.951708908916779, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.95214046534855, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9325711957550609, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9375956375664969, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.0013993898220360278, "validation/loss_best": 0.1021929532289505, "validation/acc_best": 0.9739583333333334, "validation/f1_best": 0.9691879642098226} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.24783265188336373, "train/grad": 0.17047549426555633, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4153923034667968, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3372230529785156, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2254472661018372, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1220588207244873, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.012630853652954, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.839217120707035, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.6516236364841461, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4861288920044899, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.34370128214359286, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2737888813763857, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.23531461216509342, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.19986154790967703, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1770953030139208, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1538816345669329, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13708026740700008, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12396606413647532, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.10989387116394937, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09585397681221366, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.08196515139192342, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0705503252428025, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05782585950568318, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04606762481853366, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03431837073527277, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.02400813381187618, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.015947573445737363, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.009242949085310101, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.005731362020596862, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0038395977020263673, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.002471427796408534, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0015410175453871488, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0010639922413975, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0008118599653244019, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005942306201905012, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00042775720357894896, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0014770378172397614, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0008245799131691455, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0024946467485278843, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.014310863921418787, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.013849321007728576, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.032653432125225666, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.03503512743860483, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.04732894335873425, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.05911244078539312, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.056754505364224314, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.07717715014703572, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.20381630225107072, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.2567236693482846, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.47668346906080844, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5689921130053699, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020606534387916327, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02028908908367157, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019966907035559416, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01977219886612147, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019626503325998784, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019412231678143144, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018979092063382268, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017883945046924053, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.015677631525322797, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.014513094446156173, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013816284956410527, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013058227514848112, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012490574391558767, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011851475476287305, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01134873068658635, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010938363193999975, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010444308570586145, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009896673925686628, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009286056325072423, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008714933975134044, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007994981409283355, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007191270645707845, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006183892399421893, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005026532813790254, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0038293870259076355, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002527434942603577, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.001704922116623493, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00118733889379655, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0007718876306898892, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0004776937759015709, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00032180748356040566, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00024938163620390696, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00018593182416225317, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00014031341241206973, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0010265020889801235, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0005647058045573772, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0019181409914244797, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0051602436887240085, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006003408480968059, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.011308759153739079, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01288888811862195, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.014077187734215162, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.016073796163218617, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.015975166083649864, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.019827325794101028, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03699642747813263, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.053164510154935576, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.06677360056581245, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.07478955552870938, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.390927791595459, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3123925924301147, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1992648839950562, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0929521322250366, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9783189296722412, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7995096445083618, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.615683376789093, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.45829543471336365, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3362213671207428, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.27694591879844666, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.24334946274757385, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.21193107962608337, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.19188936054706573, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.17157913744449615, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.15751224756240845, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.14693158864974976, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.13617482781410217, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12598823010921478, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.11702774465084076, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11040307581424713, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.10459774732589722, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.10059338808059692, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09781775623559952, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09665153920650482, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09646305441856384, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.09757308661937714, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09875886142253876, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.09955214709043503, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1011698916554451, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10372298955917358, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10434441268444061, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10794250667095184, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10686513781547546, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10798369348049164, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1284247189760208, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14030271768569946, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1550157219171524, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.25956568121910095, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3193557858467102, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.4978700578212738, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.6318151354789734, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5445713996887207, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6545872092247009, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7686454057693481, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.0942374467849731, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.960019588470459, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.1416943073272705, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.2944743633270264, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.3271644115448, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5875496031746031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.609375, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6480654761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6790674603174603, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7132936507936508, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7728174603174603, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8405257936507936, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8871527777777778, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9097222222222222, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9208829365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9300595238095238, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9399801587301587, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9444444444444444, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.951140873015873, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9548611111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9625496031746031, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.964781746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9665178571428571, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9682539682539683, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.970734126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.972718253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.972718253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9699900793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.970734126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.964781746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9578373015873016, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9506448412698413, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3709833580435332, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4124292458749512, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.4860121981220417, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5390516467058962, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5951408311583146, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7025452134183887, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8038816686349887, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8623664737836466, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8889161422395121, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.90314282523572, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9163588802973722, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9279179027345457, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9327782885685934, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9411647530735252, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9460694765207671, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9482380222854028, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9525889946692934, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9535543878937883, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9559140495043951, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9588600456366576, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9599214267476268, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9617823249812675, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9632487682731113, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9646896901519982, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9653642721205441, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9651788246466678, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.965700038268936, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9667792504725135, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9687185253732495, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9677928345610836, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9674341876142424, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9680709159838721, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9687579000417172, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9692211811332407, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9708415011381646, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9685515353680776, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.972005560302607, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9655795648318471, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9645030147767402, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9627719921086145, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9599892367831185, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9653374158464911, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9639987834697434, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9594395458598441, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.961303810379729, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9540260749683599, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9564837322064083, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9459009501795521, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9428193471426722, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.0014770378172397614, "validation/loss_best": 0.1284247189760208, "validation/acc_best": 0.9759424603174603, "validation/f1_best": 0.9708415011381646} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.20940628297626973, "train/grad": 0.12045712132006883, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3603137063980102, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2831439113616943, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.169690833091736, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0589739561080933, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9360140585899352, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7516458120942116, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5715526103973388, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4167800348252058, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.306020692884922, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2510510530695319, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.21910829786211253, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.18853605329990386, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.16832237543538212, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.14691320603713393, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13100291220471263, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11814223617315292, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.10404994567856192, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0894975390471518, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0747479588817805, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06232977474108339, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04869624144397676, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.036586207235231996, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.025517381858080626, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.016631431449204682, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.01045931932516396, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.006138052502647043, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003939174916595221, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0027813924103975295, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0018875297252088786, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0012773030158132314, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0009134847205132246, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007077552378177643, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005269995518028736, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003917561564594507, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00030818743631243705, "train/loss_035_lr6.0e+00_wd1.0e+00": 8.29906016588211e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00041558681055903435, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0027154540084302426, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.006587929744273424, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.014792555533349514, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.012701834123581647, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.02025031519122422, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.03212012575007975, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.022948102112859487, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06080384730361402, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.04163843471556902, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.08014142986387014, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.14785470108501614, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.25325564137659967, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020337367337197064, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020041923723183572, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019722111709415912, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019489147728309036, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019256946314126253, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01877681113779545, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017980959014967084, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01639411016367376, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014690117659047246, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.013811525772325695, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013232063355389982, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012587634343653918, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012111844606697559, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011533111087046563, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011073921700008213, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010680434585083276, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010209877765737474, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009658950387965888, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009026373699307443, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008395034834975376, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0075409576611127705, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006571323937387206, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005396565995761193, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004129688725515735, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0029373507664422504, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0018542928851093166, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0012161038897465914, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0008539416694839019, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005723253681208007, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00038691794135957023, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002768462090898538, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00021625255752951488, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00016731219848225009, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00012676432355874568, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.000230653269004506, "train/grad_035_lr6.0e+00_wd1.0e+00": 6.685935220104966e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0003886835972237179, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0021784539676320235, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0029377144336469384, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006179844386383095, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006130591432670949, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.007570324537629244, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00965444902740491, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.008858783144588602, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01453033473271975, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.017221577864254763, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.025684029348718704, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.034514734078403095, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.04696215883241308, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.34768545627594, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2694127559661865, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1537232398986816, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0391037464141846, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9109227657318115, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7233327627182007, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5470254421234131, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.40156880021095276, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3059960901737213, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2571582794189453, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.22828635573387146, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2006298452615738, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.18270108103752136, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1642799824476242, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.15115807950496674, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.14135944843292236, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.13098666071891785, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12118348479270935, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.11221764981746674, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.1059013307094574, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.10066546499729156, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09742183983325958, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09573233127593994, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09547597169876099, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09600568562746048, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.09715937077999115, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09877379983663559, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10009424388408661, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10172528028488159, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10387764126062393, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10467831790447235, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10817492753267288, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1066785678267479, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10714290291070938, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12286707013845444, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13713592290878296, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14420409500598907, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2232218235731125, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.27524644136428833, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.4633939862251282, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4951508641242981, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5283243060112, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5861169695854187, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6225953698158264, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.8657302856445312, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.049071788787842, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.802323579788208, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.2911927700042725, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1331400871276855, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5922619047619048, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6175595238095238, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6555059523809523, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6899801587301587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7313988095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7943948412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8653273809523809, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8978174603174603, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9169146825396826, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9268353174603174, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9320436507936508, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9427083333333334, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9533730158730159, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9623015873015873, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9625496031746031, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9675099206349206, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.96875, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9699900793650794, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9709821428571429, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.951140873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3816223874938813, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.43356826843576085, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5021107588621236, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5587640962743303, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6257416328989729, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7413408249513519, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8360215443734476, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8749652832680055, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9000737149120439, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9125850253176153, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9187583741783103, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9309630433157989, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.938000985359168, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9437953581110861, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9497763682378988, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9527261551947783, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9554598527028707, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9556750920419022, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9575425495747196, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9599645533420297, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9612203855406852, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9622986749937769, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9639799666880625, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9650593107335622, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9659401224456593, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9672706396588373, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9655191875362351, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9664630933429669, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9682700491613943, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9685565036699638, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9689639410260672, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9694261809697847, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9680973036088415, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9695321371905317, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9720962115653881, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9692564323246192, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9722694388008295, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9688868976918211, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9688168547366409, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9643019938068601, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9705892891480963, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9695184084320237, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9656056615927372, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9674026687704315, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9675808068890255, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.958206332653323, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9637003104574452, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9584658577535757, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.942672243152118, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.00030818743631243705, "validation/loss_best": 0.12286707013845444, "validation/acc_best": 0.9769345238095238, "validation/f1_best": 0.9720962115653881} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.1862343453615904, "train/grad": 0.08558344336226582, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3265133905410766, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2486763143539428, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1311828994750976, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0109109306335449, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8754875683784484, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6856470715999603, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5096261508762836, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.3672549077123404, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.27778651282191275, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.23059793207794427, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.20204405833035707, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.17415675312280654, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1553658481873572, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.13511717998422682, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.12013920028693974, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.10793840133585036, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09445541904307903, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.08054503090679646, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06644784072414041, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05460034331306815, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.041699436493217945, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.030400087870657445, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.0201517166942358, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.012520683063194156, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.00777446792460978, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004576195860281586, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0030064354091882705, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0021619987301528453, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001493718149140477, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0010413386672735213, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007766338437795639, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0006122993025928736, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00046370861120522024, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00035462046042084695, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00014179656282067298, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.944249384105205e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.680459715425968e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.002172146737575531, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0008535290881991386, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0047480844799429175, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.002548695793375373, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.010782884350046516, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.003313475139439106, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.005945043945685029, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.010696351770311594, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.004610117198899389, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0133350433036685, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0410266141127795, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.043645936148241166, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020111040365882217, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01986602553166449, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01961419524624944, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019427519855089486, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01923254304099828, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018805836443789303, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017857583109289407, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.015782325426116586, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014185493816621602, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.013326934869401157, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012744977783877403, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012099011249374599, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011608764578122646, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011015726258046926, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010529550991486758, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.010109057179652154, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009597394759766757, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009010765985585749, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008324174990411848, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0076571978570427746, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006765234812628478, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005770799539750442, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004578596540377475, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0033416115265572444, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0023160750087117777, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.001454451961617451, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0009604749285790603, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006763050504378043, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.000459242343713413, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0003170629638043465, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002367741052148631, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00018909680547949393, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014610756630645483, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00011298962923319777, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.258132094852045e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.763101115258905e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 8.831375750247617e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0010935120573375024, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0013288295484159384, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0026327857061245715, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.002692858237679405, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.004310514088121242, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.003460701934053993, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.003793331736168337, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.004614003173903481, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.005419670506796457, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006793992530535137, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.013922637756884744, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.020012427534942853, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3140792846679688, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2356507778167725, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1166174411773682, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9936152100563049, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.85532146692276, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6665149927139282, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.49627381563186646, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.36608901619911194, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.28686589002609253, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.24412953853607178, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.21818214654922485, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.19270646572113037, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.17578047513961792, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.15812936425209045, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.14559054374694824, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.13611146807670593, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.12631654739379883, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11714622378349304, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1093597412109375, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10401108115911484, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09987881034612656, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09748470783233643, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09610095620155334, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09606768935918808, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09696810692548752, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.09825583547353745, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.09979534149169922, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10074620693922043, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10214715451002121, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10445774346590042, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10508178174495697, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10893385112285614, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10742183029651642, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10744983702898026, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12151976674795151, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13322465121746063, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14123710989952087, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.21032771468162537, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2424698770046234, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3489457964897156, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4453204274177551, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4748455286026001, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.4735325872898102, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6264969706535339, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.7693355679512024, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.8171933889389038, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.536791205406189, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.1194310188293457, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.607520341873169, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6071428571428571, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6331845238095238, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6691468253968254, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7113095238095238, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7579365079365079, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8196924603174603, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8799603174603174, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9050099206349206, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9211309523809523, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9312996031746031, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9379960317460317, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9499007936507936, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9556051587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9588293650793651, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9632936507936508, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9632936507936508, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9677579365079365, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9685019841269841, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9680059523809523, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9692460317460317, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9712301587301587, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.972718253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9742063492063492, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9655257936507936, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.96875, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9598214285714286, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9521329365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4093911157693773, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.46212964523530975, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5226287715395694, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5912805242437105, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6744411290154834, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7757509656560513, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8515834318545019, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8823831862325368, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9038701311457166, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9184614199359991, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9252562197846824, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9321043137489591, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9403496311065673, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9475001678017074, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9517014466359331, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9539991015840771, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.957075482428084, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9572651855115915, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9586348794144981, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9619929690473596, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9626349531756866, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9625827767365349, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9638756515017197, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9656979133827234, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9670256728821419, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9671840305761827, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9665024184287048, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9664224933812665, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.967971984227429, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9681526853996296, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9688769022614582, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9686124184499862, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9678179704208887, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9698802139866876, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9721237435285872, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9690706180817117, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9703279535654273, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9703563238599519, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9646184634184646, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9710235888737361, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9713548091181663, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9705915023654221, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9693959940512789, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9654928949860335, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9652392280567106, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.959225554086752, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9653617293501556, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9536297130686106, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9464819868671756, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.00014179656282067298, "validation/loss_best": 0.12151976674795151, "validation/acc_best": 0.9769345238095238, "validation/f1_best": 0.9721237435285872} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.1766622120887041, "train/grad": 0.07650972286239266, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.29883460521698, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2200697612762452, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0978151202201842, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9679305458068848, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8231117364764213, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6327463303506374, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.46232024520635606, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.3361449534446001, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.2609173633530736, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.21962032444775104, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1941981816291809, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.16859266225248576, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.15103251354768873, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.1313252325169742, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.11643122158944606, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.10433298512361944, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09075716064311563, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0768047714792192, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06280572202056646, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05135348303243518, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03901873473078012, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02833240230567753, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.018538506273180246, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011245867451652885, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0067305838130414485, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.003933852761983872, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002625148883089423, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0019078377354890107, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0013237631414085627, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009375568199902773, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007133269775658846, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005671392753720284, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00044145419262349607, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00033917548134922983, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00012908992357552052, "train/loss_035_lr6.0e+00_wd1.0e+00": 7.23924022167921e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 4.8508867621421814e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00013233212754130364, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00019429796375334263, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0006605576910078526, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.000578478230163455, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0013987706787884236, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0010674734599888324, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.003535442976281047, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.004814409455284476, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.012651902986690403, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.015961492490023373, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.013370712297037244, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.01803247827105224, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019989787796512247, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019791433750651777, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01958138249348849, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01940520527306944, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019150837175548076, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018542120009660722, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017244781837798654, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01518996715079993, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01395147125935182, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.013207878302782774, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012651158040389418, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.012001839871518314, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011512163605075329, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010885959782171994, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010374811114743351, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00991745071252808, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009373336551943794, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00873104115598835, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008004069622256794, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007312316278112121, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0063967034855159, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005379874034551904, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004149380225862842, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002938582136121113, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00196367412849213, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0012051526621507946, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008091846619936405, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005831580304220551, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003972963598062051, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002808771844502189, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00021597107206616784, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00017290610852796818, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.000137135624554503, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00010714749897488219, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.886894498620678e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.686446677226286e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.905342434933346e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0002320923475996395, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0001719765394939654, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0005333577672715382, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0004660950203496262, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0011938521594557175, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0014702654739033125, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0016596902036080656, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0024865570566906586, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0060254161461574226, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.007857526308163946, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.007303799131529003, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.007966233592494598, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2886301279067993, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2099039554595947, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0877177715301514, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9577387571334839, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8133613467216492, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6257739067077637, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.46002209186553955, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.3438023030757904, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.2744121253490448, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.23546886444091797, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.211475670337677, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.1876111477613449, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.17170220613479614, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.15500150620937347, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.14306855201721191, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.13386715948581696, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.12456898391246796, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11545319855213165, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10771229863166809, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10276807844638824, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09863175451755524, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09642961621284485, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09558257460594177, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09606875479221344, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09730789065361023, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.09902425110340118, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1004527360200882, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10151723027229309, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10287482291460037, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10553266108036041, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10591862350702286, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.10963223874568939, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.107960544526577, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10779488831758499, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12070945650339127, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13083557784557343, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13569436967372894, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.19870364665985107, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.22072246670722961, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.35638031363487244, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.41724440455436707, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.43228083848953247, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.42745333909988403, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.49423518776893616, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6660882234573364, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.5612478256225586, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2748736143112183, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.850651502609253, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.164306163787842, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6148313492063492, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6416170634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6795634920634921, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7185019841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7690972222222222, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8348214285714286, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8859126984126984, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9069940476190477, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9241071428571429, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9320436507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9454365079365079, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9553571428571429, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9588293650793651, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.966765873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9665178571428571, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9680059523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.970734126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9747023809523809, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.972718253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9694940476190477, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9536210317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.42523433010604855, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.47476662172511214, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.539279251562047, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6037754576050083, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6984223042910956, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7960490011722603, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8613497477415025, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8859214437864673, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9097465061516449, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9191026095187583, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9260943080912718, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9344038451003668, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9425471210668829, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9464208856837295, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9518053817282308, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9536909746575473, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9539077850544517, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9560001165189124, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9604499105585589, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9597681189025041, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9615070465791723, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9629164692942179, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9645834773517679, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9663508622018223, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.96593850495887, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9647911900134063, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9663610786659114, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9675190689170612, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9680905376578262, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9687906658665194, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9685111360015768, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9690954989207482, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9681573776324429, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.969880869095445, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9722598281435446, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9704644748037092, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9705524254649714, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9712233109514352, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9675992724490531, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9697864316005711, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9706832081580712, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9719335103976073, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9689747331241972, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9711634032382602, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9672745931366513, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9615781583263338, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9649005923782028, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.958395268545537, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.948562217284276, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.00012908992357552052, "validation/loss_best": 0.12070945650339127, "validation/acc_best": 0.9769345238095238, "validation/f1_best": 0.9722598281435446} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.16919299464672805, "train/grad": 0.06886923115700483, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2744075489044189, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1951904964447022, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0702855968475342, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.935139656662941, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7874752432107925, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6011307589709759, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4345317612588406, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.32011177331209184, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.2515078093484044, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2124918731674552, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1879414614662528, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1631791428476572, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1458979432657361, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.12662420889362694, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.11185886779800057, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09979116233997047, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.08602580059319735, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07171092462725938, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.057400662656873463, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04566100249066949, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03324511293321848, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.023056113487109543, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.014610533760860562, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.008868363834917545, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0054967440757900474, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.003336032489314675, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0022708992566913364, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0016792495269328355, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0011851842701435088, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008500054199248552, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006459161732345819, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005142557621002198, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003953570406883955, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003114466741681099, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00012510711327195167, "train/loss_035_lr6.0e+00_wd1.0e+00": 7.56007432937622e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 4.1471635922789575e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.828819677233696e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.339220441877842e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 6.069432944059372e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 5.29351644217968e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0006619499437510967, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00048258554190397265, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0002716082707047462, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.2011192739009857e-05, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0035096019227057696, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0007046393305063247, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.004946513408794999, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0046892435383051635, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02035493593662977, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020147601929493247, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019888728726655244, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019633032088167964, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019275649124756454, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01855842979159206, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.016839457442983985, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01473040270153433, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013623514526989311, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012961644770111889, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012457531820982694, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01186026747804135, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011372923403978347, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010774940627161413, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010270404433831572, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009812536009121686, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009261915624374523, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008609707104042172, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007854638040298597, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007118571531027556, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006123738253954798, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005008382866508327, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003711197574448306, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002498880905332044, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0016443122341297566, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0010202002341975459, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006903527636313811, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005061069177463651, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00035162009997293355, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002534443373588147, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00019604826855356805, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001583644804850337, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00012368481588055148, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.961394831407233e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.386639468018984e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.523787652146893e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.343370777031396e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.649432549330811e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 2.7475820220599357e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00025318298231612755, "train/grad_040_lr1.4e+01_wd1.0e+00": 2.0527198773408376e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0002656861818061549, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00021236027623067164, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00030345282443286254, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00021153412472246175, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0022368568710112494, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.001718542283449241, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.002331808674224417, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0033317360203858294, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2702113389968872, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1910834312438965, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.066344141960144, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9309301972389221, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7829984426498413, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5973979234695435, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.4351707994937897, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.32926255464553833, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.266137957572937, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2299012988805771, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.20744511485099792, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.1845463663339615, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1690995693206787, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1526598036289215, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1409887969493866, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.13194209337234497, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.12282812595367432, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11443223804235458, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10731964558362961, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10287915915250778, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0994102731347084, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0972885936498642, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09657246619462967, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09705814719200134, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09841182827949524, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10019073635339737, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10161470621824265, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10251940041780472, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10376236587762833, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10622832179069519, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1067766547203064, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11020833998918533, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10842731595039368, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1079028770327568, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1200086697936058, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12957458198070526, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13303744792938232, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.19288618862628937, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.21268506348133087, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3259684145450592, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.40219181776046753, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4004032015800476, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.39918753504753113, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4636223018169403, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6386001110076904, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.482520341873169, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.184892177581787, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.648457407951355, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.963062047958374, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.623015873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6473214285714286, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6852678571428571, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7289186507936508, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.777281746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8462301587301587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8908730158730159, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9104662698412699, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9248511904761905, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9325396825396826, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9409722222222222, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9471726190476191, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9518849206349206, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.957093253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9585813492063492, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9605654761904762, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9635416666666666, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9680059523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9704861111111112, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9714781746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.970734126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.972718253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9759424603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9734623015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9657738095238095, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9563492063492064, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.44198409930258575, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4856005772729326, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5507836392905182, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6220966925909386, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7120408206023839, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8129475285049869, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8671498715361875, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8922725307061681, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.910457171679529, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9191783651817299, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9281742474103677, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9367966430830952, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.942964076682383, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9490397441171625, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9505747805468064, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9533164292265891, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9548057929620241, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9574621645043748, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9601821956897003, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9612631161224395, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9615583767865987, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9630062842583855, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9648760040826161, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9663834887253838, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9649851450209529, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9656806404960578, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9659122248862265, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9672891682034904, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9682236632915897, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9683435170613462, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9681184483657975, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9676963500047273, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9679619970197503, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9692493837123398, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9722154262401107, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9701961790504663, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713910656134759, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9715889253391319, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9688108369334274, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.969586469378684, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9713719020682215, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9722061631442462, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9700183061703623, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9715814987083443, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.967845097171667, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9609378097400347, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9661160869132857, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9573536067168897, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.951349693796646, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.00012510711327195167, "validation/loss_best": 0.1200086697936058, "validation/acc_best": 0.9769345238095238, "validation/f1_best": 0.9722154262401107} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.16181081768125297, "train/grad": 0.06566566120833159, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2570707082748414, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1766579055786133, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0475763273239136, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9052786374092102, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7531848296523094, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5672338129580021, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4041916238516569, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.2999869731813669, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.23733701445162297, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.20048218723386527, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.17691703850403429, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1525089360959828, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.13508839307352902, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.11572764161042869, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.10087016436271369, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.08900319664739072, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07575582017190755, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06233685243874788, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04937334042973816, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03900111578404904, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02824989880435169, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.019361542975530027, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.012098735915496945, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0073195792641490695, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004564861040562391, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002812109496444464, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0019403535965830088, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0014497016556560994, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0010369710624217987, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000748247979208827, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005764645710587501, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0004659615084528923, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00036454432643949985, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00029154026880860327, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00012134701013565064, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.757976487278938e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 4.0145618841052054e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.414007671177387e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.1806115508079528e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.4554774388670921e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.4105498343706133e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 5.65200112760067e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 5.191462114453316e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 5.785813555121422e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.35451976954937e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.0693231597542763e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0006106468569487333, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0009408498927950859, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.0548625141382218e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020006520627066492, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019826394841074943, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01960724189411849, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019378937231376767, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019017937602475287, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018272649301216005, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.016539529790170492, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01468656032346189, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013624028358608485, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012912880624644458, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012387363663874567, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.011779274956788868, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011300125098787249, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01071488460758701, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010201151357032359, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00974955381359905, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.009199947870802134, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008562656969297677, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0078020177455618974, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007023604658897966, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005949945648899302, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004765621180995367, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0034464221945381722, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0023174427141202615, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0015181040874449536, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0009403439504967536, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006412663016817533, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00047269591050280724, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003320411501044873, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00023828417139156954, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001848506660098792, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00014944015760192997, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001180407771607861, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.428877135178482e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.048394901881693e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.2447358453046036e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.284612936454323e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.0670129229299905e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.7975576648264214e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.2258037034411903e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 9.823371476127549e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.3908587415836863e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 2.160369169729978e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.6701336151148897e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 4.770088593302002e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 3.704403585374026e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0009712056644337365, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0010006882236630922, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.782756789782287e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2576864957809448, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1781977415084839, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.051400065422058, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.912420928478241, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7627005577087402, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5782808661460876, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.41902339458465576, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.3200026750564575, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.2599931061267853, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.22523048520088196, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2034350484609604, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.1814042329788208, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.16630546748638153, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.15036995708942413, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.13896316289901733, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.13007962703704834, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.12086766213178635, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11257389187812805, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10550276190042496, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10093612223863602, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09781692177057266, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09626690298318863, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09586626291275024, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09691038727760315, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09865996241569519, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10036520659923553, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10180914402008057, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10269845277070999, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10400981456041336, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10631534457206726, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10686048120260239, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11068669706583023, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.108714759349823, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10816466063261032, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11959633231163025, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12833555042743683, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13118961453437805, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.18956483900547028, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20835362374782562, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.31642603874206543, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3914775550365448, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3873820900917053, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3837297260761261, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4415222704410553, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6019256711006165, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.4088431596755981, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.08870530128479, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.5180901288986206, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.8165383338928223, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6257440476190477, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6510416666666666, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6882440476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7331349206349206, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7837301587301587, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8521825396825397, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8936011904761905, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9112103174603174, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9250992063492064, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9347718253968254, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9422123015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9476686507936508, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9533730158730159, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9598214285714286, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9625496031746031, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.964781746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9670138888888888, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9680059523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9694940476190477, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9702380952380952, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9709821428571429, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.972718253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9724702380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9747023809523809, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9734623015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9665178571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9694940476190477, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.957093253968254, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.44732249880671077, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4953899601165471, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5561645574324457, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6299728245111824, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7250543018943946, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.81909314999316, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.870179056662888, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8917331522145686, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9101426934163541, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9230187883419552, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9307856323839866, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9370731367675672, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9437009965403126, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9485403371468445, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9524284114070971, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9543687140996782, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9550012010029999, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9590325232404621, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9611680342011588, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9609988946969834, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.961476939592801, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.963134250012497, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9646162703791109, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9652941803473919, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9651391495528426, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.965079567916474, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9659151065453276, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9677189282187181, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.969187302331291, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9683534288749216, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9679273375066095, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9681992339103024, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9676090742841671, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9698789840754265, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9725775024400131, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9704193772100971, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713910656134759, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9719366887324734, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9687682152952943, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.969934904674983, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9710253054246236, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9728524756179417, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9715103040492604, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9719253631572334, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9679733920624832, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9622310942312785, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9657326663588564, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9587060212395931, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9524916085657432, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.00012134701013565064, "validation/loss_best": 0.11959633231163025, "validation/acc_best": 0.9774305555555556, "validation/f1_best": 0.9725775024400131} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.1613066541031003, "train/grad": 0.0648840880393982, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2486722707748412, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1685233163833617, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0386066961288452, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8948578476905823, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7419980230927468, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5569781236350536, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.39604466885328293, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.29692154757678507, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.23714984744787215, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.20221909895539283, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1797013039700687, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1562263918854296, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.13937685676850378, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.12045176283456385, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.10590373188257217, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.0939395788218826, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.08061997907236218, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06681877017021179, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0530427382234484, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04180113504640758, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.029959860602393747, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.020209293607622386, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01231873563490808, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.007292877454310656, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004493505591526627, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002782244589179754, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.001916112545877695, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001426799427717924, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0010179734881967306, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0007373667322099209, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005782633926719427, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00047002896666526795, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003634020034223795, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002885762695223093, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00012171882204711437, "train/loss_035_lr6.0e+00_wd1.0e+00": 7.093848660588264e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.9498964324593546e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.6772161945700645e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.380179077386856e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 7.810760289430618e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 4.197722300887108e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.5841204226017e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 9.920746088027954e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 5.721393972635269e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.2927781790494918e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.2458996176719663e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 8.428692817687988e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.67003832757473e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.7896946519613267e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020252529573626818, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02007087703794241, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019828300992958247, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01954310480039567, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019129450819455086, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018316433243453504, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.016436959579586984, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014581423136405647, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013445101552642882, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012695517223328352, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012152094093617052, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.011511269100010395, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.011019135739188642, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010420461238827556, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00991479133022949, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009470210410654545, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008912051025545225, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008230067244730889, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007432401466649026, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006630387522745878, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0055526792828459294, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004389996151439845, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003108939054363873, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002035729838244151, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0013223469322838355, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0008381230950180907, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005822382698534057, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0004323257321084384, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003088312919862801, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00022419610995711993, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00017663139111391503, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00014363693844643421, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00011302324674034026, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.101179892240908e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.7562820106795695e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.204762133440454e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.149515941174741e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.9875106490422354e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.8443991092231826e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.3148005517802954e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.1203842772129466e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.290003532829539e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.863660436479564e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.0319156196376342e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 8.669179198607153e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 3.6802114999830167e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.2715392405092796e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0003464612204096014, "train/grad_048_lr5.0e+01_wd1.0e+00": 8.119775533194645e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.249859094619751, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1701034307479858, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0420655012130737, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9010767340660095, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7501829266548157, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5666501522064209, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.40927833318710327, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.31418493390083313, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.25652527809143066, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.22270368039608002, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.20139488577842712, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.17972788214683533, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1647626906633377, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.14904065430164337, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1376134604215622, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.12883460521697998, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.11997702717781067, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11161179840564728, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10486973822116852, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10045593231916428, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09744969010353088, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09607325494289398, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09600865840911865, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09707982838153839, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09875179827213287, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10060117393732071, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10178592056035995, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10274980217218399, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10412600636482239, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10648244619369507, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10711450129747391, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11074372380971909, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10886096954345703, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10800694674253464, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11929763853549957, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12779784202575684, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.13049091398715973, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1874750703573227, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20638199150562286, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.31101298332214355, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.38492971658706665, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.38001516461372375, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3753546476364136, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4295240342617035, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5853248834609985, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.36577308177948, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0437088012695312, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.4567779302597046, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.7384449243545532, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6272321428571429, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6512896825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6922123015873016, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7380952380952381, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7874503968253969, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8568948412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8958333333333334, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9141865079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9263392857142857, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9357638888888888, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9419642857142857, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9489087301587301, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9526289682539683, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9575892857142857, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9600694444444444, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9613095238095238, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.964781746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.966765873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9672619047619048, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9685019841269841, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.96875, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9697420634920635, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.970734126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9774305555555556, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9756944444444444, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9568452380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4525636275099143, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.49633391268727156, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5616902418080001, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6390377477356481, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7310555193062133, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.825644246217553, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8732845271850568, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8965999471828664, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.911646398225457, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.923083369697557, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9307556862656119, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9394544778850279, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9434951574488278, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9493001847286661, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9531398207710903, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.954171417251006, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9546947670935599, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9579129122485375, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9610465605271378, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9606534789055406, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9625441871802538, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9626565013047913, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.963232253130925, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9654887438029531, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9642468497394577, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9645508543807798, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.966306759356361, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.968311023584611, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9692261046509167, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9680550422656957, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9679675897847199, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9690954989207482, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9679555461768066, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9699895736243921, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9725775024400131, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9704193772100971, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713910656134759, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9719366887324734, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9688554300574965, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9702844673864991, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9712560429010715, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9735475315880806, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.971687569585084, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9722794935233312, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9672119794220106, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9612822064677179, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9655235136542094, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9587060212395931, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9521218713806813, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.00012171882204711437, "validation/loss_best": 0.11929763853549957, "validation/acc_best": 0.9774305555555556, "validation/f1_best": 0.9725775024400131} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.15967458490282296, "train/grad": 0.06303067237138749, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2449212503433227, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.16460928440094, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.034047510623932, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8889849764108658, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7358060732483864, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5515259702503681, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.3916858433932066, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.2943509120494127, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.2346520783007145, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.19950135868042707, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.17680176865309477, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.15310881013050676, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1362051063310355, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.11713208870962262, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.1025704809371382, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09069294415414333, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07746340352110564, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06377261686138809, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05014660093002021, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03905254772864282, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.027533066803589464, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01829231853596866, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.011153001999482513, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006664639776572585, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0041317326202988626, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002572444686666131, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0017822913825511933, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0013331250473856927, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0009505038801580668, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006928901840001345, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005379728414118289, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00043810817413032056, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003422569762915373, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002746346592903137, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00011640134267508984, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.97522982954979e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 4.1461922228336336e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3314300924539567e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.482267864048481e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.798155277967453e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 4.310030490159988e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 6.786966696381569e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 9.711897000670433e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 5.9578008949756625e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.604998975992203e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.351932391524315e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.4252066612243653e-05, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.2657605111598968e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.384798765182495e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019584350367076696, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019390468248166145, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01916235630400479, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018930234615691006, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018577436306513845, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.017781642274931075, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015854486231692135, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014199613397940992, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01319212599657476, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012489932507742196, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.011971662256401032, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.011366663132794202, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.010879679964855313, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01027520995819941, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0097689278726466, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00931364658754319, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008746321417856962, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008065367308445275, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0072508524032309654, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006428071631817147, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005324116976698861, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00412325828394387, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0028532159567112103, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0018710382812423632, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0012236276424664539, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007823382781498366, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005444317321234848, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00040856271640222987, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00029109323531884, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002124794625706272, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00016694543541234452, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00013552797348893365, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00010676479390895111, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.727226093469654e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.553932417114993e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.9699703938822496e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.141671908366561e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.6034547862741063e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.7823645268928017e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.2868870337196675e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.1357595898376375e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.3494982806853048e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.749938426932802e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.675794704472457e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.4201539019280416e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.8065344865546494e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 5.653197065467116e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 7.53598667705887e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.085795037029494e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.245836615562439, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1659249067306519, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0372624397277832, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8950165510177612, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7440453171730042, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5607748627662659, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.4044302701950073, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.3115095794200897, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.25466209650039673, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2215321660041809, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2003215104341507, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.17880603671073914, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.16400061547756195, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.14835594594478607, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.13700108230113983, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.12824746966362, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.11931020766496658, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11114891618490219, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10417711734771729, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10016876459121704, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09733662754297256, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09593041986227036, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09604234248399734, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09739545732736588, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09908570349216461, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10080769658088684, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10204405337572098, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10295560956001282, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10424426198005676, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10673043131828308, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10726875066757202, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.110909104347229, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10898193717002869, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10811526328325272, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11920712143182755, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12759017944335938, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1294400990009308, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.18622447550296783, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20469428598880768, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3082135021686554, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.38116195797920227, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3763166666030884, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3712623119354248, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4224768877029419, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5776661038398743, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.3459254503250122, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0192065238952637, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.4238603115081787, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.6957263946533203, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6292162698412699, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6552579365079365, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6937003968253969, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7413194444444444, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7886904761904762, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8603670634920635, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8980654761904762, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9136904761904762, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9263392857142857, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9367559523809523, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9432043650793651, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9501488095238095, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9533730158730159, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9580853174603174, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9598214285714286, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9655257936507936, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9665178571428571, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9670138888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9680059523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9702380952380952, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9702380952380952, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.972718253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9739583333333334, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9742063492063492, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9568452380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4550966088040309, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5007484277718997, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5652841692662608, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6446264771591534, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7326965924407819, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8298713096959858, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8760443435606545, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8962978997549241, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9115117579998196, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9242794170500959, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9319937109333496, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9409009474137445, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9444860001380081, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9498634623201576, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9528125557936543, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9547605926594291, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.955203584184917, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9598748181195961, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9604272905971817, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9604417123597999, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9613033259979492, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9628004171761665, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9642535354828099, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9645486340302434, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9644321846894032, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9654391112309035, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9660991127213817, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.967173190820074, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9686688699044798, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9685895857099728, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9679675897847199, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9687566714618475, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9679555461768066, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9696979898400334, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9723964833682378, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9704193772100971, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713910656134759, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9725659443733315, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9684068890446746, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9702844673864991, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9712560429010715, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9735475315880806, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.970963253258907, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9716312716546286, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9674810647174726, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9612822064677179, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9663218689654488, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9587060212395931, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.952182847100457, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 6.786966696381569e-06, "validation/loss_best": 0.3763166666030884, "validation/acc_best": 0.9774305555555556, "validation/f1_best": 0.9735475315880806} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.15612588778138162, "train/grad": 0.06372618973255158, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2300210309028625, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1502337527275086, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0199302637577057, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8750771072506904, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7229106786847115, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.539907251149416, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.380788512006402, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.2856353060528636, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.22735316529870034, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.19286717647686602, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.17046808717772366, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.14692166095599532, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.13011689584702255, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.11110868130810558, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.09679881020449102, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.08525220046751202, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07243713128380477, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.059135943632572886, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04617262316867709, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.035853721788153055, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02527858182787895, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.016827550409361722, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010243145395070314, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006160072842612862, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.003876995127648115, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0024216021224856376, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0016813074983656406, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001262493021786213, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0009053714852780104, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006618605926632881, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005174946319311857, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0004212354402989149, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00033421359956264495, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002698675636202097, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00011162272654473782, "train/loss_035_lr6.0e+00_wd1.0e+00": 7.085930556058884e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 4.171645268797874e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.5159269571304323e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4610495418310165e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 6.730174645781517e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 4.999060183763504e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 5.639567971229553e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 9.654555469751358e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 7.053548470139503e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.9164222329854966e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.1682865917682646e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 7.840674370527268e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.4609657227993012e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.4241505414247513e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01995903505012393, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01977852834854275, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019537534369155764, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019260001410730185, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.018847780651412904, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018033450534567237, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01600144341122359, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014238204834982752, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013211657016072423, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012508503592107445, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0119910771981813, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01140234288526699, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.010924713341519237, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010332834366708994, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.009823240786790848, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009367457604967058, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008797956662019714, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008099481797544286, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0072650871693622325, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006413942736689933, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0052909876441117375, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004079452859878074, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002809824667056091, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0018214700715907383, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0011839422882621875, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.000752382989085163, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005281991790980101, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00039785627166565973, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.000284867510854383, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00020889122140943072, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00016464091921079672, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00013279421144034133, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00010588409502815922, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.660167148264009e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.549410309868108e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.0014582232288376e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.1244785637056795e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.705218964596412e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.8537214213147914e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.1038554752920869e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.0771073541048082e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.3707093360220507e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.7522980671168585e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.0545929913797387e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.8376409813569952e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 8.62195368318908e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.482868917063883e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 6.681480627311469e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.0994795191697168e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2442704439163208, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1643599271774292, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0353689193725586, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8930034041404724, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7416257858276367, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5586356520652771, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.402744323015213, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.31041115522384644, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.25398343801498413, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.22099103033542633, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.19990690052509308, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.17847688496112823, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.16364365816116333, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.14797893166542053, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.13676859438419342, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.12792041897773743, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.11891989409923553, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11080048978328705, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10408752411603928, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.10002695769071579, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09704531729221344, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09590277075767517, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09589225798845291, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09733367711305618, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09913785755634308, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1008935421705246, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10217753052711487, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10304422676563263, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10427150130271912, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10677836835384369, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1074918583035469, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11096606403589249, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1089756041765213, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10822251439094543, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11924514919519424, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12743297219276428, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1292678713798523, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1856682300567627, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20389316976070404, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.30745357275009155, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.38084056973457336, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3741665184497833, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3705504238605499, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4198237359523773, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5732541084289551, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.3385860919952393, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0099408626556396, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.411486029624939, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.6828981637954712, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6282242063492064, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6545138888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6937003968253969, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7418154761904762, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7876984126984127, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8616071428571429, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8978174603174603, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9139384920634921, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9263392857142857, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9365079365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9496527777777778, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9533730158730159, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9583333333333334, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9625496031746031, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9662698412698413, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9670138888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9680059523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9694940476190477, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9709821428571429, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.970734126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.972718253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9734623015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9699900793650794, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9637896825396826, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.957093253968254, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.45484369144900977, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5006134166327983, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5658775675387009, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6461513712505276, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7318632467765119, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8311276668456008, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8750015714100005, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8957452903317071, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9118505121154971, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9240433026682143, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9319255412561579, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9405433783054976, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9444657354616821, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.950383709241794, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9520766276866779, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9547333216183672, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9554172936055316, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9603837000894027, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9599449317540399, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9604219808760089, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9612392483727361, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9632859443363432, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9656780048053609, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9652963338465871, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9645741573670631, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9654391112309035, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9659337054388578, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9672048726510946, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9683171816087773, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9680550422656957, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9677425939873016, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9687566714618475, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9679555461768066, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9700344235459033, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9723964833682378, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9704193772100971, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713910656134759, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9725659443733315, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.968488006066727, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9702844673864991, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9708964749836514, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9735475315880806, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9713226560436107, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.971761236084699, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9676618501607601, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9612822064677179, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.965937557419078, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.958366329481426, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9524916085657432, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 5.639567971229553e-06, "validation/loss_best": 0.3741665184497833, "validation/acc_best": 0.9774305555555556, "validation/f1_best": 0.9735475315880806} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.15832355462014674, "train/grad": 0.06278234407305718, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2331608295440675, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1533087253570558, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0233788013458252, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8791100886464119, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7272517818212509, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5439461623132229, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.38600013457238674, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.29210446678102014, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.23436649579554797, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.19998800739645958, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.17768380181863905, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.15423948645591737, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.13741898293606936, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.11833075327798724, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.10347085826098919, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09136543999426067, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07769835969433188, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06350700792856515, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04935256815515458, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.038109644446522, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02650922360830009, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.017467346293851733, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010587953953072429, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006371542466804385, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004008382689207792, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0025102369487285613, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0017464652936905623, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0013186064455658196, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0009471981041133404, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006904674135148525, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005362766981124878, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.000436995392665267, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003446138184517622, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00026677765883505346, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00011324256658554077, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.66352640837431e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 4.456793889403343e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2769970819354057e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.5314659103751184e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 6.385538727045059e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 5.689607933163643e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 7.699495181441307e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3487953692674637e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 9.391466155648232e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.643028274178505e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.2789946049451827e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 7.300730794668198e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.167699858546257e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.291632652282715e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019544838713482022, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019350610189139843, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019089708989486098, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01880273566581309, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01842492670286447, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01766890282742679, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015774800423532725, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014153703660704196, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013204058087430894, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01254642341984436, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.012057490020524711, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01143970406614244, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01093877650797367, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0103038883022964, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.009763349678833038, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.009280945144128054, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008674856942379848, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007967204550513998, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007117412016959861, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0062862657883670185, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005178826667834074, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003997533299261704, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0027698241808684545, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0018119257673970424, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0011721263591607566, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007455603672133293, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005222226479963865, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00039287444145884364, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002780160576367052, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00020246893520379673, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00015923170823953114, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00012955081958352822, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00010296784828824457, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.308486410896877e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.2837518219348566e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.934897772069235e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 3.0730520801398595e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.5779706401909408e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.968704587247863e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.169018917531405e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 1.0346183138572497e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.216318474361199e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 2.145665762632972e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.2566267671580658e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.9016710097800295e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.3803806216648146e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.8738258601558253e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 9.926657812026987e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.033882508303436e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.24405837059021, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.164075255393982, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0351457595825195, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8926530480384827, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7412665486335754, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5583404898643494, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.4024236500263214, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.31032368540763855, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.25398150086402893, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.2208189219236374, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.19976218044757843, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.1784691959619522, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.16364911198616028, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.14797979593276978, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.13673529028892517, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.12784545123577118, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.11892654001712799, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.11073020845651627, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.10411492735147476, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.09998856484889984, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09716879576444626, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.09597335755825043, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.09592743217945099, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.09728864580392838, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.09914929419755936, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.10087554156780243, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.102198027074337, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.10305413603782654, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10432060062885284, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.10675650835037231, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.10748390853404999, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11087058484554291, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.10896731168031693, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.10828616470098495, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11914928257465363, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12746965885162354, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1292993426322937, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.18565714359283447, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2038230001926422, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.307159423828125, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.38047054409980774, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.37438637018203735, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.36995169520378113, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.41960829496383667, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5735337734222412, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.3378117084503174, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0086973905563354, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.409091591835022, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.6794114112854004, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6277281746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6545138888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6939484126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7420634920634921, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7881944444444444, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8618551587301587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8983134920634921, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9139384920634921, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.925843253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9372519841269841, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.949156746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.953125, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9580853174603174, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9615575396825397, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.966765873015873, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9677579365079365, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9692460317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9709821428571429, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.970734126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.970734126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.972718253968254, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9732142857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9761904761904762, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9734623015873016, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9699900793650794, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9568452380952381, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4533135433613961, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5004170244708175, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5660328270307223, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6469706754893517, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.732084923705368, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8314086677822188, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.875697105249689, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8955940815225303, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9107586431264113, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9247986230941974, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9319255412561579, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9396781658501412, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9439197303367469, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9501504476110932, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9520766276866779, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9545303054999046, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9556368692877707, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9603837000894027, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9593883454831679, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9602418821118189, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.960701351791229, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9631947180340881, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9656780048053609, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9652963338465871, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9649585279173478, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9654391112309035, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9661371315714182, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9672048726510946, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9681332343791142, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9680550422656957, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9677425939873016, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9687566714618475, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9679555461768066, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9700344235459033, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9722154262401107, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9704193772100971, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9713910656134759, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9725659443733315, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.968488006066727, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9702844673864991, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.971127212460099, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9735475315880806, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9713226560436107, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.971761236084699, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9674810647174726, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9612822064677179, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9661397799599746, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9587318595208748, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9523602218935753, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 7.699495181441307e-06, "validation/loss_best": 0.37438637018203735, "validation/acc_best": 0.9774305555555556, "validation/f1_best": 0.9735475315880806} diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..818613d2bdbb093f351f91cf2c79631f8920f18e --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..f8490e86480608ef74c84806228023385f9ded6b --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 6, "eval/id_best": 24, "eval/lr_best": 0.0003, "eval/wd_best": 0.05, "eval/train/loss": 2.1266303062438965, "eval/train/acc": 0.35932265896309046, "eval/train/acc_std": 0.0023684007937128304, "eval/train/f1": 0.30619275494657777, "eval/train/f1_std": 0.0024261667694791635, "eval/validation/loss": 2.4374959468841553, "eval/validation/acc": 0.2652270210409745, "eval/validation/acc_std": 0.005532223324409703, "eval/validation/f1": 0.2077494155918342, "eval/validation/f1_std": 0.005075415887341154, "eval/test/loss": 2.3717026710510254, "eval/test/acc": 0.2769944341372913, "eval/test/acc_std": 0.0053214647868485825, "eval/test/f1": 0.21331332511308143, "eval/test/f1_std": 0.005178533185979236, "eval/testid/loss": 2.336786985397339, "eval/testid/acc": 0.2959321380374012, "eval/testid/acc_std": 0.005766870872841902, "eval/testid/f1": 0.24068460038782558, "eval/testid/f1_std": 0.005541370071999979} diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..b729313aec7ce9d41582e6eb96f0ac8ce8d605f3 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 6, "eval/best/id_best": 24, "eval/best/lr_best": 0.0003, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.1266303062438965, "eval/best/train/acc": 0.35932265896309046, "eval/best/train/acc_std": 0.0023684007937128304, "eval/best/train/f1": 0.30619275494657777, "eval/best/train/f1_std": 0.0024261667694791635, "eval/best/validation/loss": 2.4374959468841553, "eval/best/validation/acc": 0.2652270210409745, "eval/best/validation/acc_std": 0.005532223324409703, "eval/best/validation/f1": 0.2077494155918342, "eval/best/validation/f1_std": 0.005075415887341154, "eval/best/test/loss": 2.3717026710510254, "eval/best/test/acc": 0.2769944341372913, "eval/best/test/acc_std": 0.0053214647868485825, "eval/best/test/f1": 0.21331332511308143, "eval/best/test/f1_std": 0.005178533185979236, "eval/best/testid/loss": 2.336786985397339, "eval/best/testid/acc": 0.2959321380374012, "eval/best/testid/acc_std": 0.005766870872841902, "eval/best/testid/f1": 0.24068460038782558, "eval/best/testid/f1_std": 0.005541370071999979} diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..86e25f1f0400b138974313a94d21cbe82e8b12d9 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 19, "eval/last/lr_best": 0.00013199999999999998, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.099229574203491, "eval/last/train/acc": 0.3693106733458312, "eval/last/train/acc_std": 0.0024006206529680785, "eval/last/train/f1": 0.3128059812692869, "eval/last/train/f1_std": 0.002558052719611866, "eval/last/validation/loss": 2.464930534362793, "eval/last/validation/acc": 0.2602436323366556, "eval/last/validation/acc_std": 0.005287523944748845, "eval/last/validation/f1": 0.2011314630607287, "eval/last/validation/f1_std": 0.004811255663379231, "eval/last/test/loss": 2.4013166427612305, "eval/last/test/acc": 0.27217068645640075, "eval/last/test/acc_std": 0.005085234197581823, "eval/last/test/f1": 0.20186235506816455, "eval/last/test/f1_std": 0.005114241313029599, "eval/last/testid/loss": 2.3316454887390137, "eval/last/testid/acc": 0.28860613071139385, "eval/last/testid/acc_std": 0.005681091544264919, "eval/last/testid/f1": 0.2274397035897179, "eval/last/testid/f1_std": 0.005377975876302206} diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9b742e4d13e5d031b83666c03081eb8655767ed4 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",train,2.1266303062438965,0.35932265896309046,0.0023684007937128304,0.30619275494657777,0.0024261667694791635 +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",validation,2.4374959468841553,0.2652270210409745,0.005532223324409703,0.2077494155918342,0.005075415887341154 +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",test,2.3717026710510254,0.2769944341372913,0.0053214647868485825,0.21331332511308143,0.005178533185979236 +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",testid,2.336786985397339,0.2959321380374012,0.005766870872841902,0.24068460038782558,0.005541370071999979 diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..9b742e4d13e5d031b83666c03081eb8655767ed4 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",train,2.1266303062438965,0.35932265896309046,0.0023684007937128304,0.30619275494657777,0.0024261667694791635 +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",validation,2.4374959468841553,0.2652270210409745,0.005532223324409703,0.2077494155918342,0.005075415887341154 +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",test,2.3717026710510254,0.2769944341372913,0.0053214647868485825,0.21331332511308143,0.005178533185979236 +flat_mae,patch,attn,nsd_cococlip,best,6,0.0003,0.05,24,"[1, 1.0]",testid,2.336786985397339,0.2959321380374012,0.005766870872841902,0.24068460038782558,0.005541370071999979 diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..e241e4e3dff8cf44783c49db4b1a3af8be06da19 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",train,2.099229574203491,0.3693106733458312,0.0024006206529680785,0.3128059812692869,0.002558052719611866 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",validation,2.464930534362793,0.2602436323366556,0.005287523944748845,0.2011314630607287,0.004811255663379231 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",test,2.4013166427612305,0.27217068645640075,0.005085234197581823,0.20186235506816455,0.005114241313029599 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",testid,2.3316454887390137,0.28860613071139385,0.005681091544264919,0.2274397035897179,0.005377975876302206 diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b86e7670b6273b4ba183f07495a4a72a6ae053c --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,961 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:20:14 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n400_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:22:06 lr: nan time: 3.3167 data: 2.7795 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:39 lr: 0.000003 loss: 3.1939 (3.2145) grad: 0.1863 (0.1937) time: 0.4406 data: 0.0040 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:06 lr: 0.000006 loss: 3.1994 (3.2070) grad: 0.1863 (0.1914) time: 0.4562 data: 0.0050 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:48 lr: 0.000009 loss: 3.1934 (3.2009) grad: 0.1762 (0.1875) time: 0.4459 data: 0.0050 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:34 lr: 0.000012 loss: 3.1841 (3.1944) grad: 0.1760 (0.1847) time: 0.4469 data: 0.0051 max mem: 22448 +train: [0] [100/400] eta: 0:02:22 lr: 0.000015 loss: 3.1716 (3.1904) grad: 0.1746 (0.1822) time: 0.4353 data: 0.0049 max mem: 22448 +train: [0] [120/400] eta: 0:02:11 lr: 0.000018 loss: 3.1667 (3.1872) grad: 0.1637 (0.1791) time: 0.4419 data: 0.0048 max mem: 22448 +train: [0] [140/400] eta: 0:02:00 lr: 0.000021 loss: 3.1731 (3.1847) grad: 0.1650 (0.1782) time: 0.4404 data: 0.0050 max mem: 22448 +train: [0] [160/400] eta: 0:01:50 lr: 0.000024 loss: 3.1659 (3.1817) grad: 0.1813 (0.1791) time: 0.4373 data: 0.0047 max mem: 22448 +train: [0] [180/400] eta: 0:01:40 lr: 0.000027 loss: 3.1632 (3.1790) grad: 0.1768 (0.1782) time: 0.4426 data: 0.0046 max mem: 22448 +train: [0] [200/400] eta: 0:01:31 lr: 0.000030 loss: 3.1641 (3.1779) grad: 0.1585 (0.1764) time: 0.4416 data: 0.0049 max mem: 22448 +train: [0] [220/400] eta: 0:01:21 lr: 0.000033 loss: 3.1701 (3.1769) grad: 0.1583 (0.1749) time: 0.4338 data: 0.0047 max mem: 22448 +train: [0] [240/400] eta: 0:01:12 lr: 0.000036 loss: 3.1610 (3.1758) grad: 0.1606 (0.1738) time: 0.4367 data: 0.0048 max mem: 22448 +train: [0] [260/400] eta: 0:01:03 lr: 0.000039 loss: 3.1577 (3.1741) grad: 0.1545 (0.1724) time: 0.4322 data: 0.0048 max mem: 22448 +train: [0] [280/400] eta: 0:00:54 lr: 0.000042 loss: 3.1519 (3.1716) grad: 0.1533 (0.1713) time: 0.4352 data: 0.0047 max mem: 22448 +train: [0] [300/400] eta: 0:00:44 lr: 0.000045 loss: 3.1295 (3.1684) grad: 0.1563 (0.1707) time: 0.4322 data: 0.0046 max mem: 22448 +train: [0] [320/400] eta: 0:00:35 lr: 0.000048 loss: 3.1231 (3.1663) grad: 0.1659 (0.1708) time: 0.4314 data: 0.0048 max mem: 22448 +train: [0] [340/400] eta: 0:00:26 lr: 0.000051 loss: 3.1326 (3.1640) grad: 0.1659 (0.1705) time: 0.4434 data: 0.0049 max mem: 22448 +train: [0] [360/400] eta: 0:00:17 lr: 0.000054 loss: 3.1138 (3.1606) grad: 0.1653 (0.1708) time: 0.4392 data: 0.0048 max mem: 22448 +train: [0] [380/400] eta: 0:00:08 lr: 0.000057 loss: 3.1027 (3.1577) grad: 0.1727 (0.1710) time: 0.4399 data: 0.0047 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.1027 (3.1556) grad: 0.1727 (0.1713) time: 0.4332 data: 0.0049 max mem: 22448 +train: [0] Total time: 0:02:58 (0.4470 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.1027 (3.1556) grad: 0.1727 (0.1713) +eval (validation): [0] [ 0/85] eta: 0:04:19 time: 3.0524 data: 2.7734 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:30 time: 0.3397 data: 0.0047 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3723 data: 0.0044 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3458 data: 0.0043 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3107 data: 0.0043 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3099 data: 0.0044 max mem: 22448 +eval (validation): [0] Total time: 0:00:31 (0.3755 s / it) +cv: [0] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 2.647 acc: 0.207 f1: 0.146 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:19:59 lr: nan time: 2.9979 data: 2.6710 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:33 lr: 0.000063 loss: 3.0592 (3.0599) grad: 0.1731 (0.1718) time: 0.4413 data: 0.0039 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:00 lr: 0.000066 loss: 3.0592 (3.0575) grad: 0.1663 (0.1692) time: 0.4361 data: 0.0045 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:43 lr: 0.000069 loss: 3.0485 (3.0486) grad: 0.1663 (0.1701) time: 0.4420 data: 0.0049 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:30 lr: 0.000072 loss: 3.0421 (3.0477) grad: 0.1692 (0.1719) time: 0.4408 data: 0.0049 max mem: 22448 +train: [1] [100/400] eta: 0:02:20 lr: 0.000075 loss: 3.0363 (3.0433) grad: 0.1744 (0.1731) time: 0.4473 data: 0.0049 max mem: 22448 +train: [1] [120/400] eta: 0:02:10 lr: 0.000078 loss: 3.0270 (3.0405) grad: 0.1827 (0.1745) time: 0.4530 data: 0.0049 max mem: 22448 +train: [1] [140/400] eta: 0:01:59 lr: 0.000081 loss: 3.0229 (3.0386) grad: 0.1779 (0.1755) time: 0.4350 data: 0.0048 max mem: 22448 +train: [1] [160/400] eta: 0:01:49 lr: 0.000084 loss: 3.0166 (3.0384) grad: 0.1775 (0.1758) time: 0.4419 data: 0.0049 max mem: 22448 +train: [1] [180/400] eta: 0:01:40 lr: 0.000087 loss: 3.0166 (3.0378) grad: 0.1773 (0.1764) time: 0.4428 data: 0.0049 max mem: 22448 +train: [1] [200/400] eta: 0:01:30 lr: 0.000090 loss: 3.0067 (3.0365) grad: 0.1775 (0.1773) time: 0.4349 data: 0.0048 max mem: 22448 +train: [1] [220/400] eta: 0:01:21 lr: 0.000093 loss: 2.9940 (3.0307) grad: 0.1895 (0.1790) time: 0.4449 data: 0.0048 max mem: 22448 +train: [1] [240/400] eta: 0:01:12 lr: 0.000096 loss: 2.9855 (3.0277) grad: 0.1895 (0.1795) time: 0.4423 data: 0.0049 max mem: 22448 +train: [1] [260/400] eta: 0:01:03 lr: 0.000099 loss: 2.9870 (3.0264) grad: 0.1825 (0.1800) time: 0.4518 data: 0.0050 max mem: 22448 +train: [1] [280/400] eta: 0:00:54 lr: 0.000102 loss: 2.9870 (3.0229) grad: 0.1823 (0.1802) time: 0.4506 data: 0.0054 max mem: 22448 +train: [1] [300/400] eta: 0:00:45 lr: 0.000105 loss: 2.9750 (3.0218) grad: 0.1861 (0.1808) time: 0.4401 data: 0.0049 max mem: 22448 +train: [1] [320/400] eta: 0:00:36 lr: 0.000108 loss: 2.9750 (3.0183) grad: 0.1895 (0.1815) time: 0.4403 data: 0.0049 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.9508 (3.0143) grad: 0.1912 (0.1820) time: 0.4540 data: 0.0050 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.9703 (3.0129) grad: 0.1876 (0.1824) time: 0.4382 data: 0.0048 max mem: 22448 +train: [1] [380/400] eta: 0:00:08 lr: 0.000117 loss: 2.9542 (3.0091) grad: 0.1874 (0.1832) time: 0.4350 data: 0.0050 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.9416 (3.0080) grad: 0.2005 (0.1846) time: 0.4326 data: 0.0048 max mem: 22448 +train: [1] Total time: 0:02:59 (0.4492 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.9416 (3.0080) grad: 0.2005 (0.1846) +eval (validation): [1] [ 0/85] eta: 0:04:27 time: 3.1482 data: 2.8615 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:31 time: 0.3455 data: 0.0035 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:18 time: 0.3412 data: 0.0038 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3495 data: 0.0040 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3350 data: 0.0043 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3274 data: 0.0043 max mem: 22448 +eval (validation): [1] Total time: 0:00:32 (0.3772 s / it) +cv: [1] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 2.541 acc: 0.236 f1: 0.161 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:20:30 lr: nan time: 3.0774 data: 2.7485 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:37 lr: 0.000123 loss: 2.9639 (2.9421) grad: 0.2164 (0.2188) time: 0.4459 data: 0.0036 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:04 lr: 0.000126 loss: 2.9592 (2.9448) grad: 0.2156 (0.2150) time: 0.4481 data: 0.0049 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:45 lr: 0.000129 loss: 2.9592 (2.9455) grad: 0.2053 (0.2138) time: 0.4360 data: 0.0048 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:32 lr: 0.000132 loss: 2.9315 (2.9423) grad: 0.2040 (0.2118) time: 0.4441 data: 0.0049 max mem: 22448 +train: [2] [100/400] eta: 0:02:21 lr: 0.000135 loss: 2.9007 (2.9359) grad: 0.2096 (0.2123) time: 0.4487 data: 0.0045 max mem: 22448 +train: [2] [120/400] eta: 0:02:09 lr: 0.000138 loss: 2.9007 (2.9305) grad: 0.2164 (0.2141) time: 0.4229 data: 0.0046 max mem: 22448 +train: [2] [140/400] eta: 0:01:59 lr: 0.000141 loss: 2.9257 (2.9331) grad: 0.2201 (0.2152) time: 0.4507 data: 0.0047 max mem: 22448 +train: [2] [160/400] eta: 0:01:49 lr: 0.000144 loss: 2.9248 (2.9285) grad: 0.2200 (0.2167) time: 0.4358 data: 0.0049 max mem: 22448 +train: [2] [180/400] eta: 0:01:40 lr: 0.000147 loss: 2.8822 (2.9225) grad: 0.2200 (0.2171) time: 0.4317 data: 0.0048 max mem: 22448 +train: [2] [200/400] eta: 0:01:30 lr: 0.000150 loss: 2.8822 (2.9187) grad: 0.2202 (0.2178) time: 0.4325 data: 0.0049 max mem: 22448 +train: [2] [220/400] eta: 0:01:21 lr: 0.000153 loss: 2.9023 (2.9193) grad: 0.2319 (0.2208) time: 0.4289 data: 0.0048 max mem: 22448 +train: [2] [240/400] eta: 0:01:11 lr: 0.000156 loss: 2.9274 (2.9210) grad: 0.2392 (0.2231) time: 0.4393 data: 0.0050 max mem: 22448 +train: [2] [260/400] eta: 0:01:03 lr: 0.000159 loss: 2.9347 (2.9212) grad: 0.2612 (0.2299) time: 0.4572 data: 0.0050 max mem: 22448 +train: [2] [280/400] eta: 0:00:53 lr: 0.000162 loss: 2.9689 (2.9412) grad: 0.4245 (0.2725) time: 0.4438 data: 0.0048 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=68.49 > 63.56) at step 546. Freezing. +train: [2] [300/400] eta: 0:00:44 lr: 0.000165 loss: 3.2046 (2.9731) grad: 0.7864 (0.3138) time: 0.4351 data: 0.0047 max mem: 22448 +train: [2] [320/400] eta: 0:00:35 lr: 0.000168 loss: 2.9013 (2.9666) grad: 0.2224 (0.3078) time: 0.4539 data: 0.0050 max mem: 22448 +train: [2] [340/400] eta: 0:00:26 lr: 0.000171 loss: 2.8705 (2.9629) grad: 0.2207 (0.3028) time: 0.4349 data: 0.0047 max mem: 22448 +train: [2] [360/400] eta: 0:00:17 lr: 0.000174 loss: 2.9003 (2.9598) grad: 0.2244 (0.2986) time: 0.4354 data: 0.0045 max mem: 22448 +train: [2] [380/400] eta: 0:00:08 lr: 0.000177 loss: 2.9160 (2.9586) grad: 0.2499 (0.2974) time: 0.4337 data: 0.0047 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=76.63 > 63.56) at step 599. Freezing. +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.9931 (2.9782) grad: 0.3272 (0.3312) time: 0.4333 data: 0.0048 max mem: 22448 +train: [2] Total time: 0:02:58 (0.4468 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.9931 (2.9782) grad: 0.3272 (0.3312) +eval (validation): [2] [ 0/85] eta: 0:04:27 time: 3.1449 data: 2.8764 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:29 time: 0.3193 data: 0.0033 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:17 time: 0.3384 data: 0.0041 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3557 data: 0.0043 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3200 data: 0.0040 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3136 data: 0.0039 max mem: 22448 +eval (validation): [2] Total time: 0:00:31 (0.3685 s / it) +cv: [2] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.503 acc: 0.251 f1: 0.176 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:20:36 lr: nan time: 3.0920 data: 2.7177 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:41 lr: 0.000183 loss: 2.8236 (2.8533) grad: 0.2080 (0.2098) time: 0.4580 data: 0.0048 max mem: 22448 +train: [3] [ 40/400] eta: 0:03:03 lr: 0.000186 loss: 2.8628 (2.8718) grad: 0.2217 (0.2215) time: 0.4340 data: 0.0050 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:45 lr: 0.000189 loss: 2.8595 (2.8674) grad: 0.2354 (0.2274) time: 0.4390 data: 0.0049 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:33 lr: 0.000192 loss: 2.8600 (2.8767) grad: 0.2549 (0.2457) time: 0.4554 data: 0.0050 max mem: 22448 +train: [3] [100/400] eta: 0:02:21 lr: 0.000195 loss: 2.9359 (2.8940) grad: 0.3432 (0.2920) time: 0.4377 data: 0.0050 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=64.64 > 63.56) at step 659. Freezing. +train: [3] [120/400] eta: 0:02:10 lr: 0.000198 loss: 2.9941 (2.9713) grad: 0.4993 (0.4155) time: 0.4391 data: 0.0049 max mem: 22448 +train: [3] [140/400] eta: 0:02:00 lr: 0.000201 loss: 2.9513 (2.9573) grad: 0.2431 (0.3891) time: 0.4547 data: 0.0049 max mem: 22448 +train: [3] [160/400] eta: 0:01:50 lr: 0.000204 loss: 2.8658 (2.9487) grad: 0.2277 (0.3688) time: 0.4329 data: 0.0049 max mem: 22448 +train: [3] [180/400] eta: 0:01:40 lr: 0.000207 loss: 2.8553 (2.9361) grad: 0.2203 (0.3523) time: 0.4344 data: 0.0049 max mem: 22448 +train: [3] [200/400] eta: 0:01:31 lr: 0.000210 loss: 2.8514 (2.9294) grad: 0.2191 (0.3395) time: 0.4330 data: 0.0045 max mem: 22448 +train: [3] [220/400] eta: 0:01:21 lr: 0.000213 loss: 2.8543 (2.9220) grad: 0.2213 (0.3291) time: 0.4266 data: 0.0050 max mem: 22448 +train: [3] [240/400] eta: 0:01:12 lr: 0.000216 loss: 2.8506 (2.9159) grad: 0.2272 (0.3209) time: 0.4433 data: 0.0049 max mem: 22448 +train: [3] [260/400] eta: 0:01:03 lr: 0.000219 loss: 2.8506 (2.9103) grad: 0.2237 (0.3135) time: 0.4459 data: 0.0051 max mem: 22448 +train: [3] [280/400] eta: 0:00:53 lr: 0.000222 loss: 2.8251 (2.9052) grad: 0.2237 (0.3075) time: 0.4328 data: 0.0049 max mem: 22448 +train: [3] [300/400] eta: 0:00:44 lr: 0.000225 loss: 2.8528 (2.9038) grad: 0.2427 (0.3071) time: 0.4274 data: 0.0048 max mem: 22448 +train: [3] [320/400] eta: 0:00:35 lr: 0.000228 loss: 2.9088 (2.9082) grad: 0.3473 (0.3222) time: 0.4450 data: 0.0051 max mem: 22448 +train: [3] [340/400] eta: 0:00:26 lr: 0.000231 loss: 3.1259 (2.9320) grad: 0.7953 (0.3597) time: 0.4345 data: 0.0053 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=64.59 > 63.56) at step 775. Freezing. +train: [3] [360/400] eta: 0:00:17 lr: 0.000234 loss: 3.2797 (2.9519) grad: 0.8555 (0.3871) time: 0.4352 data: 0.0047 max mem: 22448 +train: [3] [380/400] eta: 0:00:08 lr: 0.000237 loss: 2.8686 (2.9454) grad: 0.2359 (0.3786) time: 0.4296 data: 0.0049 max mem: 22448 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 2.8235 (2.9390) grad: 0.2210 (0.3705) time: 0.4288 data: 0.0050 max mem: 22448 +train: [3] Total time: 0:02:58 (0.4456 s / it) +train: [3] Summary: lr: 0.000240 loss: 2.8235 (2.9390) grad: 0.2210 (0.3705) +eval (validation): [3] [ 0/85] eta: 0:04:14 time: 2.9916 data: 2.7650 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:29 time: 0.3271 data: 0.0029 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3472 data: 0.0038 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3289 data: 0.0041 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3206 data: 0.0040 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3148 data: 0.0040 max mem: 22448 +eval (validation): [3] Total time: 0:00:30 (0.3646 s / it) +cv: [3] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.553 acc: 0.234 f1: 0.167 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:20:10 lr: nan time: 3.0274 data: 2.7046 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:34 lr: 0.000243 loss: 2.7477 (2.7860) grad: 0.2248 (0.2263) time: 0.4408 data: 0.0048 max mem: 22448 +train: [4] [ 40/400] eta: 0:02:59 lr: 0.000246 loss: 2.7797 (2.7894) grad: 0.2256 (0.2246) time: 0.4274 data: 0.0046 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:41 lr: 0.000249 loss: 2.8009 (2.7936) grad: 0.2256 (0.2250) time: 0.4272 data: 0.0051 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:29 lr: 0.000252 loss: 2.8009 (2.7980) grad: 0.2236 (0.2230) time: 0.4479 data: 0.0052 max mem: 22448 +train: [4] [100/400] eta: 0:02:17 lr: 0.000255 loss: 2.8247 (2.8040) grad: 0.2194 (0.2235) time: 0.4255 data: 0.0049 max mem: 22448 +train: [4] [120/400] eta: 0:02:07 lr: 0.000258 loss: 2.7874 (2.8011) grad: 0.2278 (0.2256) time: 0.4305 data: 0.0047 max mem: 22448 +train: [4] [140/400] eta: 0:01:58 lr: 0.000261 loss: 2.7836 (2.8006) grad: 0.2453 (0.2294) time: 0.4526 data: 0.0050 max mem: 22448 +train: [4] [160/400] eta: 0:01:48 lr: 0.000264 loss: 2.7947 (2.8017) grad: 0.2541 (0.2324) time: 0.4314 data: 0.0050 max mem: 22448 +train: [4] [180/400] eta: 0:01:38 lr: 0.000267 loss: 2.8211 (2.8063) grad: 0.2583 (0.2371) time: 0.4310 data: 0.0050 max mem: 22448 +train: [4] [200/400] eta: 0:01:29 lr: 0.000270 loss: 2.8534 (2.8170) grad: 0.2911 (0.2633) time: 0.4262 data: 0.0051 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=70.92 > 63.56) at step 903. Freezing. +train: [4] [220/400] eta: 0:01:20 lr: 0.000273 loss: 2.8534 (2.8386) grad: 0.3161 (0.2918) time: 0.4300 data: 0.0049 max mem: 22448 +train: [4] [240/400] eta: 0:01:11 lr: 0.000276 loss: 2.8422 (2.8359) grad: 0.2290 (0.2865) time: 0.4318 data: 0.0049 max mem: 22448 +train: [4] [260/400] eta: 0:01:02 lr: 0.000279 loss: 2.8331 (2.8357) grad: 0.2247 (0.2821) time: 0.4451 data: 0.0051 max mem: 22448 +train: [4] [280/400] eta: 0:00:53 lr: 0.000282 loss: 2.8296 (2.8338) grad: 0.2276 (0.2788) time: 0.4237 data: 0.0047 max mem: 22448 +train: [4] [300/400] eta: 0:00:44 lr: 0.000285 loss: 2.8078 (2.8322) grad: 0.2310 (0.2753) time: 0.4294 data: 0.0048 max mem: 22448 +train: [4] [320/400] eta: 0:00:35 lr: 0.000288 loss: 2.8094 (2.8309) grad: 0.2203 (0.2712) time: 0.4353 data: 0.0049 max mem: 22448 +train: [4] [340/400] eta: 0:00:26 lr: 0.000291 loss: 2.8007 (2.8297) grad: 0.2152 (0.2681) time: 0.4348 data: 0.0049 max mem: 22448 +train: [4] [360/400] eta: 0:00:17 lr: 0.000294 loss: 2.7965 (2.8294) grad: 0.2187 (0.2657) time: 0.4284 data: 0.0050 max mem: 22448 +train: [4] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.7981 (2.8284) grad: 0.2223 (0.2635) time: 0.4312 data: 0.0051 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.7776 (2.8253) grad: 0.2195 (0.2613) time: 0.4317 data: 0.0050 max mem: 22448 +train: [4] Total time: 0:02:56 (0.4402 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.7776 (2.8253) grad: 0.2195 (0.2613) +eval (validation): [4] [ 0/85] eta: 0:04:18 time: 3.0429 data: 2.7720 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:30 time: 0.3413 data: 0.0030 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:18 time: 0.3684 data: 0.0044 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3382 data: 0.0043 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3297 data: 0.0042 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3168 data: 0.0040 max mem: 22448 +eval (validation): [4] Total time: 0:00:32 (0.3775 s / it) +cv: [4] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.486 acc: 0.250 f1: 0.184 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:20:43 lr: nan time: 3.1086 data: 2.7739 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:31 lr: 0.000300 loss: 2.7051 (2.7073) grad: 0.2307 (0.2327) time: 0.4302 data: 0.0033 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:01 lr: 0.000300 loss: 2.7324 (2.7534) grad: 0.2304 (0.2335) time: 0.4495 data: 0.0047 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:44 lr: 0.000300 loss: 2.7633 (2.7606) grad: 0.2339 (0.2357) time: 0.4432 data: 0.0050 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:31 lr: 0.000300 loss: 2.7620 (2.7522) grad: 0.2346 (0.2352) time: 0.4412 data: 0.0051 max mem: 22448 +train: [5] [100/400] eta: 0:02:19 lr: 0.000300 loss: 2.7383 (2.7555) grad: 0.2342 (0.2361) time: 0.4260 data: 0.0046 max mem: 22448 +train: [5] [120/400] eta: 0:02:09 lr: 0.000300 loss: 2.7444 (2.7470) grad: 0.2310 (0.2344) time: 0.4441 data: 0.0047 max mem: 22448 +train: [5] [140/400] eta: 0:01:59 lr: 0.000300 loss: 2.6915 (2.7373) grad: 0.2225 (0.2323) time: 0.4373 data: 0.0050 max mem: 22448 +train: [5] [160/400] eta: 0:01:48 lr: 0.000299 loss: 2.6803 (2.7332) grad: 0.2199 (0.2320) time: 0.4245 data: 0.0050 max mem: 22448 +train: [5] [180/400] eta: 0:01:38 lr: 0.000299 loss: 2.7520 (2.7382) grad: 0.2307 (0.2322) time: 0.4191 data: 0.0050 max mem: 22448 +train: [5] [200/400] eta: 0:01:29 lr: 0.000299 loss: 2.7568 (2.7364) grad: 0.2333 (0.2329) time: 0.4276 data: 0.0049 max mem: 22448 +train: [5] [220/400] eta: 0:01:20 lr: 0.000299 loss: 2.6801 (2.7349) grad: 0.2333 (0.2327) time: 0.4279 data: 0.0050 max mem: 22448 +train: [5] [240/400] eta: 0:01:11 lr: 0.000299 loss: 2.7208 (2.7353) grad: 0.2343 (0.2332) time: 0.4365 data: 0.0048 max mem: 22448 +train: [5] [260/400] eta: 0:01:02 lr: 0.000299 loss: 2.7208 (2.7330) grad: 0.2327 (0.2333) time: 0.4411 data: 0.0049 max mem: 22448 +train: [5] [280/400] eta: 0:00:53 lr: 0.000298 loss: 2.7233 (2.7338) grad: 0.2355 (0.2341) time: 0.4302 data: 0.0049 max mem: 22448 +train: [5] [300/400] eta: 0:00:44 lr: 0.000298 loss: 2.7092 (2.7301) grad: 0.2408 (0.2344) time: 0.4204 data: 0.0049 max mem: 22448 +train: [5] [320/400] eta: 0:00:35 lr: 0.000298 loss: 2.7119 (2.7318) grad: 0.2430 (0.2351) time: 0.4471 data: 0.0049 max mem: 22448 +train: [5] [340/400] eta: 0:00:26 lr: 0.000298 loss: 2.7206 (2.7299) grad: 0.2423 (0.2352) time: 0.4336 data: 0.0049 max mem: 22448 +train: [5] [360/400] eta: 0:00:17 lr: 0.000297 loss: 2.6904 (2.7291) grad: 0.2416 (0.2355) time: 0.4320 data: 0.0048 max mem: 22448 +train: [5] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.7168 (2.7290) grad: 0.2430 (0.2358) time: 0.4299 data: 0.0049 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.6903 (2.7267) grad: 0.2305 (0.2353) time: 0.4291 data: 0.0047 max mem: 22448 +train: [5] Total time: 0:02:56 (0.4409 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.6903 (2.7267) grad: 0.2305 (0.2353) +eval (validation): [5] [ 0/85] eta: 0:04:10 time: 2.9522 data: 2.6872 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:30 time: 0.3419 data: 0.0049 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:18 time: 0.3468 data: 0.0035 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3422 data: 0.0043 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3231 data: 0.0040 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3141 data: 0.0039 max mem: 22448 +eval (validation): [5] Total time: 0:00:31 (0.3714 s / it) +cv: [5] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 2.448 acc: 0.261 f1: 0.194 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:20:31 lr: nan time: 3.0782 data: 2.7612 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:29 lr: 0.000296 loss: 2.6370 (2.6287) grad: 0.2231 (0.2256) time: 0.4260 data: 0.0038 max mem: 22448 +train: [6] [ 40/400] eta: 0:02:59 lr: 0.000296 loss: 2.6458 (2.6549) grad: 0.2327 (0.2307) time: 0.4448 data: 0.0044 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:44 lr: 0.000296 loss: 2.6458 (2.6488) grad: 0.2327 (0.2315) time: 0.4488 data: 0.0053 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:31 lr: 0.000295 loss: 2.6239 (2.6420) grad: 0.2337 (0.2329) time: 0.4388 data: 0.0050 max mem: 22448 +train: [6] [100/400] eta: 0:02:18 lr: 0.000295 loss: 2.6173 (2.6428) grad: 0.2378 (0.2342) time: 0.4181 data: 0.0049 max mem: 22448 +train: [6] [120/400] eta: 0:02:08 lr: 0.000295 loss: 2.6523 (2.6483) grad: 0.2389 (0.2361) time: 0.4505 data: 0.0052 max mem: 22448 +train: [6] [140/400] eta: 0:01:58 lr: 0.000294 loss: 2.6906 (2.6562) grad: 0.2422 (0.2372) time: 0.4390 data: 0.0051 max mem: 22448 +train: [6] [160/400] eta: 0:01:48 lr: 0.000294 loss: 2.6917 (2.6586) grad: 0.2422 (0.2375) time: 0.4343 data: 0.0050 max mem: 22448 +train: [6] [180/400] eta: 0:01:39 lr: 0.000293 loss: 2.6512 (2.6582) grad: 0.2400 (0.2379) time: 0.4270 data: 0.0052 max mem: 22448 +train: [6] [200/400] eta: 0:01:29 lr: 0.000293 loss: 2.6596 (2.6608) grad: 0.2355 (0.2379) time: 0.4326 data: 0.0047 max mem: 22448 +train: [6] [220/400] eta: 0:01:20 lr: 0.000292 loss: 2.6914 (2.6608) grad: 0.2364 (0.2386) time: 0.4348 data: 0.0050 max mem: 22448 +train: [6] [240/400] eta: 0:01:11 lr: 0.000292 loss: 2.6774 (2.6629) grad: 0.2399 (0.2386) time: 0.4530 data: 0.0050 max mem: 22448 +train: [6] [260/400] eta: 0:01:02 lr: 0.000291 loss: 2.6613 (2.6608) grad: 0.2339 (0.2383) time: 0.4429 data: 0.0049 max mem: 22448 +train: [6] [280/400] eta: 0:00:53 lr: 0.000291 loss: 2.6613 (2.6614) grad: 0.2313 (0.2382) time: 0.4255 data: 0.0048 max mem: 22448 +train: [6] [300/400] eta: 0:00:44 lr: 0.000290 loss: 2.6766 (2.6650) grad: 0.2376 (0.2385) time: 0.4293 data: 0.0048 max mem: 22448 +train: [6] [320/400] eta: 0:00:35 lr: 0.000290 loss: 2.6999 (2.6667) grad: 0.2397 (0.2387) time: 0.4415 data: 0.0051 max mem: 22448 +train: [6] [340/400] eta: 0:00:26 lr: 0.000289 loss: 2.6964 (2.6679) grad: 0.2432 (0.2387) time: 0.4350 data: 0.0050 max mem: 22448 +train: [6] [360/400] eta: 0:00:17 lr: 0.000288 loss: 2.6306 (2.6659) grad: 0.2337 (0.2383) time: 0.4394 data: 0.0048 max mem: 22448 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 2.6179 (2.6669) grad: 0.2340 (0.2384) time: 0.4353 data: 0.0047 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.6380 (2.6676) grad: 0.2308 (0.2379) time: 0.4307 data: 0.0049 max mem: 22448 +train: [6] Total time: 0:02:57 (0.4436 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.6380 (2.6676) grad: 0.2308 (0.2379) +eval (validation): [6] [ 0/85] eta: 0:04:13 time: 2.9807 data: 2.7057 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:31 time: 0.3560 data: 0.0038 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:18 time: 0.3398 data: 0.0042 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3453 data: 0.0042 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3233 data: 0.0039 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3140 data: 0.0038 max mem: 22448 +eval (validation): [6] Total time: 0:00:31 (0.3736 s / it) +cv: [6] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.437 acc: 0.265 f1: 0.208 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:20:19 lr: nan time: 3.0485 data: 2.6810 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:39 lr: 0.000286 loss: 2.5877 (2.5941) grad: 0.2290 (0.2359) time: 0.4528 data: 0.0047 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:04 lr: 0.000286 loss: 2.5776 (2.5909) grad: 0.2315 (0.2387) time: 0.4435 data: 0.0048 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:46 lr: 0.000285 loss: 2.5757 (2.5857) grad: 0.2453 (0.2430) time: 0.4417 data: 0.0049 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:31 lr: 0.000284 loss: 2.5894 (2.5966) grad: 0.2453 (0.2415) time: 0.4269 data: 0.0047 max mem: 22448 +train: [7] [100/400] eta: 0:02:19 lr: 0.000284 loss: 2.5878 (2.5906) grad: 0.2365 (0.2406) time: 0.4311 data: 0.0047 max mem: 22448 +train: [7] [120/400] eta: 0:02:09 lr: 0.000283 loss: 2.5788 (2.5897) grad: 0.2388 (0.2418) time: 0.4507 data: 0.0051 max mem: 22448 +train: [7] [140/400] eta: 0:01:59 lr: 0.000282 loss: 2.5827 (2.5949) grad: 0.2397 (0.2418) time: 0.4367 data: 0.0052 max mem: 22448 +train: [7] [160/400] eta: 0:01:49 lr: 0.000282 loss: 2.6065 (2.5952) grad: 0.2397 (0.2415) time: 0.4307 data: 0.0049 max mem: 22448 +train: [7] [180/400] eta: 0:01:39 lr: 0.000281 loss: 2.6277 (2.6018) grad: 0.2461 (0.2430) time: 0.4280 data: 0.0051 max mem: 22448 +train: [7] [200/400] eta: 0:01:30 lr: 0.000280 loss: 2.6143 (2.6002) grad: 0.2499 (0.2428) time: 0.4312 data: 0.0051 max mem: 22448 +train: [7] [220/400] eta: 0:01:20 lr: 0.000279 loss: 2.5835 (2.5963) grad: 0.2396 (0.2428) time: 0.4389 data: 0.0048 max mem: 22448 +train: [7] [240/400] eta: 0:01:11 lr: 0.000278 loss: 2.5971 (2.6006) grad: 0.2408 (0.2430) time: 0.4456 data: 0.0051 max mem: 22448 +train: [7] [260/400] eta: 0:01:02 lr: 0.000278 loss: 2.6104 (2.5989) grad: 0.2396 (0.2423) time: 0.4461 data: 0.0050 max mem: 22448 +train: [7] [280/400] eta: 0:00:53 lr: 0.000277 loss: 2.5409 (2.5959) grad: 0.2369 (0.2421) time: 0.4287 data: 0.0047 max mem: 22448 +train: [7] [300/400] eta: 0:00:44 lr: 0.000276 loss: 2.5318 (2.5953) grad: 0.2407 (0.2424) time: 0.4256 data: 0.0047 max mem: 22448 +train: [7] [320/400] eta: 0:00:35 lr: 0.000275 loss: 2.5711 (2.5955) grad: 0.2433 (0.2423) time: 0.4367 data: 0.0052 max mem: 22448 +train: [7] [340/400] eta: 0:00:26 lr: 0.000274 loss: 2.5720 (2.5940) grad: 0.2386 (0.2421) time: 0.4370 data: 0.0048 max mem: 22448 +train: [7] [360/400] eta: 0:00:17 lr: 0.000273 loss: 2.5760 (2.5947) grad: 0.2386 (0.2422) time: 0.4294 data: 0.0048 max mem: 22448 +train: [7] [380/400] eta: 0:00:08 lr: 0.000272 loss: 2.5882 (2.5948) grad: 0.2432 (0.2425) time: 0.4274 data: 0.0048 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5981 (2.5966) grad: 0.2453 (0.2430) time: 0.4213 data: 0.0049 max mem: 22448 +train: [7] Total time: 0:02:57 (0.4427 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5981 (2.5966) grad: 0.2453 (0.2430) +eval (validation): [7] [ 0/85] eta: 0:04:26 time: 3.1341 data: 2.8908 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:30 time: 0.3419 data: 0.0045 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:19 time: 0.3733 data: 0.0043 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:09 time: 0.3320 data: 0.0042 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3228 data: 0.0043 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3107 data: 0.0042 max mem: 22448 +eval (validation): [7] Total time: 0:00:31 (0.3764 s / it) +cv: [7] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.470 acc: 0.257 f1: 0.206 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:21:05 lr: nan time: 3.1638 data: 2.7992 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:35 lr: 0.000270 loss: 2.4585 (2.4798) grad: 0.2267 (0.2317) time: 0.4371 data: 0.0038 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:01 lr: 0.000270 loss: 2.4798 (2.5029) grad: 0.2340 (0.2372) time: 0.4372 data: 0.0049 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:44 lr: 0.000269 loss: 2.5345 (2.5117) grad: 0.2382 (0.2386) time: 0.4395 data: 0.0052 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:30 lr: 0.000268 loss: 2.5568 (2.5229) grad: 0.2390 (0.2416) time: 0.4291 data: 0.0048 max mem: 22448 +train: [8] [100/400] eta: 0:02:19 lr: 0.000267 loss: 2.5443 (2.5220) grad: 0.2512 (0.2451) time: 0.4449 data: 0.0049 max mem: 22448 +train: [8] [120/400] eta: 0:02:08 lr: 0.000266 loss: 2.5299 (2.5243) grad: 0.2616 (0.2492) time: 0.4377 data: 0.0048 max mem: 22448 +train: [8] [140/400] eta: 0:01:59 lr: 0.000265 loss: 2.5351 (2.5289) grad: 0.2616 (0.2509) time: 0.4505 data: 0.0050 max mem: 22448 +train: [8] [160/400] eta: 0:01:49 lr: 0.000264 loss: 2.5433 (2.5313) grad: 0.2609 (0.2532) time: 0.4491 data: 0.0050 max mem: 22448 +train: [8] [180/400] eta: 0:01:40 lr: 0.000263 loss: 2.5416 (2.5288) grad: 0.2544 (0.2527) time: 0.4360 data: 0.0050 max mem: 22448 +train: [8] [200/400] eta: 0:01:30 lr: 0.000262 loss: 2.5340 (2.5311) grad: 0.2463 (0.2523) time: 0.4385 data: 0.0049 max mem: 22448 +train: [8] [220/400] eta: 0:01:21 lr: 0.000260 loss: 2.5333 (2.5325) grad: 0.2456 (0.2518) time: 0.4369 data: 0.0051 max mem: 22448 +train: [8] [240/400] eta: 0:01:12 lr: 0.000259 loss: 2.5333 (2.5323) grad: 0.2456 (0.2514) time: 0.4531 data: 0.0051 max mem: 22448 +train: [8] [260/400] eta: 0:01:03 lr: 0.000258 loss: 2.5541 (2.5345) grad: 0.2434 (0.2513) time: 0.4574 data: 0.0049 max mem: 22448 +train: [8] [280/400] eta: 0:00:54 lr: 0.000257 loss: 2.5369 (2.5339) grad: 0.2434 (0.2512) time: 0.4370 data: 0.0048 max mem: 22448 +train: [8] [300/400] eta: 0:00:44 lr: 0.000256 loss: 2.5169 (2.5349) grad: 0.2474 (0.2514) time: 0.4294 data: 0.0047 max mem: 22448 +train: [8] [320/400] eta: 0:00:35 lr: 0.000255 loss: 2.5438 (2.5354) grad: 0.2415 (0.2506) time: 0.4426 data: 0.0049 max mem: 22448 +train: [8] [340/400] eta: 0:00:26 lr: 0.000254 loss: 2.5376 (2.5346) grad: 0.2419 (0.2510) time: 0.4233 data: 0.0050 max mem: 22448 +train: [8] [360/400] eta: 0:00:17 lr: 0.000253 loss: 2.5220 (2.5343) grad: 0.2481 (0.2509) time: 0.4456 data: 0.0051 max mem: 22448 +train: [8] [380/400] eta: 0:00:08 lr: 0.000252 loss: 2.5220 (2.5349) grad: 0.2445 (0.2505) time: 0.4329 data: 0.0050 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.5418 (2.5354) grad: 0.2481 (0.2510) time: 0.4232 data: 0.0049 max mem: 22448 +train: [8] Total time: 0:02:58 (0.4464 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.5418 (2.5354) grad: 0.2481 (0.2510) +eval (validation): [8] [ 0/85] eta: 0:05:13 time: 3.6838 data: 3.4456 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:33 time: 0.3558 data: 0.0039 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:19 time: 0.3516 data: 0.0036 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:09 time: 0.3274 data: 0.0041 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3292 data: 0.0042 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3266 data: 0.0042 max mem: 22448 +eval (validation): [8] Total time: 0:00:32 (0.3834 s / it) +cv: [8] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.480 acc: 0.248 f1: 0.189 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:21:37 lr: nan time: 3.2443 data: 2.8729 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:44 lr: 0.000249 loss: 2.4748 (2.4843) grad: 0.2450 (0.2555) time: 0.4579 data: 0.0025 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:07 lr: 0.000248 loss: 2.5181 (2.4929) grad: 0.2464 (0.2507) time: 0.4459 data: 0.0048 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:48 lr: 0.000247 loss: 2.4987 (2.4847) grad: 0.2421 (0.2474) time: 0.4442 data: 0.0050 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:33 lr: 0.000246 loss: 2.4629 (2.4942) grad: 0.2448 (0.2499) time: 0.4367 data: 0.0048 max mem: 22448 +train: [9] [100/400] eta: 0:02:21 lr: 0.000244 loss: 2.4799 (2.4930) grad: 0.2543 (0.2512) time: 0.4408 data: 0.0052 max mem: 22448 +train: [9] [120/400] eta: 0:02:10 lr: 0.000243 loss: 2.4799 (2.4922) grad: 0.2505 (0.2511) time: 0.4228 data: 0.0049 max mem: 22448 +train: [9] [140/400] eta: 0:02:00 lr: 0.000242 loss: 2.4827 (2.4943) grad: 0.2502 (0.2519) time: 0.4508 data: 0.0050 max mem: 22448 +train: [9] [160/400] eta: 0:01:50 lr: 0.000241 loss: 2.4640 (2.4892) grad: 0.2530 (0.2522) time: 0.4423 data: 0.0049 max mem: 22448 +train: [9] [180/400] eta: 0:01:40 lr: 0.000240 loss: 2.4640 (2.4912) grad: 0.2507 (0.2528) time: 0.4324 data: 0.0050 max mem: 22448 +train: [9] [200/400] eta: 0:01:31 lr: 0.000238 loss: 2.4694 (2.4889) grad: 0.2527 (0.2536) time: 0.4384 data: 0.0050 max mem: 22448 +train: [9] [220/400] eta: 0:01:21 lr: 0.000237 loss: 2.4783 (2.4868) grad: 0.2585 (0.2545) time: 0.4268 data: 0.0046 max mem: 22448 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 2.5112 (2.4905) grad: 0.2569 (0.2542) time: 0.4469 data: 0.0050 max mem: 22448 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 2.5221 (2.4899) grad: 0.2548 (0.2541) time: 0.4724 data: 0.0052 max mem: 22448 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 2.4716 (2.4897) grad: 0.2556 (0.2543) time: 0.4250 data: 0.0049 max mem: 22448 +train: [9] [300/400] eta: 0:00:45 lr: 0.000232 loss: 2.4923 (2.4901) grad: 0.2558 (0.2546) time: 0.4405 data: 0.0049 max mem: 22448 +train: [9] [320/400] eta: 0:00:36 lr: 0.000230 loss: 2.4923 (2.4910) grad: 0.2544 (0.2543) time: 0.4554 data: 0.0051 max mem: 22448 +train: [9] [340/400] eta: 0:00:26 lr: 0.000229 loss: 2.4643 (2.4889) grad: 0.2467 (0.2544) time: 0.4309 data: 0.0049 max mem: 22448 +train: [9] [360/400] eta: 0:00:17 lr: 0.000228 loss: 2.4476 (2.4895) grad: 0.2590 (0.2550) time: 0.4230 data: 0.0048 max mem: 22448 +train: [9] [380/400] eta: 0:00:08 lr: 0.000226 loss: 2.4476 (2.4890) grad: 0.2521 (0.2548) time: 0.4282 data: 0.0048 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.5162 (2.4908) grad: 0.2541 (0.2553) time: 0.4286 data: 0.0048 max mem: 22448 +train: [9] Total time: 0:02:58 (0.4472 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.5162 (2.4908) grad: 0.2541 (0.2553) +eval (validation): [9] [ 0/85] eta: 0:04:15 time: 3.0015 data: 2.7381 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:31 time: 0.3581 data: 0.0039 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3449 data: 0.0036 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3344 data: 0.0042 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3195 data: 0.0042 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3106 data: 0.0041 max mem: 22448 +eval (validation): [9] Total time: 0:00:31 (0.3714 s / it) +cv: [9] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.458 acc: 0.258 f1: 0.193 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:20:08 lr: nan time: 3.0203 data: 2.6547 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:29 lr: 0.000224 loss: 2.4273 (2.4214) grad: 0.2550 (0.2623) time: 0.4284 data: 0.0037 max mem: 22448 +train: [10] [ 40/400] eta: 0:02:55 lr: 0.000222 loss: 2.4229 (2.4109) grad: 0.2550 (0.2586) time: 0.4227 data: 0.0041 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:39 lr: 0.000221 loss: 2.4180 (2.4262) grad: 0.2470 (0.2541) time: 0.4264 data: 0.0047 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:26 lr: 0.000220 loss: 2.4180 (2.4197) grad: 0.2454 (0.2526) time: 0.4263 data: 0.0046 max mem: 22448 +train: [10] [100/400] eta: 0:02:15 lr: 0.000218 loss: 2.3892 (2.4153) grad: 0.2464 (0.2521) time: 0.4247 data: 0.0047 max mem: 22448 +train: [10] [120/400] eta: 0:02:05 lr: 0.000217 loss: 2.4239 (2.4189) grad: 0.2517 (0.2530) time: 0.4279 data: 0.0047 max mem: 22448 +train: [10] [140/400] eta: 0:01:56 lr: 0.000215 loss: 2.4268 (2.4235) grad: 0.2531 (0.2528) time: 0.4592 data: 0.0053 max mem: 22448 +train: [10] [160/400] eta: 0:01:47 lr: 0.000214 loss: 2.4514 (2.4258) grad: 0.2493 (0.2533) time: 0.4379 data: 0.0050 max mem: 22448 +train: [10] [180/400] eta: 0:01:38 lr: 0.000213 loss: 2.4494 (2.4284) grad: 0.2500 (0.2535) time: 0.4466 data: 0.0051 max mem: 22448 +train: [10] [200/400] eta: 0:01:29 lr: 0.000211 loss: 2.4494 (2.4301) grad: 0.2558 (0.2541) time: 0.4355 data: 0.0051 max mem: 22448 +train: [10] [220/400] eta: 0:01:20 lr: 0.000210 loss: 2.4431 (2.4307) grad: 0.2569 (0.2545) time: 0.4315 data: 0.0050 max mem: 22448 +train: [10] [240/400] eta: 0:01:11 lr: 0.000208 loss: 2.4381 (2.4308) grad: 0.2508 (0.2539) time: 0.4321 data: 0.0052 max mem: 22448 +train: [10] [260/400] eta: 0:01:02 lr: 0.000207 loss: 2.4424 (2.4318) grad: 0.2461 (0.2536) time: 0.4538 data: 0.0052 max mem: 22448 +train: [10] [280/400] eta: 0:00:53 lr: 0.000205 loss: 2.4576 (2.4325) grad: 0.2502 (0.2537) time: 0.4388 data: 0.0046 max mem: 22448 +train: [10] [300/400] eta: 0:00:44 lr: 0.000204 loss: 2.4123 (2.4312) grad: 0.2532 (0.2536) time: 0.4242 data: 0.0047 max mem: 22448 +train: [10] [320/400] eta: 0:00:35 lr: 0.000202 loss: 2.4025 (2.4290) grad: 0.2538 (0.2537) time: 0.4354 data: 0.0050 max mem: 22448 +train: [10] [340/400] eta: 0:00:26 lr: 0.000201 loss: 2.4236 (2.4297) grad: 0.2522 (0.2536) time: 0.4352 data: 0.0049 max mem: 22448 +train: [10] [360/400] eta: 0:00:17 lr: 0.000199 loss: 2.4236 (2.4279) grad: 0.2522 (0.2534) time: 0.4420 data: 0.0049 max mem: 22448 +train: [10] [380/400] eta: 0:00:08 lr: 0.000198 loss: 2.3707 (2.4260) grad: 0.2535 (0.2533) time: 0.4408 data: 0.0051 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.4034 (2.4274) grad: 0.2557 (0.2536) time: 0.4343 data: 0.0052 max mem: 22448 +train: [10] Total time: 0:02:56 (0.4422 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.4034 (2.4274) grad: 0.2557 (0.2536) +eval (validation): [10] [ 0/85] eta: 0:04:24 time: 3.1171 data: 2.8779 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:31 time: 0.3497 data: 0.0044 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:18 time: 0.3495 data: 0.0038 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:09 time: 0.3393 data: 0.0044 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3112 data: 0.0041 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3028 data: 0.0039 max mem: 22448 +eval (validation): [10] Total time: 0:00:31 (0.3720 s / it) +cv: [10] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.458 acc: 0.260 f1: 0.197 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:20:49 lr: nan time: 3.1227 data: 2.7957 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:47 lr: 0.000195 loss: 2.3353 (2.3439) grad: 0.2439 (0.2487) time: 0.4715 data: 0.0043 max mem: 22448 +train: [11] [ 40/400] eta: 0:03:08 lr: 0.000193 loss: 2.3814 (2.3744) grad: 0.2475 (0.2498) time: 0.4459 data: 0.0045 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:48 lr: 0.000192 loss: 2.3759 (2.3581) grad: 0.2514 (0.2521) time: 0.4391 data: 0.0050 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:34 lr: 0.000190 loss: 2.3566 (2.3727) grad: 0.2536 (0.2536) time: 0.4371 data: 0.0051 max mem: 22448 +train: [11] [100/400] eta: 0:02:21 lr: 0.000189 loss: 2.3792 (2.3721) grad: 0.2523 (0.2530) time: 0.4376 data: 0.0051 max mem: 22448 +train: [11] [120/400] eta: 0:02:10 lr: 0.000187 loss: 2.3214 (2.3611) grad: 0.2532 (0.2534) time: 0.4344 data: 0.0048 max mem: 22448 +train: [11] [140/400] eta: 0:02:00 lr: 0.000186 loss: 2.3639 (2.3666) grad: 0.2580 (0.2546) time: 0.4409 data: 0.0048 max mem: 22448 +train: [11] [160/400] eta: 0:01:50 lr: 0.000184 loss: 2.3931 (2.3711) grad: 0.2606 (0.2556) time: 0.4399 data: 0.0053 max mem: 22448 +train: [11] [180/400] eta: 0:01:40 lr: 0.000183 loss: 2.3764 (2.3752) grad: 0.2675 (0.2581) time: 0.4397 data: 0.0051 max mem: 22448 +train: [11] [200/400] eta: 0:01:30 lr: 0.000181 loss: 2.4294 (2.3806) grad: 0.2711 (0.2588) time: 0.4275 data: 0.0050 max mem: 22448 +train: [11] [220/400] eta: 0:01:21 lr: 0.000180 loss: 2.4682 (2.3884) grad: 0.2625 (0.2588) time: 0.4486 data: 0.0051 max mem: 22448 +train: [11] [240/400] eta: 0:01:12 lr: 0.000178 loss: 2.4469 (2.3910) grad: 0.2526 (0.2590) time: 0.4423 data: 0.0051 max mem: 22448 +train: [11] [260/400] eta: 0:01:03 lr: 0.000177 loss: 2.4063 (2.3915) grad: 0.2614 (0.2594) time: 0.4537 data: 0.0050 max mem: 22448 +train: [11] [280/400] eta: 0:00:54 lr: 0.000175 loss: 2.3956 (2.3908) grad: 0.2617 (0.2595) time: 0.4491 data: 0.0051 max mem: 22448 +train: [11] [300/400] eta: 0:00:45 lr: 0.000174 loss: 2.3956 (2.3936) grad: 0.2562 (0.2596) time: 0.4372 data: 0.0047 max mem: 22448 +train: [11] [320/400] eta: 0:00:36 lr: 0.000172 loss: 2.4169 (2.3953) grad: 0.2641 (0.2602) time: 0.4422 data: 0.0051 max mem: 22448 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 2.3828 (2.3952) grad: 0.2678 (0.2605) time: 0.4405 data: 0.0050 max mem: 22448 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 2.3828 (2.3948) grad: 0.2545 (0.2605) time: 0.4488 data: 0.0050 max mem: 22448 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 2.3747 (2.3918) grad: 0.2563 (0.2603) time: 0.4537 data: 0.0051 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.3572 (2.3945) grad: 0.2506 (0.2596) time: 0.4529 data: 0.0053 max mem: 22448 +train: [11] Total time: 0:03:00 (0.4512 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.3572 (2.3945) grad: 0.2506 (0.2596) +eval (validation): [11] [ 0/85] eta: 0:04:31 time: 3.1932 data: 2.9052 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:33 time: 0.3734 data: 0.0054 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:19 time: 0.3437 data: 0.0043 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:09 time: 0.3383 data: 0.0040 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3415 data: 0.0042 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3360 data: 0.0040 max mem: 22448 +eval (validation): [11] Total time: 0:00:32 (0.3845 s / it) +cv: [11] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.468 acc: 0.259 f1: 0.195 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:20:40 lr: nan time: 3.1006 data: 2.7313 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:50 lr: 0.000164 loss: 2.2535 (2.2831) grad: 0.2396 (0.2440) time: 0.4815 data: 0.0046 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:09 lr: 0.000163 loss: 2.2970 (2.3040) grad: 0.2474 (0.2475) time: 0.4448 data: 0.0051 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:48 lr: 0.000161 loss: 2.3178 (2.3042) grad: 0.2494 (0.2474) time: 0.4323 data: 0.0049 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:35 lr: 0.000160 loss: 2.3178 (2.3120) grad: 0.2496 (0.2480) time: 0.4534 data: 0.0050 max mem: 22448 +train: [12] [100/400] eta: 0:02:23 lr: 0.000158 loss: 2.3145 (2.3164) grad: 0.2519 (0.2492) time: 0.4447 data: 0.0052 max mem: 22448 +train: [12] [120/400] eta: 0:02:11 lr: 0.000156 loss: 2.3145 (2.3205) grad: 0.2522 (0.2489) time: 0.4329 data: 0.0050 max mem: 22448 +train: [12] [140/400] eta: 0:02:01 lr: 0.000155 loss: 2.3146 (2.3218) grad: 0.2538 (0.2515) time: 0.4550 data: 0.0051 max mem: 22448 +train: [12] [160/400] eta: 0:01:51 lr: 0.000153 loss: 2.3299 (2.3228) grad: 0.2675 (0.2534) time: 0.4464 data: 0.0052 max mem: 22448 +train: [12] [180/400] eta: 0:01:42 lr: 0.000152 loss: 2.3348 (2.3230) grad: 0.2626 (0.2540) time: 0.4632 data: 0.0051 max mem: 22448 +train: [12] [200/400] eta: 0:01:32 lr: 0.000150 loss: 2.3595 (2.3291) grad: 0.2561 (0.2544) time: 0.4285 data: 0.0048 max mem: 22448 +train: [12] [220/400] eta: 0:01:22 lr: 0.000149 loss: 2.3595 (2.3342) grad: 0.2565 (0.2549) time: 0.4568 data: 0.0048 max mem: 22448 +train: [12] [240/400] eta: 0:01:13 lr: 0.000147 loss: 2.3366 (2.3314) grad: 0.2614 (0.2564) time: 0.4558 data: 0.0051 max mem: 22448 +train: [12] [260/400] eta: 0:01:04 lr: 0.000145 loss: 2.3084 (2.3311) grad: 0.2639 (0.2564) time: 0.4639 data: 0.0053 max mem: 22448 +train: [12] [280/400] eta: 0:00:55 lr: 0.000144 loss: 2.3007 (2.3274) grad: 0.2537 (0.2563) time: 0.4499 data: 0.0051 max mem: 22448 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 2.3256 (2.3301) grad: 0.2601 (0.2573) time: 0.4295 data: 0.0049 max mem: 22448 +train: [12] [320/400] eta: 0:00:36 lr: 0.000141 loss: 2.3505 (2.3311) grad: 0.2623 (0.2575) time: 0.4532 data: 0.0051 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.3248 (2.3311) grad: 0.2568 (0.2573) time: 0.4390 data: 0.0050 max mem: 22448 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 2.3115 (2.3307) grad: 0.2567 (0.2574) time: 0.4364 data: 0.0051 max mem: 22448 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 2.3249 (2.3316) grad: 0.2558 (0.2574) time: 0.4344 data: 0.0050 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.3197 (2.3304) grad: 0.2573 (0.2577) time: 0.4397 data: 0.0049 max mem: 22448 +train: [12] Total time: 0:03:01 (0.4543 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.3197 (2.3304) grad: 0.2573 (0.2577) +eval (validation): [12] [ 0/85] eta: 0:04:27 time: 3.1459 data: 2.8656 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:31 time: 0.3488 data: 0.0035 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:18 time: 0.3569 data: 0.0036 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3543 data: 0.0044 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3221 data: 0.0043 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3161 data: 0.0043 max mem: 22448 +eval (validation): [12] Total time: 0:00:32 (0.3806 s / it) +cv: [12] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.479 acc: 0.251 f1: 0.193 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:20:52 lr: nan time: 3.1320 data: 2.8011 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:37 lr: 0.000133 loss: 2.2785 (2.2931) grad: 0.2620 (0.2607) time: 0.4454 data: 0.0050 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:05 lr: 0.000131 loss: 2.2849 (2.2878) grad: 0.2547 (0.2581) time: 0.4542 data: 0.0048 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:46 lr: 0.000130 loss: 2.3016 (2.2880) grad: 0.2543 (0.2573) time: 0.4367 data: 0.0050 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:31 lr: 0.000128 loss: 2.2601 (2.2826) grad: 0.2516 (0.2577) time: 0.4297 data: 0.0049 max mem: 22448 +train: [13] [100/400] eta: 0:02:20 lr: 0.000127 loss: 2.2447 (2.2778) grad: 0.2460 (0.2555) time: 0.4443 data: 0.0048 max mem: 22448 +train: [13] [120/400] eta: 0:02:09 lr: 0.000125 loss: 2.2473 (2.2728) grad: 0.2460 (0.2558) time: 0.4215 data: 0.0048 max mem: 22448 +train: [13] [140/400] eta: 0:01:59 lr: 0.000124 loss: 2.2870 (2.2804) grad: 0.2654 (0.2573) time: 0.4536 data: 0.0052 max mem: 22448 +train: [13] [160/400] eta: 0:01:49 lr: 0.000122 loss: 2.3249 (2.2834) grad: 0.2666 (0.2590) time: 0.4357 data: 0.0050 max mem: 22448 +train: [13] [180/400] eta: 0:01:40 lr: 0.000120 loss: 2.3249 (2.2902) grad: 0.2637 (0.2595) time: 0.4366 data: 0.0050 max mem: 22448 +train: [13] [200/400] eta: 0:01:30 lr: 0.000119 loss: 2.2813 (2.2856) grad: 0.2550 (0.2590) time: 0.4434 data: 0.0051 max mem: 22448 +train: [13] [220/400] eta: 0:01:21 lr: 0.000117 loss: 2.2617 (2.2866) grad: 0.2568 (0.2599) time: 0.4406 data: 0.0050 max mem: 22448 +train: [13] [240/400] eta: 0:01:12 lr: 0.000116 loss: 2.2832 (2.2867) grad: 0.2594 (0.2599) time: 0.4398 data: 0.0052 max mem: 22448 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 2.2725 (2.2873) grad: 0.2531 (0.2591) time: 0.4575 data: 0.0053 max mem: 22448 +train: [13] [280/400] eta: 0:00:54 lr: 0.000113 loss: 2.2615 (2.2845) grad: 0.2426 (0.2584) time: 0.4447 data: 0.0051 max mem: 22448 +train: [13] [300/400] eta: 0:00:44 lr: 0.000111 loss: 2.2577 (2.2844) grad: 0.2431 (0.2576) time: 0.4240 data: 0.0045 max mem: 22448 +train: [13] [320/400] eta: 0:00:36 lr: 0.000110 loss: 2.2987 (2.2852) grad: 0.2561 (0.2581) time: 0.4623 data: 0.0052 max mem: 22448 +train: [13] [340/400] eta: 0:00:27 lr: 0.000108 loss: 2.2940 (2.2852) grad: 0.2586 (0.2578) time: 0.4481 data: 0.0052 max mem: 22448 +train: [13] [360/400] eta: 0:00:17 lr: 0.000107 loss: 2.2765 (2.2860) grad: 0.2584 (0.2583) time: 0.4365 data: 0.0052 max mem: 22448 +train: [13] [380/400] eta: 0:00:08 lr: 0.000105 loss: 2.2768 (2.2858) grad: 0.2640 (0.2586) time: 0.4377 data: 0.0050 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.2765 (2.2861) grad: 0.2595 (0.2587) time: 0.4415 data: 0.0050 max mem: 22448 +train: [13] Total time: 0:02:59 (0.4490 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.2765 (2.2861) grad: 0.2595 (0.2587) +eval (validation): [13] [ 0/85] eta: 0:04:38 time: 3.2780 data: 2.9819 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:34 time: 0.3908 data: 0.0034 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:20 time: 0.3646 data: 0.0043 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:10 time: 0.3844 data: 0.0047 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:02 time: 0.3420 data: 0.0044 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3271 data: 0.0043 max mem: 22448 +eval (validation): [13] Total time: 0:00:34 (0.4048 s / it) +cv: [13] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.465 acc: 0.256 f1: 0.195 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:20:22 lr: nan time: 3.0570 data: 2.7324 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:38 lr: 0.000102 loss: 2.2099 (2.2001) grad: 0.2421 (0.2494) time: 0.4516 data: 0.0045 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:04 lr: 0.000101 loss: 2.2120 (2.2124) grad: 0.2419 (0.2483) time: 0.4438 data: 0.0044 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:45 lr: 0.000099 loss: 2.2120 (2.2055) grad: 0.2497 (0.2498) time: 0.4363 data: 0.0050 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:32 lr: 0.000098 loss: 2.2138 (2.2174) grad: 0.2492 (0.2498) time: 0.4431 data: 0.0050 max mem: 22448 +train: [14] [100/400] eta: 0:02:20 lr: 0.000096 loss: 2.2546 (2.2273) grad: 0.2503 (0.2513) time: 0.4352 data: 0.0052 max mem: 22448 +train: [14] [120/400] eta: 0:02:09 lr: 0.000095 loss: 2.2127 (2.2245) grad: 0.2557 (0.2524) time: 0.4334 data: 0.0048 max mem: 22448 +train: [14] [140/400] eta: 0:01:59 lr: 0.000093 loss: 2.2088 (2.2243) grad: 0.2600 (0.2538) time: 0.4571 data: 0.0050 max mem: 22448 +train: [14] [160/400] eta: 0:01:49 lr: 0.000092 loss: 2.1930 (2.2223) grad: 0.2582 (0.2540) time: 0.4266 data: 0.0048 max mem: 22448 +train: [14] [180/400] eta: 0:01:40 lr: 0.000090 loss: 2.1873 (2.2196) grad: 0.2551 (0.2546) time: 0.4547 data: 0.0051 max mem: 22448 +train: [14] [200/400] eta: 0:01:31 lr: 0.000089 loss: 2.1855 (2.2194) grad: 0.2552 (0.2555) time: 0.4425 data: 0.0050 max mem: 22448 +train: [14] [220/400] eta: 0:01:21 lr: 0.000088 loss: 2.2224 (2.2210) grad: 0.2604 (0.2561) time: 0.4440 data: 0.0048 max mem: 22448 +train: [14] [240/400] eta: 0:01:12 lr: 0.000086 loss: 2.2224 (2.2249) grad: 0.2593 (0.2562) time: 0.4477 data: 0.0051 max mem: 22448 +train: [14] [260/400] eta: 0:01:03 lr: 0.000085 loss: 2.2254 (2.2261) grad: 0.2510 (0.2560) time: 0.4569 data: 0.0052 max mem: 22448 +train: [14] [280/400] eta: 0:00:54 lr: 0.000083 loss: 2.2348 (2.2270) grad: 0.2488 (0.2556) time: 0.4521 data: 0.0053 max mem: 22448 +train: [14] [300/400] eta: 0:00:45 lr: 0.000082 loss: 2.2716 (2.2320) grad: 0.2551 (0.2559) time: 0.4290 data: 0.0047 max mem: 22448 +train: [14] [320/400] eta: 0:00:36 lr: 0.000081 loss: 2.2791 (2.2335) grad: 0.2625 (0.2564) time: 0.4739 data: 0.0054 max mem: 22448 +train: [14] [340/400] eta: 0:00:27 lr: 0.000079 loss: 2.2398 (2.2334) grad: 0.2625 (0.2568) time: 0.4433 data: 0.0049 max mem: 22448 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 2.2226 (2.2343) grad: 0.2589 (0.2568) time: 0.4399 data: 0.0049 max mem: 22448 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 2.2173 (2.2331) grad: 0.2558 (0.2569) time: 0.4371 data: 0.0048 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.2058 (2.2337) grad: 0.2605 (0.2574) time: 0.4424 data: 0.0050 max mem: 22448 +train: [14] Total time: 0:03:00 (0.4518 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.2058 (2.2337) grad: 0.2605 (0.2574) +eval (validation): [14] [ 0/85] eta: 0:04:32 time: 3.2076 data: 2.9748 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:32 time: 0.3579 data: 0.0033 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:19 time: 0.3517 data: 0.0041 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3425 data: 0.0043 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3346 data: 0.0044 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3310 data: 0.0042 max mem: 22448 +eval (validation): [14] Total time: 0:00:32 (0.3825 s / it) +cv: [14] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.459 acc: 0.262 f1: 0.201 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:20:33 lr: nan time: 3.0837 data: 2.7596 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:39 lr: 0.000074 loss: 2.1663 (2.1773) grad: 0.2465 (0.2480) time: 0.4512 data: 0.0047 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:02 lr: 0.000072 loss: 2.1539 (2.1719) grad: 0.2447 (0.2474) time: 0.4329 data: 0.0046 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:45 lr: 0.000071 loss: 2.1441 (2.1686) grad: 0.2476 (0.2503) time: 0.4447 data: 0.0051 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:31 lr: 0.000070 loss: 2.1374 (2.1609) grad: 0.2510 (0.2497) time: 0.4377 data: 0.0051 max mem: 22448 +train: [15] [100/400] eta: 0:02:19 lr: 0.000068 loss: 2.1200 (2.1545) grad: 0.2476 (0.2507) time: 0.4338 data: 0.0048 max mem: 22448 +train: [15] [120/400] eta: 0:02:09 lr: 0.000067 loss: 2.1657 (2.1612) grad: 0.2546 (0.2523) time: 0.4330 data: 0.0049 max mem: 22448 +train: [15] [140/400] eta: 0:01:59 lr: 0.000066 loss: 2.2038 (2.1696) grad: 0.2583 (0.2542) time: 0.4560 data: 0.0052 max mem: 22448 +train: [15] [160/400] eta: 0:01:49 lr: 0.000064 loss: 2.1710 (2.1703) grad: 0.2583 (0.2548) time: 0.4353 data: 0.0049 max mem: 22448 +train: [15] [180/400] eta: 0:01:40 lr: 0.000063 loss: 2.2114 (2.1797) grad: 0.2625 (0.2560) time: 0.4398 data: 0.0050 max mem: 22448 +train: [15] [200/400] eta: 0:01:30 lr: 0.000062 loss: 2.2114 (2.1800) grad: 0.2559 (0.2554) time: 0.4494 data: 0.0052 max mem: 22448 +train: [15] [220/400] eta: 0:01:21 lr: 0.000061 loss: 2.1937 (2.1821) grad: 0.2550 (0.2557) time: 0.4532 data: 0.0051 max mem: 22448 +train: [15] [240/400] eta: 0:01:12 lr: 0.000059 loss: 2.1668 (2.1822) grad: 0.2616 (0.2562) time: 0.4478 data: 0.0052 max mem: 22448 +train: [15] [260/400] eta: 0:01:03 lr: 0.000058 loss: 2.2016 (2.1861) grad: 0.2558 (0.2560) time: 0.4595 data: 0.0055 max mem: 22448 +train: [15] [280/400] eta: 0:00:54 lr: 0.000057 loss: 2.2066 (2.1852) grad: 0.2516 (0.2554) time: 0.4479 data: 0.0048 max mem: 22448 +train: [15] [300/400] eta: 0:00:45 lr: 0.000056 loss: 2.1537 (2.1842) grad: 0.2448 (0.2553) time: 0.4406 data: 0.0050 max mem: 22448 +train: [15] [320/400] eta: 0:00:36 lr: 0.000054 loss: 2.1915 (2.1856) grad: 0.2555 (0.2553) time: 0.4445 data: 0.0050 max mem: 22448 +train: [15] [340/400] eta: 0:00:27 lr: 0.000053 loss: 2.1876 (2.1861) grad: 0.2530 (0.2553) time: 0.4409 data: 0.0053 max mem: 22448 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 2.1778 (2.1868) grad: 0.2530 (0.2554) time: 0.4570 data: 0.0052 max mem: 22448 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 2.1561 (2.1848) grad: 0.2477 (0.2552) time: 0.4463 data: 0.0049 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.1718 (2.1864) grad: 0.2475 (0.2550) time: 0.4478 data: 0.0047 max mem: 22448 +train: [15] Total time: 0:03:00 (0.4522 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.1718 (2.1864) grad: 0.2475 (0.2550) +eval (validation): [15] [ 0/85] eta: 0:04:28 time: 3.1532 data: 2.9113 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:33 time: 0.3855 data: 0.0048 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:19 time: 0.3553 data: 0.0041 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:10 time: 0.3751 data: 0.0048 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3308 data: 0.0041 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3161 data: 0.0038 max mem: 22448 +eval (validation): [15] Total time: 0:00:33 (0.3959 s / it) +cv: [15] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.466 acc: 0.261 f1: 0.198 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:20:48 lr: nan time: 3.1210 data: 2.7764 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:38 lr: 0.000048 loss: 2.0992 (2.1268) grad: 0.2369 (0.2373) time: 0.4486 data: 0.0041 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:01 lr: 0.000047 loss: 2.1056 (2.1246) grad: 0.2380 (0.2402) time: 0.4309 data: 0.0038 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:46 lr: 0.000046 loss: 2.1201 (2.1236) grad: 0.2401 (0.2422) time: 0.4606 data: 0.0050 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:33 lr: 0.000045 loss: 2.1337 (2.1322) grad: 0.2448 (0.2454) time: 0.4473 data: 0.0049 max mem: 22448 +train: [16] [100/400] eta: 0:02:20 lr: 0.000044 loss: 2.1427 (2.1348) grad: 0.2561 (0.2468) time: 0.4295 data: 0.0046 max mem: 22448 +train: [16] [120/400] eta: 0:02:11 lr: 0.000043 loss: 2.1249 (2.1372) grad: 0.2504 (0.2472) time: 0.4623 data: 0.0051 max mem: 22448 +train: [16] [140/400] eta: 0:02:00 lr: 0.000042 loss: 2.1153 (2.1355) grad: 0.2441 (0.2474) time: 0.4418 data: 0.0048 max mem: 22448 +train: [16] [160/400] eta: 0:01:50 lr: 0.000041 loss: 2.1543 (2.1444) grad: 0.2504 (0.2487) time: 0.4395 data: 0.0050 max mem: 22448 +train: [16] [180/400] eta: 0:01:41 lr: 0.000040 loss: 2.1606 (2.1457) grad: 0.2520 (0.2488) time: 0.4439 data: 0.0047 max mem: 22448 +train: [16] [200/400] eta: 0:01:31 lr: 0.000039 loss: 2.1518 (2.1446) grad: 0.2472 (0.2481) time: 0.4423 data: 0.0048 max mem: 22448 +train: [16] [220/400] eta: 0:01:22 lr: 0.000038 loss: 2.1445 (2.1452) grad: 0.2440 (0.2479) time: 0.4417 data: 0.0050 max mem: 22448 +train: [16] [240/400] eta: 0:01:13 lr: 0.000036 loss: 2.1279 (2.1448) grad: 0.2480 (0.2483) time: 0.4601 data: 0.0051 max mem: 22448 +train: [16] [260/400] eta: 0:01:03 lr: 0.000035 loss: 2.1454 (2.1467) grad: 0.2586 (0.2492) time: 0.4438 data: 0.0051 max mem: 22448 +train: [16] [280/400] eta: 0:00:54 lr: 0.000034 loss: 2.1461 (2.1472) grad: 0.2571 (0.2493) time: 0.4422 data: 0.0048 max mem: 22448 +train: [16] [300/400] eta: 0:00:45 lr: 0.000033 loss: 2.1461 (2.1483) grad: 0.2489 (0.2495) time: 0.4509 data: 0.0051 max mem: 22448 +train: [16] [320/400] eta: 0:00:36 lr: 0.000032 loss: 2.1823 (2.1527) grad: 0.2503 (0.2500) time: 0.4459 data: 0.0052 max mem: 22448 +train: [16] [340/400] eta: 0:00:27 lr: 0.000031 loss: 2.1735 (2.1520) grad: 0.2525 (0.2502) time: 0.4405 data: 0.0050 max mem: 22448 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 2.1569 (2.1524) grad: 0.2503 (0.2505) time: 0.4495 data: 0.0051 max mem: 22448 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 2.1709 (2.1528) grad: 0.2499 (0.2509) time: 0.4468 data: 0.0047 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.1694 (2.1535) grad: 0.2544 (0.2513) time: 0.4304 data: 0.0046 max mem: 22448 +train: [16] Total time: 0:03:00 (0.4523 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.1694 (2.1535) grad: 0.2544 (0.2513) +eval (validation): [16] [ 0/85] eta: 0:04:37 time: 3.2608 data: 2.9639 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:35 time: 0.4038 data: 0.0057 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:20 time: 0.3490 data: 0.0043 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:10 time: 0.3487 data: 0.0044 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3329 data: 0.0042 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3180 data: 0.0041 max mem: 22448 +eval (validation): [16] Total time: 0:00:33 (0.3933 s / it) +cv: [16] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.469 acc: 0.258 f1: 0.199 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:21:54 lr: nan time: 3.2867 data: 2.9106 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:50 lr: 0.000028 loss: 2.0581 (2.0830) grad: 0.2259 (0.2358) time: 0.4738 data: 0.0031 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:11 lr: 0.000027 loss: 2.0993 (2.0998) grad: 0.2362 (0.2385) time: 0.4520 data: 0.0047 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:49 lr: 0.000026 loss: 2.1297 (2.1112) grad: 0.2367 (0.2389) time: 0.4321 data: 0.0050 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:35 lr: 0.000025 loss: 2.1116 (2.1015) grad: 0.2376 (0.2390) time: 0.4408 data: 0.0050 max mem: 22448 +train: [17] [100/400] eta: 0:02:22 lr: 0.000024 loss: 2.1035 (2.1098) grad: 0.2343 (0.2390) time: 0.4350 data: 0.0048 max mem: 22448 +train: [17] [120/400] eta: 0:02:11 lr: 0.000023 loss: 2.1183 (2.1095) grad: 0.2390 (0.2400) time: 0.4475 data: 0.0050 max mem: 22448 +train: [17] [140/400] eta: 0:02:01 lr: 0.000023 loss: 2.1022 (2.1098) grad: 0.2413 (0.2403) time: 0.4558 data: 0.0050 max mem: 22448 +train: [17] [160/400] eta: 0:01:51 lr: 0.000022 loss: 2.1022 (2.1096) grad: 0.2418 (0.2414) time: 0.4503 data: 0.0049 max mem: 22448 +train: [17] [180/400] eta: 0:01:41 lr: 0.000021 loss: 2.1112 (2.1093) grad: 0.2408 (0.2411) time: 0.4380 data: 0.0049 max mem: 22448 +train: [17] [200/400] eta: 0:01:32 lr: 0.000020 loss: 2.0927 (2.1097) grad: 0.2384 (0.2408) time: 0.4418 data: 0.0049 max mem: 22448 +train: [17] [220/400] eta: 0:01:22 lr: 0.000019 loss: 2.1284 (2.1119) grad: 0.2449 (0.2420) time: 0.4461 data: 0.0050 max mem: 22448 +train: [17] [240/400] eta: 0:01:13 lr: 0.000019 loss: 2.1499 (2.1111) grad: 0.2449 (0.2426) time: 0.4678 data: 0.0050 max mem: 22448 +train: [17] [260/400] eta: 0:01:04 lr: 0.000018 loss: 2.0728 (2.1108) grad: 0.2415 (0.2428) time: 0.4377 data: 0.0049 max mem: 22448 +train: [17] [280/400] eta: 0:00:54 lr: 0.000017 loss: 2.1062 (2.1128) grad: 0.2425 (0.2429) time: 0.4454 data: 0.0050 max mem: 22448 +train: [17] [300/400] eta: 0:00:45 lr: 0.000016 loss: 2.1311 (2.1148) grad: 0.2432 (0.2430) time: 0.4582 data: 0.0050 max mem: 22448 +train: [17] [320/400] eta: 0:00:36 lr: 0.000016 loss: 2.1307 (2.1149) grad: 0.2385 (0.2425) time: 0.4364 data: 0.0048 max mem: 22448 +train: [17] [340/400] eta: 0:00:27 lr: 0.000015 loss: 2.1212 (2.1153) grad: 0.2378 (0.2429) time: 0.4472 data: 0.0050 max mem: 22448 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 2.1536 (2.1173) grad: 0.2467 (0.2431) time: 0.4405 data: 0.0050 max mem: 22448 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 2.1467 (2.1170) grad: 0.2484 (0.2440) time: 0.4428 data: 0.0048 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.0887 (2.1156) grad: 0.2480 (0.2438) time: 0.4411 data: 0.0051 max mem: 22448 +train: [17] Total time: 0:03:01 (0.4543 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.0887 (2.1156) grad: 0.2480 (0.2438) +eval (validation): [17] [ 0/85] eta: 0:04:30 time: 3.1784 data: 2.9374 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:32 time: 0.3623 data: 0.0040 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:19 time: 0.3666 data: 0.0043 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:10 time: 0.3525 data: 0.0046 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3320 data: 0.0041 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3176 data: 0.0039 max mem: 22448 +eval (validation): [17] Total time: 0:00:32 (0.3873 s / it) +cv: [17] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.462 acc: 0.262 f1: 0.202 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:21:17 lr: nan time: 3.1950 data: 2.8623 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:40 lr: 0.000012 loss: 2.1168 (2.1170) grad: 0.2394 (0.2392) time: 0.4496 data: 0.0045 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:05 lr: 0.000012 loss: 2.1168 (2.0841) grad: 0.2378 (0.2373) time: 0.4451 data: 0.0045 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:47 lr: 0.000011 loss: 2.0498 (2.0849) grad: 0.2426 (0.2421) time: 0.4457 data: 0.0051 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:33 lr: 0.000011 loss: 2.0682 (2.0906) grad: 0.2393 (0.2402) time: 0.4403 data: 0.0047 max mem: 22448 +train: [18] [100/400] eta: 0:02:21 lr: 0.000010 loss: 2.0822 (2.0908) grad: 0.2351 (0.2401) time: 0.4433 data: 0.0049 max mem: 22448 +train: [18] [120/400] eta: 0:02:11 lr: 0.000009 loss: 2.0725 (2.0852) grad: 0.2366 (0.2395) time: 0.4502 data: 0.0051 max mem: 22448 +train: [18] [140/400] eta: 0:02:00 lr: 0.000009 loss: 2.0840 (2.0904) grad: 0.2388 (0.2400) time: 0.4440 data: 0.0051 max mem: 22448 +train: [18] [160/400] eta: 0:01:50 lr: 0.000008 loss: 2.1084 (2.0902) grad: 0.2404 (0.2396) time: 0.4373 data: 0.0053 max mem: 22448 +train: [18] [180/400] eta: 0:01:41 lr: 0.000008 loss: 2.0707 (2.0875) grad: 0.2404 (0.2402) time: 0.4490 data: 0.0051 max mem: 22448 +train: [18] [200/400] eta: 0:01:31 lr: 0.000007 loss: 2.0901 (2.0907) grad: 0.2404 (0.2405) time: 0.4401 data: 0.0050 max mem: 22448 +train: [18] [220/400] eta: 0:01:22 lr: 0.000007 loss: 2.0959 (2.0911) grad: 0.2375 (0.2404) time: 0.4436 data: 0.0050 max mem: 22448 +train: [18] [240/400] eta: 0:01:13 lr: 0.000006 loss: 2.1163 (2.0949) grad: 0.2405 (0.2404) time: 0.4554 data: 0.0051 max mem: 22448 +train: [18] [260/400] eta: 0:01:03 lr: 0.000006 loss: 2.1119 (2.0949) grad: 0.2402 (0.2404) time: 0.4485 data: 0.0052 max mem: 22448 +train: [18] [280/400] eta: 0:00:54 lr: 0.000006 loss: 2.1052 (2.0956) grad: 0.2398 (0.2405) time: 0.4276 data: 0.0046 max mem: 22448 +train: [18] [300/400] eta: 0:00:45 lr: 0.000005 loss: 2.1061 (2.0947) grad: 0.2345 (0.2401) time: 0.4538 data: 0.0050 max mem: 22448 +train: [18] [320/400] eta: 0:00:36 lr: 0.000005 loss: 2.1209 (2.0965) grad: 0.2345 (0.2401) time: 0.4361 data: 0.0049 max mem: 22448 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 2.1115 (2.0947) grad: 0.2379 (0.2399) time: 0.4464 data: 0.0051 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 2.0952 (2.0953) grad: 0.2391 (0.2399) time: 0.4603 data: 0.0050 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 2.1127 (2.0959) grad: 0.2405 (0.2399) time: 0.4394 data: 0.0049 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 2.0840 (2.0940) grad: 0.2405 (0.2401) time: 0.4382 data: 0.0049 max mem: 22448 +train: [18] Total time: 0:03:00 (0.4522 s / it) +train: [18] Summary: lr: 0.000003 loss: 2.0840 (2.0940) grad: 0.2405 (0.2401) +eval (validation): [18] [ 0/85] eta: 0:04:35 time: 3.2388 data: 2.9426 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:35 time: 0.4172 data: 0.0045 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:20 time: 0.3628 data: 0.0039 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:10 time: 0.3499 data: 0.0041 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:02 time: 0.3333 data: 0.0043 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3209 data: 0.0042 max mem: 22448 +eval (validation): [18] Total time: 0:00:34 (0.4004 s / it) +cv: [18] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.466 acc: 0.260 f1: 0.202 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:21:11 lr: nan time: 3.1785 data: 2.8505 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:42 lr: 0.000003 loss: 2.1116 (2.1046) grad: 0.2292 (0.2376) time: 0.4551 data: 0.0041 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:07 lr: 0.000003 loss: 2.0643 (2.0737) grad: 0.2325 (0.2380) time: 0.4559 data: 0.0049 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:49 lr: 0.000002 loss: 2.0643 (2.0915) grad: 0.2317 (0.2376) time: 0.4516 data: 0.0050 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:36 lr: 0.000002 loss: 2.1294 (2.0946) grad: 0.2317 (0.2363) time: 0.4533 data: 0.0049 max mem: 22448 +train: [19] [100/400] eta: 0:02:23 lr: 0.000002 loss: 2.0807 (2.0926) grad: 0.2365 (0.2367) time: 0.4406 data: 0.0048 max mem: 22448 +train: [19] [120/400] eta: 0:02:13 lr: 0.000002 loss: 2.0903 (2.0968) grad: 0.2372 (0.2375) time: 0.4626 data: 0.0052 max mem: 22448 +train: [19] [140/400] eta: 0:02:02 lr: 0.000001 loss: 2.1046 (2.0928) grad: 0.2340 (0.2363) time: 0.4458 data: 0.0050 max mem: 22448 +train: [19] [160/400] eta: 0:01:52 lr: 0.000001 loss: 2.0947 (2.0893) grad: 0.2311 (0.2358) time: 0.4450 data: 0.0052 max mem: 22448 +train: [19] [180/400] eta: 0:01:42 lr: 0.000001 loss: 2.0618 (2.0873) grad: 0.2357 (0.2368) time: 0.4553 data: 0.0051 max mem: 22448 +train: [19] [200/400] eta: 0:01:33 lr: 0.000001 loss: 2.0698 (2.0864) grad: 0.2388 (0.2366) time: 0.4514 data: 0.0050 max mem: 22448 +train: [19] [220/400] eta: 0:01:23 lr: 0.000001 loss: 2.0780 (2.0857) grad: 0.2348 (0.2365) time: 0.4510 data: 0.0049 max mem: 22448 +train: [19] [240/400] eta: 0:01:14 lr: 0.000001 loss: 2.0773 (2.0850) grad: 0.2379 (0.2368) time: 0.4653 data: 0.0053 max mem: 22448 +train: [19] [260/400] eta: 0:01:04 lr: 0.000000 loss: 2.0773 (2.0846) grad: 0.2388 (0.2367) time: 0.4545 data: 0.0049 max mem: 22448 +train: [19] [280/400] eta: 0:00:55 lr: 0.000000 loss: 2.0952 (2.0868) grad: 0.2356 (0.2368) time: 0.4386 data: 0.0051 max mem: 22448 +train: [19] [300/400] eta: 0:00:46 lr: 0.000000 loss: 2.0893 (2.0860) grad: 0.2340 (0.2366) time: 0.4554 data: 0.0053 max mem: 22448 +train: [19] [320/400] eta: 0:00:36 lr: 0.000000 loss: 2.1040 (2.0896) grad: 0.2381 (0.2370) time: 0.4605 data: 0.0051 max mem: 22448 +train: [19] [340/400] eta: 0:00:27 lr: 0.000000 loss: 2.1346 (2.0915) grad: 0.2354 (0.2368) time: 0.4514 data: 0.0050 max mem: 22448 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 2.1007 (2.0906) grad: 0.2279 (0.2364) time: 0.4405 data: 0.0047 max mem: 22448 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 2.0746 (2.0893) grad: 0.2279 (0.2362) time: 0.4520 data: 0.0051 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.0841 (2.0914) grad: 0.2345 (0.2363) time: 0.4498 data: 0.0050 max mem: 22448 +train: [19] Total time: 0:03:03 (0.4593 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.0841 (2.0914) grad: 0.2345 (0.2363) +eval (validation): [19] [ 0/85] eta: 0:04:22 time: 3.0906 data: 2.8070 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:33 time: 0.3926 data: 0.0042 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:20 time: 0.3914 data: 0.0045 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3547 data: 0.0050 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:02 time: 0.3346 data: 0.0044 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3254 data: 0.0044 max mem: 22448 +eval (validation): [19] Total time: 0:00:34 (0.4016 s / it) +cv: [19] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.465 acc: 0.260 f1: 0.201 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.2602436323366556, "hparam": [0.44, 1.0], "hparam_id": 19, "epoch": 19, "is_best": false, "best_score": 0.2652270210409745} +eval (train): [20] [ 0/509] eta: 0:24:49 time: 2.9270 data: 2.6230 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:08 time: 0.3879 data: 0.0040 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:22 time: 0.3491 data: 0.0041 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:05 time: 0.3756 data: 0.0047 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:49 time: 0.3453 data: 0.0044 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:37 time: 0.3345 data: 0.0043 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:28 time: 0.3671 data: 0.0044 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:19 time: 0.3661 data: 0.0043 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:10 time: 0.3430 data: 0.0042 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:01 time: 0.3340 data: 0.0043 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:53 time: 0.3293 data: 0.0042 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:45 time: 0.3744 data: 0.0043 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:38 time: 0.3590 data: 0.0048 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:31 time: 0.3669 data: 0.0042 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:23 time: 0.3471 data: 0.0041 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:16 time: 0.3717 data: 0.0043 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:09 time: 0.3630 data: 0.0045 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:01 time: 0.3467 data: 0.0040 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:54 time: 0.3592 data: 0.0044 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:47 time: 0.3851 data: 0.0045 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:39 time: 0.3678 data: 0.0047 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:32 time: 0.3935 data: 0.0046 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3810 data: 0.0051 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3750 data: 0.0047 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3676 data: 0.0046 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3517 data: 0.0043 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3363 data: 0.0042 max mem: 22448 +eval (train): [20] Total time: 0:03:07 (0.3678 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:27 time: 3.1448 data: 2.8352 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:33 time: 0.3799 data: 0.0050 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3441 data: 0.0036 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3727 data: 0.0044 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3444 data: 0.0040 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3398 data: 0.0040 max mem: 22448 +eval (validation): [20] Total time: 0:00:33 (0.3950 s / it) +eval (test): [20] [ 0/85] eta: 0:04:30 time: 3.1809 data: 2.8768 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:32 time: 0.3732 data: 0.0042 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:19 time: 0.3768 data: 0.0040 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3673 data: 0.0042 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3327 data: 0.0041 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3234 data: 0.0039 max mem: 22448 +eval (test): [20] Total time: 0:00:33 (0.3967 s / it) +eval (testid): [20] [ 0/82] eta: 0:03:47 time: 2.7795 data: 2.5040 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3883 data: 0.0050 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3856 data: 0.0044 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3809 data: 0.0052 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3515 data: 0.0044 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3404 data: 0.0044 max mem: 22448 +eval (testid): [20] Total time: 0:00:33 (0.4071 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.2652270210409745, "hparam": [1, 1.0], "hparam_id": 24, "epoch": 6, "is_best": true, "best_score": 0.2652270210409745} +eval (train): [20] [ 0/509] eta: 0:23:41 time: 2.7925 data: 2.5399 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:04 time: 0.3852 data: 0.0283 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:29 time: 0.3911 data: 0.0055 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:07 time: 0.3578 data: 0.0027 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:51 time: 0.3465 data: 0.0040 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:40 time: 0.3581 data: 0.0043 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:30 time: 0.3663 data: 0.0045 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:22 time: 0.3705 data: 0.0045 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:14 time: 0.3815 data: 0.0046 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:05 time: 0.3514 data: 0.0039 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:56 time: 0.3253 data: 0.0038 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:47 time: 0.3387 data: 0.0042 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:39 time: 0.3525 data: 0.0042 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:31 time: 0.3436 data: 0.0041 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:23 time: 0.3431 data: 0.0044 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:16 time: 0.3373 data: 0.0042 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:08 time: 0.3263 data: 0.0039 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:01 time: 0.3537 data: 0.0040 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:53 time: 0.3363 data: 0.0036 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:46 time: 0.3341 data: 0.0037 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.3300 data: 0.0038 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:31 time: 0.3604 data: 0.0041 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3606 data: 0.0042 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3336 data: 0.0041 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3309 data: 0.0039 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3238 data: 0.0039 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3179 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:01 (0.3560 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:10 time: 2.9503 data: 2.6632 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:33 time: 0.3867 data: 0.0062 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3580 data: 0.0044 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3449 data: 0.0033 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3284 data: 0.0044 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3215 data: 0.0040 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3859 s / it) +eval (test): [20] [ 0/85] eta: 0:03:57 time: 2.7965 data: 2.5540 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:29 time: 0.3391 data: 0.0049 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:17 time: 0.3221 data: 0.0037 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3489 data: 0.0035 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3366 data: 0.0043 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3214 data: 0.0041 max mem: 22448 +eval (test): [20] Total time: 0:00:31 (0.3667 s / it) +eval (testid): [20] [ 0/82] eta: 0:03:59 time: 2.9189 data: 2.6875 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:30 time: 0.3674 data: 0.0055 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:17 time: 0.3293 data: 0.0032 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3304 data: 0.0043 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3267 data: 0.0041 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3119 data: 0.0041 max mem: 22448 +eval (testid): [20] Total time: 0:00:30 (0.3709 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|-------:|-----:|------------:|:---------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.0003 | 0.05 | 24 | [1, 1.0] | train | 2.1266 | 0.35932 | 0.0023684 | 0.30619 | 0.0024262 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.0003 | 0.05 | 24 | [1, 1.0] | validation | 2.4375 | 0.26523 | 0.0055322 | 0.20775 | 0.0050754 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.0003 | 0.05 | 24 | [1, 1.0] | test | 2.3717 | 0.27699 | 0.0053215 | 0.21331 | 0.0051785 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.0003 | 0.05 | 24 | [1, 1.0] | testid | 2.3368 | 0.29593 | 0.0057669 | 0.24068 | 0.0055414 | + + +done! total time: 1:23:02 diff --git a/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..8f737e5a4ac1e5f8fa129b1a56cfbfaab694c0c7 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.155613205432892, "train/grad": 0.17131446480751036, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.207442626953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.206539306640625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.2049755859375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.203482666015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.20200927734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.200107421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.197984619140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.195830078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.19307373046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.19030029296875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.187784423828125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.18434814453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.18131591796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.177508544921875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.17443603515625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.171768798828125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.168929443359375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1660205078125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.16327880859375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.161168212890625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.158775634765625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.156783447265625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.15496337890625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.15335693359375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.15183349609375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.15051025390625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.14949462890625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.148809814453125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.1481591796875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.147767333984375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.147581787109375, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.147568359375, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.147872314453125, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.148333740234375, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.14893310546875, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.149635009765625, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.150592041015625, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1517169189453127, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1529620361328123, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.1547149658203124, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.1550079345703126, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1511044311523437, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1382452392578126, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.122934036254883, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.098272247314453, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.075573196411133, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0588929748535154, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.0415777587890624, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.0247735595703125, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02745564072392881, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027380064893513918, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.027257116856053473, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.027137470273301004, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02702145862393081, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02686614714562893, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02669565809890628, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.026513817477971314, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0262873462587595, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02606185511685908, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.025854604407213627, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025568225244060157, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0253185183275491, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02500239779241383, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02474228238221258, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.024526578108780086, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.024295764290727676, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.024068823526613416, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02386125800665468, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02370286070741713, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023538864203728735, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02339941910933703, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023273486429825425, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02316179297864437, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023066235799342395, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02297497058287263, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022909108181484044, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022860633404925466, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02280834463890642, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.022759469393640756, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.022724790209904314, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02269546295516193, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022649743780493736, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.022598592760041357, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.022514387713745237, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.022400223976001145, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02224500212352723, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02206483149435371, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.021836932660080494, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02153512424323708, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02135525719728321, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02141033756081015, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.022008990179747345, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02263901705853641, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.023652432770468294, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.024833087963052093, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.025925355930812657, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.027243938581086695, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.028195504606701435, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.202747106552124, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1997487545013428, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1949524879455566, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1904683113098145, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1862592697143555, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.180774450302124, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.175067901611328, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.169259548187256, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1624395847320557, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1562001705169678, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1509456634521484, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.144479274749756, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1395652294158936, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1344375610351562, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.131148099899292, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1290338039398193, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.127361536026001, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.126293897628784, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1258010864257812, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1256606578826904, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.125777244567871, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1261074542999268, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.126702308654785, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1276092529296875, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1289029121398926, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.130751848220825, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1324737071990967, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.133986711502075, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1358814239501953, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1373307704925537, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.137516975402832, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1366889476776123, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1347904205322266, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1331801414489746, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1317126750946045, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.131227493286133, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1317851543426514, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1334526538848877, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1349241733551025, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.13012433052063, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0956614017486572, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9717140197753906, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8254778385162354, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7830615043640137, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.7301199436187744, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.682194948196411, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.647329330444336, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.652129650115967, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.6764767169952393, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.039128829826504244, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.04152823920265781, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.04152823920265781, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.04355850867478774, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.04521963824289406, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.049464747139165745, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.05444813584348468, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06016980435585087, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06533776301218161, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.055740125507567365, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.056109265411590996, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.052417866371354746, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.05278700627537837, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.053156146179401995, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.05370985603543743, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.055740125507567365, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.06275378368401624, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.0651531930601698, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.067921742340347, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.06699889258028793, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.067921742340347, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06866002214839424, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.06736803248431156, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.06773717238833518, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.08010335917312661, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.11775562938353636, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.16555924695459578, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.1611295681063123, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.1877076411960133, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.19287559985234404, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.20653377630121816, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.20653377630121816, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.20191952750092285, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014483232944512585, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.015492864234587575, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014131072336629621, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014279486836438754, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013958449250428462, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0145880965470536, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014137502485758695, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01379944669412731, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.014321430722023408, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014267897888847985, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013039648776434482, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.012482715817858562, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013396777877582327, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013120302415887323, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.013277789212584427, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.013109185893546598, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.013370491036930396, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.013427367146146363, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013577522494122632, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.014046184199844805, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.013998677149207493, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.015074568614917369, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.017546683868922965, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.019832174837548455, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.020550786865273247, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.017293538154786993, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.01507486201856306, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.010928821453093374, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.009847023666833965, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.009719229835300236, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.010508395002940632, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.012343237943660977, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.01696901434010918, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01754453691404161, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.017856657153505064, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.01959079793853446, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.022086722471480108, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.02534461701118083, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.02623575122966357, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.023167858856765176, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.029990809553369763, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.05684602082893019, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.08852800352642633, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.09478926969489536, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.11991910764514692, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.13612963976001322, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.14636182001391582, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.14421023976938327, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.14761964233060226, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 3.0588929748535154, "validation/loss_best": 2.647329330444336, "validation/acc_best": 0.20653377630121816, "validation/f1_best": 0.14636182001391582} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 3.0080054605007174, "train/grad": 0.18460923977196217, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.175465087890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.17137451171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.165474853515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.16039306640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.15619873046875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.15131103515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.146981201171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1431494140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1393359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.136572265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.134580078125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.132568359375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.131395263671875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1301904296875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.129608154296875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.129288330078125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.12912353515625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.128922119140625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.12878662109375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.128609619140625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1285546875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.12837890625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.12828125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1281884765625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.1282666015625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.128409423828125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.128714599609375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.128883056640625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.129454345703125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.13015625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1304412841796876, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.129095458984375, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1115863037109377, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.071799621582031, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.014117431640625, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9530517578125, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.879203987121582, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.8129028511047363, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.7551524353027346, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.6906270217895507, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.6434359169006347, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.623556728363037, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.601336803436279, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5880009174346923, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5836020851135255, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.6037095618247985, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.6329353046417237, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.683871479034424, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.7472270274162294, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024566554808989168, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.024225533213466405, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023722610184922815, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023295395295135676, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022931774724274875, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022514184419997037, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0221436214633286, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021830557617358862, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021537587800994514, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02132659403141588, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021188563467003405, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02106453764718026, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02100182239897549, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02096588003914803, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02096523391548544, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020974631365388633, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02099644158966839, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02102556098718196, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021056126123294234, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021083002714440226, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021112974360585214, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.021139838006347417, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021162534411996604, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02117859323043376, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.021175444452092053, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02113494495395571, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.021054396172985435, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02094624296762049, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02074048422742635, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.020425262772478164, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.020084372921846807, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.019786287788301705, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.019840896003879605, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02066511704120785, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02185318901203573, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.023131279880180954, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.024635007535107434, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02654555322602391, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.028331981911323963, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.030570785300806164, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03293434308841824, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03363601069897413, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03500522383488715, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03589280758984387, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.037666988698765634, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.039458960378542546, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04064642518758774, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04449078637175262, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.049698994401842354, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1593472957611084, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1540491580963135, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1469812393188477, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1416242122650146, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.13763427734375, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.133652448654175, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1308023929595947, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1288247108459473, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1274304389953613, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.126710891723633, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1263699531555176, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.126129150390625, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1259822845458984, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.125741958618164, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1254498958587646, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1251091957092285, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.124671220779419, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1241064071655273, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.123613119125366, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1233561038970947, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.123361825942993, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1236820220947266, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1244914531707764, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1257894039154053, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1278254985809326, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1314992904663086, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.135871648788452, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.140308380126953, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1460399627685547, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.150153398513794, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1475770473480225, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.120288848876953, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.893970251083374, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7599194049835205, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6421334743499756, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5880870819091797, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.556108236312866, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5442585945129395, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5223441123962402, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5281927585601807, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5408594608306885, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.541405200958252, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.560009479522705, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6091177463531494, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6742799282073975, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6879477500915527, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.735029458999634, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.017094373703003, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.070310354232788, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06312292358803986, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.0664451827242525, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06699889258028793, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.0664451827242525, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06699889258028793, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06496862310815799, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.0636766334440753, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.06349206349206349, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.06921373200442968, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.08490217792543374, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.14525655223329642, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.16906607604282023, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19139904023624954, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2355112587670727, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2364341085271318, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23624953857511996, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2292358803986711, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22000738279808046, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.20948689553340716, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.2069029162052418, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.21834625322997417, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.16555924695459578, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.15780730897009967, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015807714953225877, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014630543420223026, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013135922993168306, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013068058259469115, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012194340730284003, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01201466644373015, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012131510690143723, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012483104021042276, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01191601749210208, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011766775739719655, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012191053438938013, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01226871374236461, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01293386032663217, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013812033960202036, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.013824091984228662, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01382613667404462, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.014599687518778845, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.014523326433404022, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.016367005905892493, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.016864365913826524, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.01917479506003084, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.021234328626517505, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.020507268292274197, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.020078781027595704, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.018135887378513512, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.01589507380040561, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.015651174762341482, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.01478801873044075, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.014978340392618597, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.017382713230461245, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.023246350407121977, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.0347817235464138, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.0695276866530217, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.09462709259248532, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1298791796370129, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15379635277822326, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1555042434901666, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15712180826324015, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16979901875296435, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16838465931468063, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16117306221878736, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16796190581845408, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16348076240068235, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15518080802618128, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.15800605488478314, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1490370551232355, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.14254143446478704, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0944747023310017, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.11073470882013318, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 2.6434359169006347, "validation/loss_best": 2.5408594608306885, "validation/acc_best": 0.2364341085271318, "validation/f1_best": 0.16117306221878736} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.9781634747982024, "train/grad": 0.3311583649367094, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.144619140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1415625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1381591796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13604248046875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.13467529296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.133804931640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13319091796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13284423828125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.132630615234375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.132445068359375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.132296142578125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13201171875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.13171875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.131361083984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1308935546875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.130660400390625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.130174560546875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12982421875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.12935546875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.12913818359375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.128828125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.128653564453125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.128355712890625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.128216552734375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.127779541015625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1264892578125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.119716796875, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.0887371826171877, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.009092254638672, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.8915435028076173, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.787046356201172, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.7004376602172853, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.5923330879211424, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.525845775604248, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.475755500793457, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.437249250411987, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.4111159324645994, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.399673492908478, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.405209929943085, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.40990482211113, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.410198965072632, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.437852621078491, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4781174087524414, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5216037225723267, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5715884351730347, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.660059496164322, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.760756882429123, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.2057189524173735, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.19472337603569, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022027696841396393, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021747595546767116, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021436747061088682, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021255354657769204, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021152612790465354, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021086204568855463, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02106224906630814, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021063911709934473, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021078860415145756, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021099820039235054, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021119515034370123, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02114450722467154, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021163547560572624, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0211897014034912, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021209457768127323, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021225130865350366, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021242580846883356, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021254794071428478, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02125229996163398, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021241290071047844, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021201253673061728, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.021127433287911116, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.021004416137002407, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02082081163302064, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.020564034995622933, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.020193320959806443, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.019986209888011218, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.020568611486814915, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.022181933070532976, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.024291945928707717, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02646038099192083, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02834110451862216, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.030819180784747005, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.031992011293768885, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.033067995561286805, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.033814529972150925, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03471886275336147, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03609378575347364, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03798635083250702, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.038806639406830074, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.040192203894257546, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.040480056591331956, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04261684320867062, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04488598171621561, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04887941036373377, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.054703269582241774, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05979419337585568, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.13395761204883455, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.11796991657465697, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.13405179977417, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.131537437438965, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.129040002822876, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1277172565460205, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1270053386688232, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1265623569488525, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1263537406921387, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1262736320495605, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1262786388397217, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1263201236724854, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1263906955718994, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1266095638275146, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1268744468688965, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1273722648620605, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.127842903137207, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1282668113708496, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1286628246307373, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.12876558303833, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.128300905227661, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.127405881881714, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1256792545318604, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.12353777885437, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1209211349487305, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1181583404541016, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1148955821990967, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1050214767456055, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.03730845451355, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8159828186035156, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.699153423309326, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5784664154052734, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5150718688964844, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5023093223571777, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.503300666809082, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.516928195953369, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.546562433242798, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.550949811935425, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5300981998443604, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5985586643218994, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.583885431289673, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5664455890655518, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.625601053237915, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6454403400421143, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6727182865142822, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6973445415496826, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.792365550994873, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.025986433029175, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1305525302886963, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07345884090070137, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07954964931709117, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.11314138058324105, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.1613141380583241, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.19158361018826134, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23015873015873015, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24787744555186417, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.25083056478405313, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23754152823920266, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24049464747139165, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23588039867109634, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2277593207825766, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22259136212624583, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2249907715023994, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21834625322997417, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2220376522702104, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2069029162052418, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.16832779623477298, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.15873015873015872, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011397604785377493, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011273439833811743, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010957723482505155, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010923000281822415, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01075727367582527, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010919389798865446, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010907551367029873, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.010829236548695368, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010787716984194312, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011399528233142026, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.011823200785379582, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.012974659679263728, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013712211607417854, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013902743143367835, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01411285159492444, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.014302056825288815, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01402254810434006, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.013842019228923858, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013878709438574789, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.013343486133758522, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.014352496539539053, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.014274862142308234, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.01665601450575017, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.017032733121370394, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.017161177118061446, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.02072719558305463, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.04487554753344793, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.08543889799829692, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.11881151614480097, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.16152778003853405, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16841904141761255, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17339656240427778, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1757725742050932, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17083361891239282, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16334731677303413, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16680893589183796, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17831589380215898, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17498321592126423, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18042274948105827, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17820703505912117, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17957937595514314, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.168570629351986, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1531989850457452, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16155717454626728, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14780347260917848, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.11781990873850685, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.11966811909704388, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.5923330879211424, "validation/loss_best": 2.503300666809082, "validation/acc_best": 0.25083056478405313, "validation/f1_best": 0.1757725742050932} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.9389761769771576, "train/grad": 0.3705428518354893, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13367431640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.132598876953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.131612548828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.131033935546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.130616455078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.130142822265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.129781494140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12925537109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.128775634765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.128302001953125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.127769775390625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12724853515625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.126673583984375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.125908203125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.125206298828125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.12461669921875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.123997802734375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.123233642578125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.122481689453125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.121766357421875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.12077392578125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.119505615234375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1153009033203123, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0808526611328126, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.9829527282714845, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.8415892028808596, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.707177200317383, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.615830955505371, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.5295042037963866, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.4589061164855956, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.4066162109375, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.3893414688110353, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.342967710494995, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.3320256233215333, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3262361812591554, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.3127649307250975, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.3205594062805175, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3249467468261718, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.34695298910141, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3833131766319275, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.409787030220032, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4749037396907805, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.555845431089401, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.621288924217224, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.346738739013672, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.92240163564682, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.722305936813354, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021035923417657615, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020991062419489025, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02097389232367277, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020981531939469278, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02099507689010352, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02101344581693411, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021030483078211547, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021045399466529488, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021062949649058284, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0210786328651011, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021091168648563325, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021107600703835487, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021116859982721507, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021124456096440553, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02112203790806234, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021110157314687968, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021078553646802903, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021022515296936037, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020926151294261217, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020808860920369626, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020615923809818925, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020352694750763477, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020043221302330494, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02057513580657542, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02255581969860941, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.025594689534045756, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02830559334717691, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.029658547546714546, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.031198891792446375, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03274295325390995, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.034134902292862535, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0347285373788327, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.035913231568410996, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03674861785955727, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0378608182631433, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.03895176211372018, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.040036997850984336, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04189193222671747, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04311118526384235, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04418427456170321, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.046344860326498746, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04829797158017755, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05261492505669594, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05562504731118679, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.16181271629408, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07379778400063515, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.09025954201817513, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1273791790008545, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1268160343170166, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.126363754272461, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.126103162765503, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.125920295715332, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1256561279296875, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1253511905670166, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.124986410140991, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1245036125183105, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.124014377593994, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1235334873199463, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1228442192077637, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1222968101501465, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1216132640838623, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.121211290359497, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1209917068481445, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.120995283126831, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.121412754058838, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.122121810913086, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.122781991958618, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1231110095977783, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1209380626678467, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0967917442321777, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.8626620769500732, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.725318670272827, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.612766742706299, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5468590259552, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.531766176223755, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5293540954589844, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.52480411529541, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.528336763381958, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.53554630279541, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5526483058929443, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.583416700363159, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5727498531341553, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.599081516265869, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6150596141815186, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6812188625335693, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.665724992752075, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6917662620544434, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9172937870025635, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8552520275115967, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.778501033782959, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.845885992050171, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1369311809539795, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.067921742340347, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.061461794019933555, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06127722406792174, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.061461794019933555, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06183093392395718, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.08102620893318568, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.14987080103359174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.18604651162790697, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22517534145441123, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2292358803986711, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23311184939091917, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23403469915097821, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23403469915097821, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21613141380583242, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19361387966039129, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1925064599483204, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21040974529346623, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20210409745293467, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.19638242894056848, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012178990994819163, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012314508146670561, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012282686372812171, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011999075404845778, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012146661286080348, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012375932318878449, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012499718319542504, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012781507544070621, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013685094425265769, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014604333990217313, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.014767464192605833, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.017092380615475005, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0179128415135622, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.017962216579543406, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.01749135370731873, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.017179918544394164, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.016700706481647513, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.016841427628285405, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.01718391381415989, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.017422980197965333, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.019222632753355097, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.02294294436764863, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.0380698957525771, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.07981738386056071, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.11708586300731959, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.13934946619690997, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.15954660737794518, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.16418706521390883, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1688094342151892, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17045700552056323, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17507310602132517, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17504823925620686, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1665948982257947, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16968792669947108, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16374734605223726, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16506122275030566, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1670652742828005, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16510693658322056, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16658307359330005, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16870479647688233, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1517101435858417, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1459900768136093, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.149191211724543, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14375124472174977, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.13743563394112576, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.342967710494995, "validation/loss_best": 2.5526483058929443, "validation/acc_best": 0.23403469915097821, "validation/f1_best": 0.1665948982257947} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.825298285484314, "train/grad": 0.26131917975842955, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13379150390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.133514404296875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1331298828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.132681884765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1323046875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.13182861328125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.131346435546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.130751953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.13016845703125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12950927734375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.128846435546875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.127972412109375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.127239990234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1261572265625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1251953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1242724609375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.123240966796875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.121922607421875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1200732421875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1162139892578127, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0858560180664063, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.976817321777344, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.8234236907958983, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.6664624404907227, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.5588153839111327, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.4709718132019045, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.421929397583008, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.390573616027832, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.3479547786712645, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.307945008277893, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.2668628025054933, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.2664861488342285, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.2334739661216734, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.233467879295349, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.2432872605323793, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.241496031284332, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.263180192708969, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.275068295001984, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.33157772064209, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.4014020442962645, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.483520965576172, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.55406534075737, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.6413723385334014, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.7530372965335848, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.952909653186798, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020766358459368348, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020779713881202044, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020796867455355823, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020809837663546205, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02081983649171889, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020829829587601125, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020839347685687244, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020849121171049773, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020860347775742413, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020867978050373496, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020874350620433687, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020877044820226728, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02087026179768145, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020844895206391812, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02079868398141116, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020734480447135866, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020626370110549033, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020458361827768386, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.020218831500969828, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0199668882926926, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0202381971757859, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02231495349202305, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02550946149043739, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0287355334777385, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.030680222203955054, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03219456211663783, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03249602871015668, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03303580243140459, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03387595019303262, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.034777327226474884, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03592218517325819, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0364996896777302, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03761037960648537, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03863252377137542, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03992838343605399, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.041333792693912984, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04280733143910766, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04480642560869455, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04768186658620834, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.049719580356031655, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.055162921603769066, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.057820301055908206, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.061766143515706064, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06541171541437507, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07815904589369893, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.126490354537964, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1264145374298096, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.126358985900879, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1263504028320312, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.126338243484497, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1263115406036377, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.126220226287842, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.126113176345825, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1258413791656494, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1254563331604004, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1249845027923584, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.124202251434326, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1233973503112793, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1223723888397217, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1216163635253906, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1212246417999268, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1211445331573486, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1212921142578125, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.120328187942505, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1084985733032227, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.9280285835266113, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.7516674995422363, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6484200954437256, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.570155620574951, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.511307716369629, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.48569917678833, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.488036870956421, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4982454776763916, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5042216777801514, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.526898145675659, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5306553840637207, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5167794227600098, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5890860557556152, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.663907051086426, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7049643993377686, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.707928419113159, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6665725708007812, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6668314933776855, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.677337408065796, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7812955379486084, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.843014717102051, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9146811962127686, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.068502902984619, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.071027994155884, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07290513104466592, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.13695090439276486, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.17552602436323367, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.20376522702104097, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.22572905131044665, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2499077150239941, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24861572535991142, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22868217054263565, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2187153931339978, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20191952750092285, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19472129937246216, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20302694721299372, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19767441860465115, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.1836471022517534, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01028867097581695, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010508482346829676, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010507457243424653, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010882953409141754, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010542239910013665, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.011470069868721072, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012557178951784416, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012918968347029082, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01477343706978339, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01480925685580775, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016137813780880073, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0180581260276609, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018309801220821053, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.017750028389655795, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.017321190463453765, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.016206315641681696, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01547462830580189, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.016493743023933676, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.01595082934799825, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.022985399497576737, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.07054308745477662, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.10132920545493906, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.13211970327792147, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.15408230444047508, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.17030253624740765, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18422048954364734, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18118668282034064, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17886765859349787, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18073717695975877, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18110282875480369, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19037488685139017, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18609416500121112, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17752741937563576, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16843365198487206, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16278529712035542, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15421477560968475, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1630744624830337, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17792515391479402, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17403506639849622, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1561592669375919, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15618183127713087, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15180421130472502, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15162999511865685, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.12052333021684412, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.4709718132019045, "validation/loss_best": 2.48569917678833, "validation/acc_best": 0.2499077150239941, "validation/f1_best": 0.18422048954364734} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.726717219352722, "train/grad": 0.23527185022830963, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.126461181640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1259814453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.125460205078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1249169921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.124476318359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.123802490234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12310302734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.122515869140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.121734619140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.120906982421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1202197265625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.11914794921875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11820068359375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.117030029296875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.115782470703125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.114520263671875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1123974609375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1060296630859376, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.044386901855469, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.897392120361328, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.7137469482421874, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.587124824523926, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.4794538497924803, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.4096180534362794, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.34852970123291, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.2936829566955566, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2669239616394044, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2374603652954104, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.2060248947143553, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1748513889312746, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.1401570224761963, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.141289472579956, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.127724549770355, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.131027703285217, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.140329339504242, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1625697839260103, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1910620450973513, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2063133984804155, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2507785725593568, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.356024159193039, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.391562634706497, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.446243387460709, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.587908262014389, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.654896821975708, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02106375476811081, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02107349977362901, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021086965831927956, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021098574763163923, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021107694450765848, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021120176459662618, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02113342996221036, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02114426497835666, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021155794197693468, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021160242767073213, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02115981578361243, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021148127503693104, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02112183035351336, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021057932656258344, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02096729346551001, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02085468847770244, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020670447098091245, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020456449263729155, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021472733924165367, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.024189051557332277, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.027791006378829478, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0301587001606822, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.032105867424979805, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03297459597699344, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.034071505134925247, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.034943281458690766, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03498361838981509, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03575988193973899, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.036915176194161174, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.037663518460467456, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03875968173146248, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.039076412757858635, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.040974965430796144, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04196429155766964, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04288685027509928, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04486239444464445, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04658331621438265, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04844984963536263, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05016327634453774, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.053558720368891956, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05408133650198579, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05544100454077125, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06315354682505131, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06413710271939635, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1249027252197266, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1245839595794678, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124091625213623, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.123640537261963, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1231961250305176, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122676372528076, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.122093677520752, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1214916706085205, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1206893920898438, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.119893789291382, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1191229820251465, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1180360317230225, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1169769763946533, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1155285835266113, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1141984462738037, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.112841844558716, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1098296642303467, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.089434862136841, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.850151538848877, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.716688632965088, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.6028711795806885, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.527920961380005, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4952595233917236, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4883363246917725, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4490551948547363, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.446617364883423, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.447681188583374, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.456691265106201, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.485799789428711, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5563173294067383, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5531084537506104, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.558335304260254, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5997769832611084, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.6712169647216797, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.750718355178833, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7923123836517334, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7619738578796387, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.839320421218872, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.94608473777771, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8926258087158203, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0033183097839355, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8465654850006104, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.903134346008301, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9116299152374268, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.067921742340347, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.08508674787744555, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.15337763012181616, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.18235511258767073, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.21336286452565523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.23292727943890734, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24234034699150978, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24880029531192321, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.260797342192691, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2591362126245847, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25655223329641935, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24510889627168697, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24215577703949798, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24640088593576967, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23920265780730898, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22240679217423404, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21354743447766703, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.17552602436323367, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.18456995201181248, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.17054263565891473, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.18992248062015504, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.18290882244370615, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19158361018826134, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010235822857641063, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01021266655277999, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0097999382714657, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0095993693082092, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009210947496529457, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008984829023381078, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008902773034984794, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009261521359187697, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.009348779028787111, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.009603224489603397, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010067059564629425, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011283122147416176, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013425862165778332, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01766990234636505, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.019250588295384844, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.021137134194104127, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.023956520835083515, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03724197616654596, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.07863039771589563, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.10532689592582771, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.14231149986912475, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.16482624244680402, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17151946828441958, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.17664517932094595, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18956391330216507, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19112303113823362, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19355631465255177, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1955123264139015, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18643379112048244, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18127313467877115, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18571861481741814, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18896420626584007, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18793583967846397, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17962865258172753, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1711383973405871, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16351096146249636, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16300124705702032, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1638910696426749, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1377485843035051, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1446473107662655, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.13409104799972044, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16380067881475116, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15869953434834502, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14610064223602445, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 2.2669239616394044, "validation/loss_best": 2.447681188583374, "validation/acc_best": 0.26116648209671467, "validation/f1_best": 0.19355631465255177} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.6676265347003936, "train/grad": 0.23786965034902097, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.130347900390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.129847412109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.128980712890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12830322265625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12761474609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12675537109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.125806884765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.124930419921875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.123680419921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.122598876953125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.121531982421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12009765625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.118724365234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.1167333984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.114473876953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.110787353515625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.092000732421875, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.9411761474609377, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.735690155029297, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.6119710540771486, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.498964538574219, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.417549285888672, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.34872013092041, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.2960578536987306, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.241984100341797, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.198426504135132, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.1822638368606566, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.145235528945923, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.117800989151001, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0814191579818724, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0540633726119997, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0588875818252563, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.0430614924430848, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.0488080871105194, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.06807106256485, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.0864899873733522, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.11912495136261, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1458931958675382, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2037572169303896, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.301904591321945, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3393566900491716, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4345012295246122, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4849624633789062, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.584971468448639, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021049868199042976, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021053517796099187, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0210590557474643, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02106295231729746, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021065132985822858, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021068326495587826, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021068678074516355, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021066000829450787, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021055931756272912, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02103707142174244, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02101000181399286, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02095233664382249, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020876256716437638, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020727262860164047, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02054222606122494, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020335744288749992, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02038476745132357, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02327753899153322, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.027186545729637145, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02956771439872682, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.031527605606243014, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.032674649311229584, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03381082503125071, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03423294521868229, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03515723374672234, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03589740738272667, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03586917316541076, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03699674910865724, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03792409705929458, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.038785340208560226, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03960092309862375, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04015166957862675, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0417403182387352, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04247601512819529, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.044019389767199754, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04566158464178443, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04698494469746947, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04901037508621812, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051183146722614765, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05329265870153904, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05503188457340002, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05683118319138884, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05713066682219505, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06081807715818286, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1247239112854004, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1244285106658936, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1239993572235107, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.123640298843384, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1233158111572266, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122922897338867, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.122572183609009, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1221275329589844, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.121697187423706, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1211557388305664, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.120699644088745, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1198930740356445, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1190102100372314, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.117310047149658, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1144914627075195, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1073851585388184, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.036864995956421, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.7586801052093506, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.6372525691986084, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.550267219543457, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4947142601013184, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4727590084075928, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4557547569274902, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4554595947265625, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4374959468841553, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4538955688476562, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.463702917098999, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5044121742248535, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5392844676971436, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6112473011016846, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.61012864112854, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.623414993286133, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.672414541244507, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7353501319885254, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7561545372009277, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7598419189453125, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.737039089202881, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.762272596359253, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8026280403137207, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7925777435302734, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8082127571105957, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7637124061584473, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.907634973526001, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9234464168548584, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.11277224067921743, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.1760797342192691, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.20671834625322996, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.23403469915097821, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2427094868955334, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24713916574381692, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2547065337763012, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25101513473606496, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2652270210409745, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25839793281653745, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24640088593576967, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2454780361757106, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25230712440014763, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2369878183831672, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22148394241417496, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2233296419342931, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2072720561092654, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20634920634920634, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22093023255813954, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2026578073089701, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20321151716500555, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0099563366697488, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010137380043558937, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010373393530899286, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010195760394082624, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01107096432665962, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.011640317037607562, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.013344376552545523, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.014784370032665847, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.016192246126231328, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.017759273739388282, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01793945913512972, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.019198658994051623, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.019234131867329408, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.020757648056144107, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.022784816092759574, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.02570134584424839, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.058845068401388284, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.09922061783526082, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1399987165701381, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.16934739147987374, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.18062130735710955, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18572516870228195, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1947622526698177, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1921594166964644, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2077494155918342, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19945080425750983, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19984868744260878, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18860561861752193, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18900315436706774, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18536892377280711, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1944110378083459, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18999197367284038, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18430466170852677, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17572618649028862, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1668516956764814, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1775453138265913, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17685702085628105, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18431718023097518, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16865890286304686, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17242634393723055, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16360992719765666, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17401681506868302, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17185930972512523, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16277357989798816, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.241984100341797, "validation/loss_best": 2.4374959468841553, "validation/acc_best": 0.2652270210409745, "validation/f1_best": 0.2077494155918342} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.5965847885608673, "train/grad": 0.24301145672798158, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1288525390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1282373046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.127196044921875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.126378173828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12548095703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12440673828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12313720703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.121973876953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.120335693359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11875732421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11718505859375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.114908447265625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11265869140625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.108651123046875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1015179443359373, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0671893310546876, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8622467041015627, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6826324462890625, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.5426580810546877, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.44710391998291, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.3708720588684082, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.3052491188049316, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.240333766937256, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1925476455688475, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.1405372190475465, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.0923317861557007, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.0809450054168703, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.0366528987884522, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.011023359298706, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.986371922492981, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9499347925186157, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9676223254203797, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9544424653053283, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9580412554740905, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.970157424211502, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9834107518196107, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.0156635355949404, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.0432221448421477, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0968052023649215, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.174326519370079, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.2437212949991228, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.3023113417625427, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.3854796159267426, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.469771399497986, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02056585577316582, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020569424307905138, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020573236970230935, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020575711480341853, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020575663754716516, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02057282840833068, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02056706099305302, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020556036019697785, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020531908366829155, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020495909550227225, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02045047575607896, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02036075315438211, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020251138363964857, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020049903220497073, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019850156670436263, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020306996135041118, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02417303410358727, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.027633013501763344, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.030322464564815164, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.031934621063992384, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03280740628018975, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03374170364812017, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03488074372522533, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03540460810996592, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03638476622290909, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.037456725668162105, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.037485736766830084, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03884234388358891, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.039847359675914046, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.041363438777625564, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04227513587102294, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04273558221757412, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.044381966330111024, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.044951373785734175, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04554007885977626, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.047044279929250477, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04873011594638228, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05013854028657079, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051568437833338976, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05291429268196225, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.055853690560907124, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.054608325473964214, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05677049638703466, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.058671469762921334, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.124297857284546, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.124025583267212, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123615026473999, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.123297691345215, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1230132579803467, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122647523880005, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.122256278991699, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.121870994567871, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1214759349823, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1210782527923584, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1207430362701416, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.120201826095581, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1195461750030518, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1170389652252197, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1053664684295654, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.0057265758514404, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.751845121383667, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.6302480697631836, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5356245040893555, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4919593334198, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.465916156768799, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.463373899459839, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.458277940750122, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.465689182281494, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4700067043304443, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.500382423400879, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5058929920196533, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.554856300354004, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6000356674194336, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6673691272735596, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7035489082336426, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.679910898208618, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7550065517425537, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.832629442214966, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.89367938041687, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8799664974212646, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8432669639587402, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9013872146606445, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.92434024810791, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.943737030029297, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.016878843307495, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.841758966445923, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0998897552490234, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0214221477508545, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.08250276854928018, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.12403100775193798, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.18456995201181248, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2172388335179033, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2351421188630491, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24898486526393504, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2541528239202658, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24880029531192321, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2500922849760059, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2574750830564784, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24750830564784054, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2499077150239941, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24344776670358065, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23403469915097821, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23920265780730898, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23477297895902546, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2039497969730528, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21207087486157253, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21059431524547803, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20136581764488742, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22056109265411591, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19933554817275748, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20210409745293467, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010314653662774823, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01133297283921131, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012517229079034265, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013140275004971063, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.013563664588740204, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014299989151639428, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.015372361429582688, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.016001846847766576, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.017095678001520506, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01811241081018927, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.018751476787373517, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0198207639854769, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.019785161339141214, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.020984503751664968, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.026174284968078238, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.05366541667858818, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.10578961348410539, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.14369531149632625, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.16545981041875787, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18092407775594568, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1895078977417813, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1894256361415423, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1935324686696057, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2004537586014814, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20628412942667632, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2025894841423667, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20537232052268503, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20134043942363833, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19321079319669732, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19047792183470577, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19228634357153407, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1911139105303331, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1782572052807331, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1683919713589801, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16569437683377927, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16716513020626853, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17288431975430144, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16495834115043514, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16578965847879465, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15903957958375228, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1562922300757168, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17006439808762344, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14203324355909863, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14069179751898536, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.1405372190475465, "validation/loss_best": 2.4700067043304443, "validation/acc_best": 0.2574750830564784, "validation/f1_best": 0.20628412942667632} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.5353606510162354, "train/grad": 0.2510354097932577, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.124814453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124132080078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12295166015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1220947265625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.121131591796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11989990234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11872314453125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.117281494140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.115726318359375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.113990478515625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.112352294921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1098828125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.106961669921875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.09945068359375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.053935546875, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.849351806640625, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.680653839111328, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.540834732055664, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.422416954040527, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.3524924659729005, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.2868761825561523, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2244665145874025, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1587362766265867, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1137461614608766, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.061620903015137, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.0095831203460692, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.997105393409729, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9528123021125794, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.9241241168975831, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8920987701416017, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8654697561264038, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8732877922058107, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8592645251750946, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8596805334091187, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.8892451936006547, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.905949090719223, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9178422260284425, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.941351954936981, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9917945486307145, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.0873765552043917, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1313900536298753, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.22069139957428, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.3074764239788057, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.372230923175812, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021203686441294847, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02120768836699426, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02121254179161042, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02121707275509834, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021218126392923295, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021217060862109064, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021210792046040296, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021196668781340123, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021170008475892246, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021127772834151983, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02107218775432557, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02096655230037868, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020834550322033466, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020625063264742494, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0212669976009056, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.024977883994579314, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02845426796004176, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.031222061561420558, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03314533903263509, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03389531630091369, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0347864755988121, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03557897163555026, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03650667721405625, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0368931445106864, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03791360995732248, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03898116700351238, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03902529106475413, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04065130438655615, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04169744020327926, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.042976616844534875, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04489440524950623, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04458000242710113, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04573472945019603, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04669468395411968, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04754474762827158, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0483836642280221, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04939349317923188, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.050860820170491934, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05225411547347903, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05354697791859508, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05478163115680218, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.056045203637331724, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05859168330207467, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.056826556865125895, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1241869926452637, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1238298416137695, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1233057975769043, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1228220462799072, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1223721504211426, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1217639446258545, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121074914932251, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120364189147949, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.119480848312378, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1185567378997803, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.117584705352783, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1159496307373047, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.113455057144165, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1020607948303223, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.990245819091797, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.760272741317749, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.644540548324585, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5489614009857178, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.500213384628296, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4823179244995117, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4797210693359375, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.500164031982422, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5070860385894775, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5303092002868652, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.552098274230957, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5891120433807373, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5808892250061035, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.629112720489502, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.646878480911255, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7119929790496826, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.725044012069702, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7236599922180176, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8464059829711914, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.947484016418457, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.940969944000244, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9479551315307617, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.995891809463501, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0119080543518066, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.059399366378784, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9493813514709473, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9025814533233643, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8392531871795654, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8426175117492676, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.953977108001709, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.067921742340347, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07751937984496124, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.12550756736803248, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.17220376522702105, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.20487264673311184, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.22868217054263565, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2397563676633444, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24234034699150978, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24215577703949798, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2425249169435216, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24049464747139165, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2397563676633444, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23588039867109634, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20948689553340716, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20893318567737174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2070874861572536, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20339608711701734, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21686969361387967, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19287559985234404, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012035758073068406, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012647962775760158, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012261123441109299, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013009639951003954, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014608756661590616, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01693558751143547, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01793140707001106, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01929757416847522, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02168404398783734, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022660028506858, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02402128735028565, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.023226075841017255, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0262330649962688, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.03468133191455803, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06380931121017042, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.09612818164279857, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.13977281629988667, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.16652789192493248, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17853262951700602, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18169895384421278, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.18865141677275998, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18553864591752242, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18520255720251033, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18645887468748468, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1838264000998827, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1812752473123417, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18512474777607954, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18311967138434207, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18626207119884267, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1856478530077821, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.183242880349511, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18610732959030504, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16228029210859884, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15515940551642338, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1557410126976044, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1592852917518189, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.15717495026507824, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15634054051433396, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15427235045720478, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15369897800282953, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16173061555709428, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16536973632413923, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1602388369662272, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14757396803393244, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.2868761825561523, "validation/loss_best": 2.4797210693359375, "validation/acc_best": 0.24806201550387597, "validation/f1_best": 0.18865141677275998} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.490806505680084, "train/grad": 0.255279158577323, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122757568359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122117919921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.120865478515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.119652099609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1185693359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1171923828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.115662841796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.113997802734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11192138671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.109818115234375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.107767333984375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10427490234375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0996368408203123, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0725433349609377, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.86621337890625, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.7041468811035156, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5724784088134767, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4475310897827147, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.3617766952514647, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.3024336433410646, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.237794809341431, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1746423053741455, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1104668045043944, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.065495100021362, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0129659080505373, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9543180084228515, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.938199610710144, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.8918697130680084, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8562876772880554, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8171066057682037, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7864858263731003, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7953660875558852, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7819885516166687, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.7872161906957627, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.8057612156867981, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.8248111671209335, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.844547031521797, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.874758568406105, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9294391322135924, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.0220353335142134, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.097547741532326, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.167185767292976, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.2589985501766203, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.3435013741254807, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021082575866021216, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021084920698776842, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021086617717519403, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021086225430481137, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021084379218518735, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021076463637873532, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02106364875100553, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021042980095371602, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021005427958443762, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020949182342737915, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020883259074762465, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020755578544922174, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020607820646837353, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020758884749375282, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024402661565691233, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.027643341729417445, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0302661526016891, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.032772089438512925, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.033833483438938855, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03432307203300297, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03513679133728147, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0359102647844702, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03706043207086623, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03749065015465021, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0385579914227128, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03977634673938155, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.040003558732569215, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.041723140254616736, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0426261168718338, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.043907211124897004, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04513243438676, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.045819876864552495, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04683966616168618, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04750466208904982, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04793414324522018, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.049061898980289696, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05043781330808997, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.052173045743256805, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05332297010347247, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05443278232589364, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05677513806149363, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.055959271416068074, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.057609246019273995, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05836920168250799, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1234261989593506, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1230340003967285, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122426748275757, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1218626499176025, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.121361494064331, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1207401752471924, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1200709342956543, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1193251609802246, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1183815002441406, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1173043251037598, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.11615252494812, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.113739013671875, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.108858108520508, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.055661201477051, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.786893129348755, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6799395084381104, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.579383373260498, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.505488634109497, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4757964611053467, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4623196125030518, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4579336643218994, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.481419086456299, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.493304967880249, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5111725330352783, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5336129665374756, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.577617645263672, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.571134328842163, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6632583141326904, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7237889766693115, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7847113609313965, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7817399501800537, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7936336994171143, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8104569911956787, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.9278647899627686, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9607996940612793, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.979616403579712, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.956864595413208, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0164804458618164, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.991184711456299, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.942016839981079, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0369551181793213, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.002556562423706, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.914430618286133, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9477758407592773, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.10963455149501661, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.16925064599483206, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.19435215946843853, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.22572905131044665, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.23809523809523808, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24234034699150978, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2482465854558878, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25839793281653745, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2511997046880768, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24584717607973422, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24732373569582872, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24326319675156885, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24381690660760427, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21520856404577335, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1891842008121078, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20081210778885197, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20579549649317092, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2100406053894426, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2009966777408638, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2074566260612772, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2069029162052418, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.18696936138796605, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.18512366186784793, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20136581764488742, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.19601328903654486, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01106688903669729, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011729995380155367, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013553159685079546, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015432416818469871, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016521069842751854, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01718599943227163, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019022252175303244, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.019515002048834662, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.021521530933633324, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022469417594536214, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.022846311002353695, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.026045781422436635, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02866981555253277, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.05577370455942469, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.09207008980659709, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.12153079834358414, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.15544333459392304, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1711610454463323, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17819367273576994, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18278138687923398, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1927723065426897, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18898544881602267, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18778755054267396, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19515048483772232, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19639615038667738, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19645110088987275, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20365089249983934, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18472678641347137, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18002200564388357, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18209877867632937, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17954845735341576, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18142041057878477, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1771032082991156, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16227631421499167, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17214639615715635, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16878252429356086, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17215376738697266, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1583841932246251, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1601736274276944, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16806165967650757, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15536417817940681, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14406938943797531, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1658982121645624, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16512211591142403, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.237794809341431, "validation/loss_best": 2.4579336643218994, "validation/acc_best": 0.25839793281653745, "validation/f1_best": 0.1927723065426897} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.427416431903839, "train/grad": 0.2536269897967577, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122930908203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.121947021484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.120570068359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.119249267578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11803466796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.116468505859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.114769287109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.113004150390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.110760498046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.108399658203125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.10604248046875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1016583251953125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0930267333984376, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.983304443359375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.741337890625, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.620209503173828, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4882216262817383, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3801542854309083, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.300086669921875, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.244300813674927, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1810626792907715, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1185376834869385, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0520681810379027, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.000449104309082, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9429241347312927, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.876729290485382, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8652341318130494, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.8065627062320708, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.7666288387775422, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7146943128108978, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.678834188580513, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6880963027477265, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6735985112190246, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.667527156472206, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6878541696071625, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7122884315252305, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.737472532391548, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7537115281820297, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8055016684532166, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8838216704130173, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.9466995048522948, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.0223229295015335, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0918778729438783, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.1850579273700714, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020808731378056108, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020809060325846077, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020809105187654494, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020807466320693493, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0208029915811494, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02079258657991886, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020776757798157634, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020751384566538036, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020708575542084873, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020649557076394556, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020578264337964357, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02045052166096866, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02034165187738836, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022185604078695177, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.026540510300546886, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029006910296157002, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03147627686150372, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.033510558232665064, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.034422497525811196, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03483189415186644, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03554044530726969, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03642175434157252, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03756596866995096, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03817700749263167, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0388955133035779, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04034539485350251, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04073358887806535, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04223780857399106, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04352996977046132, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.044963293429464105, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04629878181964159, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04620849752798677, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04714201632887125, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.048185768239200115, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04894079437479377, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04987239893525839, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05086379546672106, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.051146201081573965, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05279235867783427, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05235574319958687, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05418114215135574, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05328236492350698, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.053050750326365234, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.053754289597272874, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123340368270874, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1230010986328125, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122485876083374, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12199068069458, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1215291023254395, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.120889663696289, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1201939582824707, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.119431257247925, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1183602809906006, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1170825958251953, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1155781745910645, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.111790180206299, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.100336790084839, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.909898519515991, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.716351270675659, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.618523120880127, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.535382032394409, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4897267818450928, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.471400022506714, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4619874954223633, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4577267169952393, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.482492446899414, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4979095458984375, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5379114151000977, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.568005084991455, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6267101764678955, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6231212615966797, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.690981864929199, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.752816677093506, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8247828483581543, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.889169931411743, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.841122627258301, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.9197640419006348, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.02685809135437, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.061998128890991, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.01025652885437, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9899473190307617, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1190335750579834, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0582973957061768, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.049309015274048, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.038510322570801, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.980813980102539, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.030651569366455, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0277156829833984, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06939830195644149, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.14876338132152087, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.19121447028423771, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.21520856404577335, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2336655592469546, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2454780361757106, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.25064599483204136, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24861572535991142, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24012550756736803, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2355112587670727, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2131782945736434, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2011812476928756, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2026578073089701, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20339608711701734, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19472129937246216, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20911775562938353, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19232188999630861, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20062753783684018, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011495546583288573, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012065788337183858, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013021023439095997, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015026987557544726, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016675898377926726, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019061447184465858, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.020671269868700888, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020398895702665735, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02196827952754665, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02283917170967807, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02299785681165921, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.025292405477347915, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.03179250141478205, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07621873076896091, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.115670028106592, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14267657295727323, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16766687792737967, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1798572156476356, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18541349572805643, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18407637970709476, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19747711863691472, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19274238351123948, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19504554383365322, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18788547968485636, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1918498992452448, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18367501931547195, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18648906873322954, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17955861973761714, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17922520063740885, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17948136713409682, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17636552926294227, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18446657788265272, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17368830445190694, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1664601426090529, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.15986797243040782, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16440653672292474, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16838404284947353, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15431688296197324, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1596369616697719, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1590546721036384, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1597775260687183, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16117347535373033, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.147425182417773, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15229975356437703, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.1810626792907715, "validation/loss_best": 2.4577267169952393, "validation/acc_best": 0.2602436323366556, "validation/f1_best": 0.19747711863691472} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.3945093631744383, "train/grad": 0.259647558927536, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.127470703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.126573486328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12518798828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1238818359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12267822265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.121141357421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1194580078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11763427734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.115169677734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.112633056640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.109906005859375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.103702392578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.084674072265625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.8457571411132814, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6807176208496095, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.565643310546875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.441649913787842, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3490045356750486, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.275136556625366, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.220307960510254, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.154218397140503, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.086911668777466, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.017179846763611, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9646459770202638, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9070035541057586, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8399376392364502, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8212578654289246, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7611406707763673, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.7132389807701112, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6602187943458557, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6210110884904863, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6355229622125627, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6193062251806258, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6148642963171005, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.626679354906082, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6507115733623505, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6693784767389297, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.6812690538167954, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.7290575623512268, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8134003901481628, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8841369825601577, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9837342154979707, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0206263202428816, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.1078068482875825, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020886811879463493, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02088638030923903, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02088346782606095, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02087902900762856, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020873636226169764, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020862582875415682, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02084693207871169, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020822924911044538, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020778109654784202, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020713302362710236, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02063683807849884, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02050294421147555, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.020584301799535753, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024604719476774336, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02810257876291871, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03048315833322704, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03278641661629081, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03446901014074683, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03538951309397817, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03575298567302525, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.036517881974577907, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03753244997002184, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03869964793324471, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03927768396213651, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04021947655826807, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04179495058953762, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04237795174121857, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.044205802120268346, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04604751503095031, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04733195872977376, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04836479805409908, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04874453499913216, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.049334599394351246, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04997265534475446, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05003989277407527, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.050303712300956246, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05135347979143262, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0519178201071918, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052628219742327925, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05252224849537015, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05471943693235517, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05452723851427436, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.052764003649353984, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05276017498224974, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1234588623046875, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1230642795562744, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1224358081817627, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121823310852051, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.12125301361084, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.120450019836426, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1196064949035645, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.118675708770752, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1174120903015137, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1158998012542725, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1139626502990723, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1081485748291016, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.079392910003662, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7978556156158447, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6708385944366455, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5827548503875732, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5164754390716553, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4851415157318115, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4742958545684814, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4680352210998535, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4748411178588867, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.507779598236084, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5266404151916504, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5638210773468018, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.590975284576416, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.642843723297119, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6456847190856934, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7174417972564697, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.770580530166626, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8131086826324463, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8765156269073486, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8479833602905273, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.970613479614258, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.062694549560547, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0727994441986084, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0290067195892334, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.981400728225708, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.006059169769287, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.95414137840271, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9408326148986816, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.923750400543213, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8937387466430664, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.835822582244873, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.830850601196289, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07585825027685493, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.09819121447028424, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16925064599483206, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.19878183831672203, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2235142118863049, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24012550756736803, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.24953857511997046, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2587670727205611, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25489110372831303, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24935400516795866, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24492432631967515, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23661867847914358, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2425249169435216, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23108157991878922, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22702104097452935, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2216685123661868, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2294204503506829, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21705426356589147, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2026578073089701, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21133259505352528, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21779254337393872, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21557770394979697, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21779254337393872, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2222222222222222, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01082408723805054, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011115484603906338, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011892516760162128, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013325489344227275, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014743526402149576, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017340160289163225, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019231614008910703, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0208886584291354, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02238086172520164, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02525465976891654, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02819775260628966, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.032637655548170934, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.047141858496513395, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.08916178570097984, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.12368940300516294, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.14989833016438828, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17179096468076205, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1837560690955411, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18596642355220264, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1949542019936342, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19367888855132712, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19177085641255334, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19254097454470678, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18746302725867495, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19582220084246607, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18817041029675322, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18714581003941091, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1877434411056658, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18273138637871755, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18820682205931763, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1845337699325619, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19504833627028775, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17330620061427382, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1679103636113358, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1755513752765255, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18021345502819808, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1852297296190325, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17398563745288267, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18127988334761894, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17598400730918998, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17909327171990239, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17532755458479687, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17726325504185647, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1835561686246102, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.220307960510254, "validation/loss_best": 2.4680352210998535, "validation/acc_best": 0.2587670727205611, "validation/f1_best": 0.1949542019936342} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.330361031293869, "train/grad": 0.2576656096428633, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1203857421875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.119365234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.117908935546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11636962890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.114942626953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.113182373046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1111669921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.108895263671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.105946044921875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1028741455078124, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0991571044921873, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0896270751953123, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.044844970703125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7585528564453123, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6150326538085937, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.498739700317383, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3920207977294923, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.307094917297363, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2362253189086916, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1802068996429442, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.11265718460083, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.0415009164810183, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9692651271820067, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9102415442466736, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8458766770362853, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7744729083776474, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7521849185228349, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6832220673561096, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.6193488729000092, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.549072152376175, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.496288267970085, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.5044156581163406, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4823239332437514, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.476445665359497, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5012600308656692, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.519423777461052, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5291954678297044, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5381807321310044, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6072712546586991, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.6730402040481567, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.7156646543741225, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8044848370552062, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.892949143052101, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.97699265062809, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020890211053192616, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020891194357536735, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020890463539399207, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020887206722982228, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020881281713955105, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02086991732940078, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02085110724437982, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020821358556859196, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020770485638640823, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02069872023537755, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02061702184379101, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020500243813730775, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021093445122241972, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.026240561148151754, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02940784123726189, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.031582569275051355, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03364769677631557, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03512050097808242, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03593655876815319, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.036197541560977695, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03701307121664286, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03805900752544403, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039267458114773036, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03996254490688443, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04118207531049847, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04262395923957229, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042977233994752166, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0446304801851511, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04587053507566452, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04684056617319584, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04804833084344864, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04858298756182194, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0491452469304204, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04947444047778845, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04986970143392682, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04989011198282242, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0504602712765336, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05102468160912395, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05152681529521942, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05149543737992644, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.051579607166349885, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.050795766543596986, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.050785766597837206, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.050326346773654225, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1234283447265625, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1230170726776123, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1223485469818115, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121718168258667, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.121108055114746, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.120286703109741, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1193745136260986, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1183416843414307, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1169445514678955, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.11517596244812, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.112788200378418, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.103822708129883, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.043519973754883, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7548418045043945, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6408469676971436, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.56231427192688, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.508704662322998, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4848968982696533, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4806067943573, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4785635471343994, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4922337532043457, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5371387004852295, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5623726844787598, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6131479740142822, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6421611309051514, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.714820623397827, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.729281187057495, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8014509677886963, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8793609142303467, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9528145790100098, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.013047456741333, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.013411045074463, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0571839809417725, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.150871515274048, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1655101776123047, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.158032178878784, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1471378803253174, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1651878356933594, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.095950126647949, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.019624948501587, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.03006649017334, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.942126750946045, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9793949127197266, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.006519317626953, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08028792912513842, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11092654115909929, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1790328534514581, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20450350682908822, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.22369878183831673, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2427094868955334, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24640088593576967, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25046142488002954, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2369878183831672, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23421926910299004, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2294204503506829, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22443706164636398, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.21760797342192692, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22388335179032853, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22277593207825766, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22517534145441123, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2233296419342931, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22185308231819859, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010613828404363193, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010814792839838374, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011155754564154791, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011900265834875403, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012886714915019299, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014428324252576619, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.016433220773948223, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.018209169843731266, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02032028558999277, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.022689907116953328, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.027815146732023708, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.03495776706800189, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.05250675731779635, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.09666435632122666, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.12881996540192164, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.15426329338565622, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17231294385707396, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17735311656047262, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18514402428525698, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1925128526329001, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19550575341182722, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18461440834890896, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18286350593637798, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1788885715624334, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18657287903637196, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.17405114776884045, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.16778730370894798, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17033469142377503, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16510821425691247, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.16999400370796214, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16491234207876798, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17386381641228865, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16171839333884477, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1684330868814964, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1718326957858657, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17156605807505887, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16731513959345443, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16761800333452534, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17463219100289792, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17862773613506286, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16847154889068405, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16879834864775722, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16096626483972934, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16461700655421038, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.1802068996429442, "validation/loss_best": 2.4785635471343994, "validation/acc_best": 0.2513842746400886, "validation/f1_best": 0.1925128526329001} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.2860768830776212, "train/grad": 0.25868377201259135, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123126220703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122027587890625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.120423583984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11887451171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.117467041015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.115601806640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.113631591796875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.1113916015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10848388671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10512939453125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.101190185546875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.08863037109375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0100079345703126, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.727725067138672, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5868927001953126, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4725724411010743, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3722765159606936, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.286163330078125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2132000160217284, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1558810901641845, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0852076053619384, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.0133111476898193, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9362784671783446, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8749451541900635, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8098150897026062, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.735693621635437, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6959554886817931, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6258600395917893, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5490268111228942, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4657956212759018, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4072175085544587, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4041047459840774, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.378432651758194, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.3639285498857499, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.3909538304805755, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4019772148132323, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4192986488342285, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4362067764997482, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4780531489849091, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5624372208118438, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.6049948245286942, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6960349106788635, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7845789432525634, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.8675898283720016, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02125929944217205, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02125893230549991, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021257039881311356, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02125086716376245, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02124425856396556, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021231553689576684, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021208521067164837, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02117716773878783, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021123641924932598, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02104941738769412, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020963520836085082, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020886293025687338, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02219194165430963, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0271423795260489, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030175663102418183, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03229568803682923, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03403187615796924, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035418707495555285, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03621732098981738, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03640870195813477, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037163482271134855, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038205841816961766, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03938032811507583, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04012409383431077, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0410618363507092, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04269342914223671, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04345194708555937, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04496861349791288, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04632814539596439, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.047888035979121925, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04878681156784296, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04864702301099896, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04955587780103087, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04959496663883328, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0494218928180635, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.049944118335843085, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05097018498927355, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05079684725031257, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.051251739636063574, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05114921426400542, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05149472007527947, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.050670931451022624, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04999153519049287, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0491821857355535, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1233139038085938, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.12284779548645, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1220548152923584, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121295690536499, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1205599308013916, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1195032596588135, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.118347644805908, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1170735359191895, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.115314483642578, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1130635738372803, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.109952211380005, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0965023040771484, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.9954211711883545, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7235753536224365, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6110188961029053, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5398945808410645, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.493865966796875, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.471858024597168, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.467010021209717, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4646310806274414, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4786202907562256, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5194053649902344, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.543482780456543, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.582540512084961, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6301159858703613, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6926019191741943, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7152507305145264, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8002068996429443, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.895282506942749, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.996182918548584, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.059922933578491, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.047841787338257, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1700713634490967, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3351311683654785, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3573217391967773, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2840774059295654, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.210529327392578, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3232240676879883, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.208711862564087, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.24552059173584, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.212521553039551, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.153073787689209, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.107752561569214, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.991792678833008, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08305647840531562, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.12809154669619785, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.18512366186784793, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2144702842377261, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23163528977482467, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24049464747139165, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2484311554078996, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2543373938722776, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2561830933923957, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25378368401624213, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24326319675156885, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23735695828719083, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2382798080472499, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23403469915097821, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21908453303802142, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20948689553340716, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18512366186784793, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.18696936138796605, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.18733850129198967, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20136581764488742, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1982281284606866, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2172388335179033, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2102251753414544, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2074566260612772, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20893318567737174, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22000738279808046, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011455073867627225, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011792281030763978, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014187744182873671, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015041143786492595, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01642007837112767, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019081304331525554, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.020366890009406925, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.020757715090834412, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.022554786017948848, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.025459122660482586, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.027294967548965165, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.03564237724992391, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.061007558444220816, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10560986105388515, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14046123735702182, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16111540880098393, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17539727872516356, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18395191714448633, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19130537719057714, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19482040731956451, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19803117995346473, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19510189405735667, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19196476005101837, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18793512407665822, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18875848726331168, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1859313440159013, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18040181510508915, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1758272297088002, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17323031507472422, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.173781516740711, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1756203338619772, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17868402253023116, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1708515821046371, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15721773823392013, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1558412787763134, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15463889237598868, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16824520557328226, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15799879402605185, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16842008659669436, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16319530172628438, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1603771619282762, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.14869430669798286, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15647934169668026, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16811645824959665, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.1558810901641845, "validation/loss_best": 2.4646310806274414, "validation/acc_best": 0.2561830933923957, "validation/f1_best": 0.19482040731956451} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.233677463531494, "train/grad": 0.25739467576146124, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.120615234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11950927734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.117701416015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.116201171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11458251953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1125341796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.110408935546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.107857666015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.104573974609375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.100828857421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0960546875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0783172607421876, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9552508544921876, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6884806823730467, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5490949249267576, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4399281692504884, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3474577713012694, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2641609954833983, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1924016284942627, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1365749883651732, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0599346685409547, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9874658632278441, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9080380940437316, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.844595835208893, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.774992960691452, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.689410834312439, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6504374623298645, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5656135433912277, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4794493436813354, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.385071604847908, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.310164664387703, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3085107159614564, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.267009510397911, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.2466317129135132, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2641244542598724, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.282181459069252, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2891690903902053, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2932452845573426, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.3342878818511963, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.410746139883995, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4470555996894836, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5459723198413848, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6397700732946396, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.734408779144287, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02083786063361913, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0208357313554734, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020831718882545827, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020824179938063025, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020814843708649278, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020800601998344062, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02077578807249665, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020744587928056717, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020691420999355614, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02061940057668835, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020542457085102796, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02054185860324651, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022591950921341777, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02746705391444266, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030477926125749944, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03262813929468393, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0342638089787215, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03559769466519356, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03635277807712555, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03652837897650898, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037387360008433464, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038403871208429335, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03959767885506153, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04037122132256627, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04151805257424712, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043064792454242704, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04359578737989068, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04512505004182458, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04655106917023659, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04778429921716452, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04847962202504277, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0486180598847568, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04923328811302781, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.049511867389082906, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.049684055522084235, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04956505520269275, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04972479745745659, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05006834330037236, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05025689287111163, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05023099480196833, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05042313324287534, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05028826108202338, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04963635770604014, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.049011161755770444, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.122802495956421, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1222493648529053, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.12137770652771, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.120572090148926, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.11977219581604, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1187238693237305, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1175239086151123, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1162033081054688, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1143670082092285, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.111999750137329, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.108372211456299, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0898711681365967, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.94616436958313, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7031404972076416, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5935144424438477, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.527341842651367, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4852542877197266, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4649159908294678, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.460702896118164, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4587836265563965, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.472520351409912, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.510199546813965, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5376901626586914, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.583862781524658, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.625413656234741, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6919682025909424, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7113423347473145, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8103811740875244, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9155328273773193, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0191776752471924, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.117797374725342, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.096501350402832, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.205235004425049, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3909718990325928, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4629456996917725, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.342705011367798, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.360337972640991, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4408791065216064, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.319467067718506, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2916624546051025, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.242331027984619, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.14193058013916, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2114040851593018, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0915210247039795, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0753045404208195, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08896271686969362, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.14433370247323735, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19269102990033224, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22000738279808046, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2321889996308601, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2410483573274271, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2558139534883721, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2576596530084902, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26153562200073827, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2552602436323367, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2497231450719823, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23661867847914358, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2364341085271318, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22739018087855298, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22000738279808046, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20893318567737174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20616463639719454, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1998892580287929, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18973791066814322, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.1893687707641196, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20228866740494647, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1998892580287929, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.18660022148394242, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20431893687707642, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19416758951642674, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20155038759689922, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2011812476928756, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19195275009228496, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2009966777408638, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011493458944739611, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012166908732575482, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013136756382439269, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014013163384125277, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015250437542680203, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01650880784767775, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.020636465745010265, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021046809435439292, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02226022559978669, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.024937114539493144, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.027846594910590272, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04015765970346048, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0699399812196615, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.11273333775754568, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1454214286084928, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16154727886203063, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17643136901253662, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1882007659637823, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.193347270954806, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2007975459041922, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19872657431502974, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19588888777229388, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19751381490626033, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18920131012067265, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.189049845434924, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18409070760298926, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18525182359578715, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17942378101946513, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1743224212222161, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17532573403990437, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17208246210083986, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18157540188481083, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16839643553125408, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1607377926101489, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1570629364185942, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16829817893943066, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17005590169825957, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1603565987976498, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17016712149095178, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16322844172188633, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17028293674329764, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16433423682674891, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15667804311054467, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16425205569981652, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.1365749883651732, "validation/loss_best": 2.4587836265563965, "validation/acc_best": 0.26153562200073827, "validation/f1_best": 0.2007975459041922} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.186428357362747, "train/grad": 0.2550416547805071, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.115836181640625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.114794921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1129931640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1113330078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.109893798828125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10792724609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.105714111328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10329345703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.100035400390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.09617431640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0909759521484377, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0693658447265624, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9143740844726564, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6731356811523437, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5317952728271482, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4216103744506836, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.328481521606445, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2435112476348875, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.17022837638855, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1148965644836424, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0407145977020265, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9608379077911378, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8803672122955322, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8143030166625977, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7370620846748352, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6480900633335114, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6016915714740754, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.514338534474373, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4163121587038041, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.310084195137024, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2313208824396134, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2242102599143982, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1629042875766755, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.128316211104393, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1431605327129364, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1495269772410392, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.160023557841778, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1728394100069999, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.212488996386528, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.287821687757969, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.3238329994678497, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.410486825108528, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.5157886886596679, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.5927238714694978, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021240573255345226, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021238691308535635, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021232976000756024, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021224702671170236, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021214684434235097, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02119767092168331, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021169371251016856, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021130355801433326, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021069542029872535, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020986690893769266, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02090120748616755, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020958061153069137, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023410932356491686, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028072808105498552, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031051233606413008, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033161147087812426, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03467817119322717, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035964464638382196, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.036702197240665556, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03683382159098983, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03757799727842212, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03856437236070633, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03968144444748759, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04045020516961813, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04147525608539581, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.042919891215860845, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.043508515898138284, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.044790523201227186, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04582355871796608, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04644391588866711, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04702429754659534, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04721479021012783, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04710667818784714, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04732492683455348, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04772055869922042, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04790217259898782, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.048941827584058045, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.049134701564908026, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05004276532679796, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04969759339466691, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0508705778606236, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0499786532856524, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04910899955779314, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.047864646464586255, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.122779369354248, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122264862060547, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1214282512664795, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.120649576187134, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1198716163635254, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1188602447509766, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1177000999450684, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1164236068725586, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.114532947540283, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1120619773864746, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1080124378204346, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0849862098693848, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.9118363857269287, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6911797523498535, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5831196308135986, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.520413637161255, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4825456142425537, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.466090440750122, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4659554958343506, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.465939521789551, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4830594062805176, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5204527378082275, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.54775071144104, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5940539836883545, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6336259841918945, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7098793983459473, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7353053092956543, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.829716444015503, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.932676315307617, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0487148761749268, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1609060764312744, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1515657901763916, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.266390323638916, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4604599475860596, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4709901809692383, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4542551040649414, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.43528413772583, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4716594219207764, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.395050048828125, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.321262836456299, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3013830184936523, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.208897829055786, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.140117883682251, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.067944049835205, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07290513104466592, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07678110003691399, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09265411590992986, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.15227021040974528, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19656699889258028, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2220376522702104, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23606496862310816, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24658545588778147, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2572905131044666, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26116648209671467, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25858250276854927, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25230712440014763, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24344776670358065, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2351421188630491, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23883351790328536, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22739018087855298, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22240679217423404, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20967146548541898, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2026578073089701, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20358065706902917, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19564414913252123, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18401624215577703, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.18955334071613142, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19748984865263935, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.18733850129198967, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.18327796234772978, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19952011812476927, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20136581764488742, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2056109265411591, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2026578073089701, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.1934293097083795, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20948689553340716, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011735460643965778, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011964794249978117, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0140323149351049, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014908213356245528, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016642109707065977, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.020117172754204275, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021495795025245467, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.023141259301969617, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.025216454972586965, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.027692270764190025, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.03030238550146344, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.043502954000160375, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.07615425853171927, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12109365075669354, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14810325569446087, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16890532330379002, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18163403467553996, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19178661116051332, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19847863761583426, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20007279930889132, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20091005904483525, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19807738734710242, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19476521726751028, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18505571696384127, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19113071962763853, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.185791729576261, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18504047157580442, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18230490792384993, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17917191117504463, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1729089026693794, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17114956235987522, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17412180055157322, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16262422442412713, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15784875813126384, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16246565261582216, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16732869768316816, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16351956109845686, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15939408281722656, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17186080131239265, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17421830413402165, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17904644557066782, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16825369352510108, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16198407634813428, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1776313228965228, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.17022837638855, "validation/loss_best": 2.4659554958343506, "validation/acc_best": 0.26116648209671467, "validation/f1_best": 0.19847863761583426} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.153523972630501, "train/grad": 0.2513217628002167, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.124522705078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.123343505859375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.121572265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1199560546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.118363037109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1162744140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1140380859375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.111561279296875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.108331298828125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1042755126953123, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.098865966796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0733148193359376, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.895657958984375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6749510192871093, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.537160720825195, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.430026206970215, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.339691219329834, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2559735870361326, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.182307424545288, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.125949068069458, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.050969181060791, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9695903158187866, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8839230275154113, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.815483381152153, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.732141501903534, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6331638312339782, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5803133988380431, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4838786911964417, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3837278360128402, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2625207418203355, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.169484848678112, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1564442175626755, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0986637791991234, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0532634681463242, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.051579089462757, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0591935485601425, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0416153338551521, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0460092175006865, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0739499679207802, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1307549113035202, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.168572481572628, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2526003903150558, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.351406273841858, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.4379165947437287, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021206924044527113, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021203068126924338, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021196441166102885, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02118630865123123, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021174988746643066, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021155629665590824, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021126139052212237, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02108771708328277, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021027532308362424, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02094573342241347, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020866567767225206, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021006288812495767, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023778026299551128, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02842219850048423, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03134510955773294, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03342254413291812, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03491681983694434, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03620986979454756, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03692997217178345, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03705849207937718, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03784461058676243, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03881787315942347, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03989369258284569, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04059701919555664, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041690181251615284, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04308872858062387, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04355610223487019, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04489729646593332, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045981333386152984, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04621505731716752, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04658571207895875, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04637524364516139, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04653417168185115, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04646995918825269, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04607203619554639, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.046702085956931115, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04689799163490534, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04667291162535548, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04746776532381773, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04745241096243262, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04854679083451629, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.048386214207857846, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04709596255794168, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04688126612454653, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.122711658477783, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122191905975342, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121356964111328, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1205697059631348, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1197850704193115, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1187448501586914, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.117560625076294, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1162378787994385, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1143155097961426, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1117026805877686, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1073803901672363, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.081244707107544, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8919479846954346, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6849822998046875, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5786828994750977, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.518781900405884, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.482667922973633, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4667696952819824, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4678356647491455, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.468783140182495, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4909749031066895, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.536182165145874, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5667784214019775, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6160614490509033, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6652212142944336, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.741201639175415, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.76352596282959, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.857060194015503, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9587345123291016, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0978848934173584, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.217419385910034, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.202385663986206, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3235843181610107, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.5011658668518066, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.517428159713745, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5252208709716797, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.539820909500122, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.6021695137023926, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.5194475650787354, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.454035520553589, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.469964027404785, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.332300901412964, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2594926357269287, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.238564968109131, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07733480989294943, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09505352528608342, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.15596160944998155, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19712070874861573, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22093023255813954, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24234034699150978, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2543373938722776, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2572905131044666, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24215577703949798, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23385012919896642, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.21520856404577335, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20579549649317092, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20155038759689922, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2059800664451827, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19730527870062753, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1891842008121078, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.1906607604282023, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19767441860465115, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19287559985234404, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.1805094130675526, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20136581764488742, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1997046880767811, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1984126984126984, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20062753783684018, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19453672942045036, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20007382798080472, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011775240585585383, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011626098633540853, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01331435593635208, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014897012654096852, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016004435468246335, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019222710933527235, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.0210902069906783, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.022340376071289334, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.022604312125822507, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02596006272752459, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02890028049662284, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04446043986087778, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0786201993919199, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1210620738831043, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14849630124049829, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1686305858745011, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17861076382284394, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18954466947039372, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19418220603349026, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1990249077814293, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20012047052899598, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1929858399770398, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19408579756512545, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1850387743547449, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19015833061058937, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18259377678272723, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1774202481220183, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17630776965991513, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17646302338464995, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1708835614630535, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16842748693957377, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17433492787620586, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16427033583192738, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16050831433425272, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1614784626277369, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16305132339091916, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16083331638083678, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15199383379669504, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16517438382601216, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1687839901961051, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1684449588304843, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.166731622317735, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16324657807603174, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16843918322653664, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.125949068069458, "validation/loss_best": 2.468783140182495, "validation/acc_best": 0.2582133628645257, "validation/f1_best": 0.1990249077814293} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 2.115596797466278, "train/grad": 0.24379089452326297, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.120069580078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.118934326171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11714599609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1155810546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.114024658203125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.112012939453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1098291015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.10736083984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10401611328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.100081787109375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0945025634765626, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.066845703125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.8791238403320314, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.656929168701172, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.517545852661133, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4134217834472658, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3249964141845703, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.242718849182129, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1690694236755372, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1131628227233885, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.036729669570923, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9574142837524413, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8726194095611572, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7973618268966676, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7197578406333924, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6147729003429412, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5568603849411011, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4643878763914109, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3510818231105803, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2215721547603606, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1257463696599006, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1063192519545555, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0377946683764459, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9927699610590934, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9723005139827728, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.9638773334026337, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9467766326665878, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.934615948498249, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.9458800080418587, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9951710173487663, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0306368264555932, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1028976571559905, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1966985750198365, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2634548631310463, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02099113950971514, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020987305990420282, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020978188458830118, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020967561663128437, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020953916106373073, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02093159809242934, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020901867719367147, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020860818708315493, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02079852897208184, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02071709995623678, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020641701500862836, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020825900030322374, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023785918494686486, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028286607516929508, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031145262168720364, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03312946692109108, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03455255076289177, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03580319129861891, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03656239226460457, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.036716082347556946, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03743778198957443, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038477190993726255, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03952372534200549, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04033411774784326, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04124371791258454, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04248591350391507, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04293836845085025, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.044170890133827925, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04495810816064477, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.045203327313065526, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.045154892951250074, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04501345766708255, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.044643050897866486, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04440208375453949, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04419238869100809, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04439281824976206, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04468818936496973, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04433494715020061, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04466144442558288, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04513234481215477, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.045218754932284354, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04523087497800589, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04452900342643261, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.044700887352228165, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1226933002471924, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1221814155578613, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1213576793670654, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.120537042617798, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.119783401489258, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.118729591369629, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.117541551589966, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1161975860595703, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1142256259918213, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1115152835845947, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1070199012756348, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0790326595306396, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8822927474975586, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6805899143218994, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.575174570083618, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5160346031188965, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4800493717193604, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.462996244430542, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4626073837280273, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4621293544769287, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4793927669525146, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5219779014587402, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5491580963134766, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.601921558380127, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.644822597503662, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.720280170440674, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.741093873977661, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8470804691314697, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.95824933052063, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.09108567237854, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1932220458984375, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.193861961364746, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.323246955871582, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.512070655822754, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5413472652435303, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5304741859436035, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5559427738189697, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.6297178268432617, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.5671842098236084, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.529243230819702, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.5026283264160156, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.379640817642212, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.3429038524627686, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2735867500305176, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07456626061277224, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07751937984496124, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09560723514211886, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.15946843853820597, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19804355850867478, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22277593207825766, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23588039867109634, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24455518641565152, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2576596530084902, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2619047619047619, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2556293835363603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24880029531192321, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24067921742340348, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23754152823920266, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24031007751937986, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2277593207825766, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22056109265411591, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2172388335179033, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20524178663713546, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2069029162052418, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19785898855666298, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18973791066814322, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19490586932447398, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19859726836471023, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19804355850867478, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.18087855297157623, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.1997046880767811, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2024732373569583, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2039497969730528, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20228866740494647, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19564414913252123, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20302694721299372, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011594048746481476, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012177641089540727, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013958569925581948, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015185265100484457, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017560764241052157, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02014658530404674, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021262139596549306, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.022015011060400786, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.023624906530232245, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.027433770379829758, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.030401480633846678, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04575024037791662, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0815958563552756, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12396288176704158, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15095747394508727, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1685910988627616, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18171041973421373, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18983847649070684, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19529197689796116, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2022515443196835, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20109101522104664, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19655209805375837, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19411016396534653, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19011509904383006, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19413608833335694, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.185160449127166, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18069525837243874, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18127264416938224, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17488240275962677, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1729062363977297, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1712345609164685, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17423714799489845, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16422837936683257, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.162108924643065, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16543034119880748, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16695218703911205, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1685784353488552, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15209159763097996, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16887688151191882, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16911377512007297, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1762685790617088, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16620465608501453, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16718835432815082, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17508389664506838, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.1131628227233885, "validation/loss_best": 2.4621293544769287, "validation/acc_best": 0.2619047619047619, "validation/f1_best": 0.2022515443196835} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 2.0939568197727203, "train/grad": 0.24009729646146297, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12213623046875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.120947265625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.119141845703125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.117425537109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11587646484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.113704833984375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1113525390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.108829345703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.105369873046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1011016845703123, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.09530029296875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0664794921875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.876422119140625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6586663818359373, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.518837738037109, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4143231391906737, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.32508996963501, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2416180896759035, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1672984409332274, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1082192993164064, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.030591835975647, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9478295850753784, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8629234528541565, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7900137519836425, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7065879440307616, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5981415283679963, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5472263616323472, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4410466134548188, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3215887647867204, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.195196748971939, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0989350003004075, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.07304053992033, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0022414028644562, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9531766274571418, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9233420693874359, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.9165064346790314, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8964338982105255, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8730227023363113, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8879594907164574, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9112216424942017, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9323507246375083, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9945189198851585, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0860617262125016, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.146413176357746, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021094596949405967, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021089270659722388, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021079352400265636, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021066386187449097, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021052485052496194, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021030724905431272, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021002057762816547, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020961958495900036, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02090011125896126, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020820929538458585, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0207465853728354, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020953150331042707, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02394280462525785, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028386748442426325, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031230913950130345, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03322716034017503, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.034639841578900814, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03585593918338418, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03651964709162712, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03661850960925221, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03737128242850304, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03827943740412593, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03935429733246565, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04007043924182654, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04105261305347085, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04228233437985182, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04269137805327773, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04370987886562944, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04453014075756073, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.044687808267772196, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04467985892668366, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0443013471737504, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0439066000841558, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04349270487204194, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04320930806919932, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04333328995853662, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.043307090420275925, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.042987182755023244, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04324222831055522, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0430166170373559, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04341464180499315, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.043067682348191735, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04300034733489156, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.043088628351688384, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1226844787597656, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122170925140381, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121335744857788, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.120529890060425, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1197729110717773, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.118701457977295, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.117513418197632, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1161530017852783, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1141583919525146, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1114401817321777, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1069064140319824, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0782642364501953, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8790225982666016, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.679713726043701, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5746564865112305, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5161874294281006, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4809272289276123, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4648399353027344, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.465104818344116, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4655489921569824, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4850950241088867, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.527656316757202, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.556238889694214, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.607781410217285, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6510729789733887, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7288365364074707, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.749343156814575, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8493943214416504, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.957545042037964, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0898380279541016, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.2071433067321777, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2010324001312256, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.337944984436035, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.526064395904541, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5645017623901367, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.565687417984009, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5965819358825684, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.6760270595550537, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.6075570583343506, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.609743356704712, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.569451332092285, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.4556922912597656, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.406568765640259, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.369741916656494, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07456626061277224, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07751937984496124, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09819121447028424, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.15854558877814692, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19712070874861573, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22406792174234036, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23624953857511996, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24381690660760427, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25544481358434845, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2567368032484312, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2482465854558878, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24067921742340348, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2355112587670727, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20450350682908822, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2026578073089701, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20893318567737174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19712070874861573, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18752307124400147, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19472129937246216, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19748984865263935, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19195275009228496, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.17977113325950536, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19472129937246216, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19656699889258028, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20007382798080472, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19748984865263935, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19748984865263935, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20210409745293467, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011643999649963021, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012108989010469461, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013910727487624982, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015584146019163336, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017794984752093013, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019910995151351037, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02106530727512683, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.021648065130495924, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02345660210744917, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.027585078112671232, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.030688287516352098, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04698684378809389, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.08065660979033808, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1231044569232711, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.152075898617024, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16865950072924696, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1809715506218549, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1906805542885992, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19487627350149092, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20189743091851095, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20156867138692994, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19568801943938827, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.191528498970291, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1865904600464703, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1920810529003204, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18304704117985604, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.17880903410134694, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1789956325522292, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1745559079631368, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17084510744516435, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.16954184963765032, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17785717903614817, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1650857045196961, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16148810830922314, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16519625865863508, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1672609952874804, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16231632850098485, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.15200181267273197, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1650471580133157, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16441558808503856, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17357527793945826, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16457684399725767, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1691371011726445, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1728990254132773, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.1082192993164064, "validation/loss_best": 2.4655489921569824, "validation/acc_best": 0.2604282022886674, "validation/f1_best": 0.20189743091851095} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 2.0913614082336425, "train/grad": 0.23634225770831108, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12340087890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12231201171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12062255859375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1190087890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11748291015625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.115445556640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.113140869140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11070068359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10729736328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10306884765625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.09741455078125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.068143310546875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.8714105224609376, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6598332214355467, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5248209381103517, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4235051345825194, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3340942001342775, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2512483406066894, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.17987548828125, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1228643035888672, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.044254770278931, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9608880805969238, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8762514734268187, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8046596193313598, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.718855459690094, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6132276630401612, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5555933040380479, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4543396335840226, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3356444233655929, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1920088315010071, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0989310508966446, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0666166931390761, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9968107104301452, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.941838975250721, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.910479202568531, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8964179426431655, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8769817382097245, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8498522484302521, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8551615181565285, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8858594220876693, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.8985332918167114, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.964054496884346, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0458938121795653, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0884910893440247, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020729605695232748, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02072570227552205, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02071543292608112, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020702288108877837, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0206893598055467, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020666201068088412, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020636442662216724, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02059408833272755, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02053232387173921, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020450300620868802, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02037302526179701, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02057989774737507, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02351019282825291, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027910346947610378, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030733471373096108, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.032723996071144935, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03414538596756756, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035383705133572224, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03611809043213725, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.036269872384145856, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03704972685314715, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03807971497997641, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039162270333617924, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.039928209148347375, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04085595956072211, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0421361219137907, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04257378850132227, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04368822189047933, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.044230110850185154, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.044247537553310394, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04419206546619534, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0438121641241014, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.043302780520170926, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04301486385986209, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04263320980593562, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04228355087339878, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.042183376755565404, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04171858286485076, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.041893845051527025, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04180697655305266, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04211124466732144, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0417725202254951, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04180326849222183, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04160780506208539, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1226770877838135, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1221652030944824, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121342897415161, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1205148696899414, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1197426319122314, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.118680715560913, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1174845695495605, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1161396503448486, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1141395568847656, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1114110946655273, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1068577766418457, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.078136920928955, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.878504753112793, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6794517040252686, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5744645595550537, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.515913724899292, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4805748462677, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4643805027008057, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4645864963531494, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.464930534362793, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4841907024383545, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.526604413986206, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.555696487426758, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6071393489837646, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6513047218322754, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.729731321334839, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7513468265533447, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8514180183410645, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9594509601593018, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0947349071502686, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.2115557193756104, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.205202579498291, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.33912992477417, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.5310914516448975, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5724382400512695, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5620031356811523, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5912742614746094, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.6787898540496826, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.610853672027588, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.6030073165893555, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.568272829055786, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.452242612838745, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.4113235473632812, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3722646236419678, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07825765965300849, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09800664451827243, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.15873015873015872, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19859726836471023, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22443706164636398, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2355112587670727, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2427094868955334, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2552602436323367, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2571059431524548, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2567368032484312, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24861572535991142, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.23994093761535623, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23569582871908454, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23901808785529716, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.20671834625322996, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.19730527870062753, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.18678479143595422, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19287559985234404, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.19638242894056848, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19324473975636766, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.18087855297157623, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.19195275009228496, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1969361387966039, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20339608711701734, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1997046880767811, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19490586932447398, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2011812476928756, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011651259262410537, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012096992690038194, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013886948728407221, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.015323703554349479, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017342595504161017, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.019914304238029368, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02104397163040689, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0215160852802465, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02322546561243805, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.027614752697003492, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.03095108379981619, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.046486604835518815, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.08107532334133659, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12409841621163088, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1527023683441665, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16758209909443486, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1801117167382831, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19025955621868504, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19369880663833028, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2011314630607287, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20162396445698086, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19570689097076188, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.191797783956741, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1869705921930159, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.191398602928642, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18199627513310776, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1796626255192102, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1797524723566226, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17445967228843307, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17020766781623883, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17143647592575903, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17616927091971912, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1643882403308102, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16037681184722472, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16335144017667522, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1663737950868115, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16330134148603703, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1522309794027122, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16163531475449616, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1658515247398045, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17667956055098635, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16577716084760563, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1652199939363177, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17266283068307142, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.1228643035888672, "validation/loss_best": 2.464930534362793, "validation/acc_best": 0.2602436323366556, "validation/f1_best": 0.2011314630607287} diff --git a/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..491c97f7029e010afb1347501a2f41daf8beb675 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..459714d8dcb4257d268505b58d1e052489a726f1 --- /dev/null +++ b/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,0.046415888336127774,train,0.8202846975088968,0.015726594707113167,0.8004955801978806,0.01822297337868578,0.789507780671876,0.01809600265514082 +flat_mae,patch,logistic,ppmi_dx,,0.046415888336127774,test,0.66,0.03959686351215207,0.587178241864983,0.05202205256428426,0.5907335907335907,0.044524695437980266 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,train,0.7953736654804271,0.01563053617680801,0.7739537974572863,0.017930848393320396,0.7651065082423465,0.017735614295664753 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,test,0.67,0.042991399139827954,0.6296711929076422,0.05070285814781705,0.6269100169779287,0.04756127783137477 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,train,0.8131672597864769,0.015444155568093847,0.7915452358495565,0.01812697869533012,0.780427103403982,0.01786560983005608 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,test,0.7,0.04370929420615254,0.6657754010695187,0.05029799244923566,0.6612903225806452,0.048050219798343674 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,train,0.9074733096085409,0.01109432759617436,0.8993123027521672,0.012426869930410613,0.8891966388353671,0.013216360232598513 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,test,0.64,0.04614541797405242,0.6179966044142615,0.04903595476690983,0.6179966044142615,0.04905232691167993 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,train,0.7206405693950177,0.016017373753275003,0.6661786126402209,0.021183276203668902,0.6626659173624492,0.018419837699978476 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,test,0.63,0.04355981634488374,0.5847828526540231,0.04893680912277844,0.5844651952461799,0.04598717493813799 +flat_mae,patch,logistic,ppmi_dx,5,0.3593813663804626,train,0.9128113879003559,0.011527544606153165,0.9058320652452682,0.012783539736854357,0.8978805394990366,0.013738054033805493 +flat_mae,patch,logistic,ppmi_dx,5,0.3593813663804626,test,0.56,0.04866924696356006,0.5280995280995281,0.05021525517482673,0.5280135823429541,0.04955424754322252 +flat_mae,patch,logistic,ppmi_dx,6,0.046415888336127774,train,0.7864768683274022,0.01580878052131648,0.7614601018675722,0.018592737335823617,0.7517929779490473,0.018102784283061037 +flat_mae,patch,logistic,ppmi_dx,6,0.046415888336127774,test,0.62,0.04685253461660319,0.5766488413547237,0.05439224794308487,0.5764006791171477,0.051106054770743876 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,train,0.7241992882562278,0.01696132748662482,0.6837458203693686,0.02072090512819354,0.6777322843074288,0.01915374599066819 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,test,0.61,0.0482936807460355,0.584,0.05114997190125896,0.583616298811545,0.050782257417075 +flat_mae,patch,logistic,ppmi_dx,8,0.000774263682681127,train,0.6797153024911032,0.015010254301573146,0.597498090145149,0.021529209057573294,0.6076857204024835,0.017081490554406272 +flat_mae,patch,logistic,ppmi_dx,8,0.000774263682681127,test,0.66,0.032059918901956065,0.5582120582120582,0.04955735471862308,0.5780984719864176,0.036897339886839776 +flat_mae,patch,logistic,ppmi_dx,9,0.3593813663804626,train,0.9145907473309609,0.01150419709622928,0.9080397076469946,0.012601460040200605,0.9010650824234638,0.013273852745095334 +flat_mae,patch,logistic,ppmi_dx,9,0.3593813663804626,test,0.67,0.046710765354466194,0.6515679442508711,0.048747588668362124,0.652376910016978,0.049037342030697637 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,train,0.7277580071174378,0.016104625878414703,0.6800580454317129,0.021111865043668004,0.6745343609505459,0.018777977084921106 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,test,0.65,0.038705121108194455,0.5792763553311696,0.048311785710483786,0.5853140916808149,0.04158671378549177 +flat_mae,patch,logistic,ppmi_dx,11,2.782559402207126,train,0.998220640569395,0.0017247858812886351,0.9981184064710746,0.0018267165815497462,0.9976851851851851,0.0022438186696393777 +flat_mae,patch,logistic,ppmi_dx,11,2.782559402207126,test,0.65,0.04777537440983587,0.6266666666666667,0.05055561989183651,0.6260611205432938,0.05042858603180044 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,train,0.8985765124555161,0.01312820214728253,0.890684984797453,0.014463444081752663,0.8837106615285806,0.015254441475719246 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,test,0.64,0.04326382322449092,0.5989304812834224,0.04897820171314989,0.597623089983022,0.04641724349419522 +flat_mae,patch,logistic,ppmi_dx,13,0.046415888336127774,train,0.8220640569395018,0.015746778444978696,0.8036749807866974,0.018132085000941742,0.7937406336972811,0.01813114746022617 +flat_mae,patch,logistic,ppmi_dx,13,0.046415888336127774,test,0.63,0.04190012410482814,0.5906626839252129,0.0481412748472322,0.5895585738539898,0.04556962223768487 +flat_mae,patch,logistic,ppmi_dx,14,0.3593813663804626,train,0.9145907473309609,0.011998887057215805,0.9085893977797959,0.01304586222490516,0.9036742667523014,0.013865043424062338 +flat_mae,patch,logistic,ppmi_dx,14,0.3593813663804626,test,0.56,0.048807782166371785,0.5164835164835164,0.05229830926036382,0.5178268251273345,0.050322021262548575 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,train,0.7402135231316725,0.01504370435148827,0.6931993239706256,0.019866090783508275,0.6863894241061872,0.017700413768255777 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,test,0.64,0.042326705517911505,0.5863970588235294,0.050689142819478804,0.5874363327674024,0.046092242555005936 +flat_mae,patch,logistic,ppmi_dx,16,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,16,166.81005372000556,test,0.55,0.04976833933335529,0.54226426609704,0.04990079194712915,0.550509337860781,0.05160013476762149 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,train,0.7402135231316725,0.01702263758490832,0.701672484002327,0.021014544268474992,0.6942169770926996,0.01948451012854846 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,test,0.61,0.04508523483359048,0.5555555555555556,0.051378164575987,0.5581494057724957,0.04744566849420916 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,train,0.7437722419928826,0.01613186376109185,0.704002808988764,0.02009487396371664,0.6962374223934917,0.018447279268718333 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,test,0.58,0.04601653615821164,0.525101763907734,0.051156397434389024,0.5288624787775891,0.047612069063633786 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,train,0.9163701067615658,0.011731201618968132,0.9104060457433205,0.012737569478323107,0.9051193534575037,0.01348334458824142 +flat_mae,patch,logistic,ppmi_dx,19,0.3593813663804626,test,0.57,0.04991714735439114,0.557203171661003,0.050639736163346036,0.5615449915110357,0.0520586155962837 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,train,0.7295373665480427,0.016239188706875858,0.683671051072402,0.020767202990758186,0.6777189038749732,0.018686364973187677 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,test,0.66,0.03978809369648161,0.5952380952380952,0.0504538898816794,0.5984719864176571,0.04378667942341389 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,train,0.791814946619217,0.01622529910508603,0.767721834232363,0.01887394323996225,0.7578676942838793,0.01835064982649824 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,test,0.7,0.040261644278394786,0.6493688639551192,0.0505019090132266,0.6460101867572157,0.04512126534182046 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,train,0.9110320284697508,0.011793499270477044,0.9045931116905693,0.012779810065121763,0.8990446371226718,0.013321327528813381 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,test,0.55,0.05023743624031783,0.529239460194581,0.05135623578423934,0.5301358234295416,0.05200645016518059 +flat_mae,patch,logistic,ppmi_dx,23,0.3593813663804626,train,0.9128113879003559,0.012059560988418845,0.9067756732510706,0.013035950687355751,0.9022291800470992,0.013646055697092676 +flat_mae,patch,logistic,ppmi_dx,23,0.3593813663804626,test,0.58,0.04774488873167472,0.5586380832282472,0.05021390360545659,0.5594227504244482,0.050717732200855736 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,train,0.7366548042704626,0.016945819570080133,0.694856555873505,0.021085197048277084,0.687847891243845,0.019245986896144082 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,test,0.67,0.03841572074034274,0.6033177064551027,0.05141215931577883,0.6065365025466893,0.04363383544759328 +flat_mae,patch,logistic,ppmi_dx,25,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,25,21.54434690031882,test,0.54,0.04655437680820139,0.5066495066495067,0.04955665687938645,0.5067911714770797,0.048825406897122246 +flat_mae,patch,logistic,ppmi_dx,26,0.000774263682681127,train,0.6797153024911032,0.015031492630371953,0.5958579030634259,0.022177295692537704,0.6068159922928709,0.01737184201120008 +flat_mae,patch,logistic,ppmi_dx,26,0.000774263682681127,test,0.64,0.04216714835034497,0.5714285714285714,0.051657413526891224,0.5772495755517827,0.04505306532627655 +flat_mae,patch,logistic,ppmi_dx,27,0.3593813663804626,train,0.9199288256227758,0.01133732988618638,0.9140482947304025,0.012384007067442132,0.9080095268679084,0.013244973745081741 +flat_mae,patch,logistic,ppmi_dx,27,0.3593813663804626,test,0.56,0.049906216045699156,0.5452666391070691,0.0511684591787432,0.5483870967741935,0.052257772062490916 +flat_mae,patch,logistic,ppmi_dx,28,0.046415888336127774,train,0.8202846975088968,0.01393194392617329,0.801472465592921,0.01623701266686387,0.7914258188824663,0.0163574177704368 +flat_mae,patch,logistic,ppmi_dx,28,0.046415888336127774,test,0.69,0.038416433983387885,0.6343908479773559,0.04998619799540876,0.6328522920203735,0.04397644907176859 +flat_mae,patch,logistic,ppmi_dx,29,0.3593813663804626,train,0.9092526690391459,0.01195876537312177,0.9021918285029842,0.013211522364247708,0.894990366088632,0.014159184330451963 +flat_mae,patch,logistic,ppmi_dx,29,0.3593813663804626,test,0.6,0.04833630519599114,0.570999570999571,0.05158636023274216,0.5704584040747029,0.051118394618665734 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,train,0.7864768683274022,0.01619623115960258,0.7638390856118333,0.01843866603508495,0.7552718903874973,0.01806591569064938 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,test,0.68,0.04060704864921851,0.6190476190476191,0.052926260378695335,0.6196943972835314,0.04585223555978332 +flat_mae,patch,logistic,ppmi_dx,31,0.005994842503189409,train,0.7064056939501779,0.016490215419130955,0.6549645587989061,0.021451994207580802,0.6519749518304432,0.018945305348455153 +flat_mae,patch,logistic,ppmi_dx,31,0.005994842503189409,test,0.65,0.044317039612320676,0.5944849959448499,0.052481987988483775,0.5955008488964346,0.04742607210320109 +flat_mae,patch,logistic,ppmi_dx,32,0.005994842503189409,train,0.7366548042704626,0.017396675853322028,0.6929641934293097,0.022163169083558018,0.68610843502462,0.02009770102099594 +flat_mae,patch,logistic,ppmi_dx,32,0.005994842503189409,test,0.65,0.044789154937328296,0.6011396011396011,0.05265009605184291,0.6005942275042444,0.048486715513746864 +flat_mae,patch,logistic,ppmi_dx,33,0.3593813663804626,train,0.9092526690391459,0.012354701307359398,0.9025880673611228,0.01348516155840359,0.896729822307857,0.014206777040904922 +flat_mae,patch,logistic,ppmi_dx,33,0.3593813663804626,test,0.66,0.046463163904323174,0.6427070197562001,0.04877923375425467,0.6443123938879457,0.04912794789875995 +flat_mae,patch,logistic,ppmi_dx,34,0.046415888336127774,train,0.7953736654804271,0.016093351029739623,0.7705055411744236,0.01909222048343885,0.7598881395846714,0.0186057165641644 +flat_mae,patch,logistic,ppmi_dx,34,0.046415888336127774,test,0.69,0.03976214782931123,0.627359057579036,0.051555161141658534,0.6277589134125636,0.04471209144975881 +flat_mae,patch,logistic,ppmi_dx,35,0.046415888336127774,train,0.798932384341637,0.016055535513187984,0.7789427150231654,0.018192443765721832,0.7706058659815885,0.01806344539414284 +flat_mae,patch,logistic,ppmi_dx,35,0.046415888336127774,test,0.62,0.04877817134743778,0.5967741935483871,0.05065602372510524,0.5967741935483871,0.05027718008497745 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,train,0.9110320284697508,0.011656939849437806,0.9044022589644145,0.012746536028264826,0.8981749090130593,0.013491957851269252 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,test,0.59,0.046245652768665726,0.5464100011063171,0.05044095005220448,0.5471137521222411,0.048144747235433216 +flat_mae,patch,logistic,ppmi_dx,37,0.046415888336127774,train,0.806049822064057,0.014908530948142559,0.7841530921096677,0.0174580903269661,0.7737770284735603,0.017218137289844337 +flat_mae,patch,logistic,ppmi_dx,37,0.046415888336127774,test,0.68,0.044492246515544696,0.6483516483516483,0.049707297687495516,0.6451612903225806,0.04816168749207426 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,train,0.8042704626334519,0.014811668354160663,0.779607843137255,0.017797711908202157,0.7679833012202955,0.017405103546235593 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,test,0.68,0.04272477501403605,0.64349376114082,0.049860446511543116,0.6400679117147707,0.04729238406732512 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,train,0.9092526690391459,0.012028720332625422,0.9019884760716057,0.013335569616590045,0.8941206379790194,0.014245808099880073 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,test,0.69,0.04342568364458986,0.6656239887822242,0.0472121445704808,0.6634125636672326,0.04662563971707049 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,train,0.6743772241992882,0.015209701295149458,0.5865703500922566,0.02251664442815048,0.5998715478484264,0.017503650524404203 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,test,0.65,0.04048938132399654,0.5792763553311696,0.05342554211671432,0.5853140916808149,0.04541620623356039 +flat_mae,patch,logistic,ppmi_dx,41,0.3593813663804626,train,0.9039145907473309,0.012045869093670924,0.8956683168316832,0.01340177852400451,0.8863064654249626,0.014115819574262056 +flat_mae,patch,logistic,ppmi_dx,41,0.3593813663804626,test,0.61,0.04840685488647243,0.5882166613873931,0.05007008983830634,0.5887096774193548,0.05007994573596387 +flat_mae,patch,logistic,ppmi_dx,42,0.046415888336127774,train,0.797153024911032,0.015643705125766384,0.775095833859893,0.018133607236277882,0.7656818668379362,0.017892275147273245 +flat_mae,patch,logistic,ppmi_dx,42,0.046415888336127774,test,0.69,0.0389987486978749,0.6343908479773559,0.04984146926642825,0.6328522920203735,0.044170739244711425 +flat_mae,patch,logistic,ppmi_dx,43,0.3593813663804626,train,0.9181494661921709,0.01162962480687875,0.9122256627553238,0.012651754607459,0.9065644401627061,0.013369453970210521 +flat_mae,patch,logistic,ppmi_dx,43,0.3593813663804626,test,0.58,0.04763583105184583,0.5543293718166383,0.04972701867915571,0.5543293718166383,0.04947074528543621 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7224199288256228,0.017069427199443772,0.6802952287877241,0.021428276227693145,0.6745477413830016,0.019644268217951365 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.66,0.04044094954374835,0.6026180458158018,0.05197614084029882,0.6035653650254669,0.045659241611082216 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,train,0.7241992882562278,0.016217147058439775,0.6779369627507164,0.020934926142100806,0.6725139156497538,0.018794669725918905 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,test,0.76,0.03774406443402725,0.7142857142857143,0.05050911690360241,0.7045840407470289,0.045086534372282806 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,train,0.7277580071174378,0.016422199337980403,0.6820926148442554,0.02070764890830447,0.6762738171697709,0.018685191928157076 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,test,0.66,0.04342191151941609,0.6212121212121212,0.04901615282467326,0.6188455008488964,0.046256631844899046 +flat_mae,patch,logistic,ppmi_dx,47,0.000774263682681127,train,0.6832740213523132,0.01456791199791435,0.5970255071623081,0.021914585289207773,0.608836437593663,0.016966154770571857 +flat_mae,patch,logistic,ppmi_dx,47,0.000774263682681127,test,0.7,0.03635563780213461,0.6279761904761905,0.052714950751817216,0.6307300509337861,0.04288382864792189 +flat_mae,patch,logistic,ppmi_dx,48,0.046415888336127774,train,0.798932384341637,0.01655953987775648,0.7756629681047609,0.019292934595629797,0.7653874973239134,0.01885130461942534 +flat_mae,patch,logistic,ppmi_dx,48,0.046415888336127774,test,0.69,0.04580349331655829,0.6726850385386971,0.04811538857927285,0.6735993208828523,0.048582212897652494 +flat_mae,patch,logistic,ppmi_dx,49,0.3593813663804626,train,0.905693950177936,0.012733004580850383,0.8983562139344738,0.013962244109924263,0.8912304645686149,0.014639682105853228 +flat_mae,patch,logistic,ppmi_dx,49,0.3593813663804626,test,0.63,0.044234913812507874,0.5906626839252129,0.049334485116104734,0.5895585738539898,0.04712544961777677 +flat_mae,patch,logistic,ppmi_dx,50,0.3593813663804626,train,0.905693950177936,0.01221488505376477,0.8991655241287091,0.013218933766376941,0.8947093770070649,0.013811546465469357 +flat_mae,patch,logistic,ppmi_dx,50,0.3593813663804626,test,0.66,0.04281238605824254,0.6212121212121212,0.04892482463785491,0.6188455008488964,0.04652108373423623 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,train,0.8042704626334519,0.015572067284652241,0.7835191618108471,0.017873436630657824,0.774071397987583,0.017613475090626995 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,test,0.66,0.041866354032803,0.6026180458158018,0.051235713366207275,0.6035653650254669,0.04573628194016217 +flat_mae,patch,logistic,ppmi_dx,52,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,52,166.81005372000556,test,0.61,0.04801537670371857,0.5741893219783819,0.05280358208262193,0.5734295415959253,0.05109430898210244 +flat_mae,patch,logistic,ppmi_dx,53,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,53,21.54434690031882,test,0.61,0.04803489981253214,0.5953937130407718,0.049728637230405104,0.5988964346349746,0.05089478882732174 +flat_mae,patch,logistic,ppmi_dx,54,0.046415888336127774,train,0.797153024911032,0.015466070571226148,0.7739659333060498,0.017869777501524013,0.7639424106187112,0.017471756376053794 +flat_mae,patch,logistic,ppmi_dx,54,0.046415888336127774,test,0.53,0.04857605994726209,0.48684354187138335,0.05075875091920785,0.4885398981324278,0.04945652184394988 +flat_mae,patch,logistic,ppmi_dx,55,0.046415888336127774,train,0.797153024911032,0.016299738903727316,0.7727988425039363,0.01924597475374538,0.7622029543994862,0.01878826022414904 +flat_mae,patch,logistic,ppmi_dx,55,0.046415888336127774,test,0.66,0.043057036591014945,0.6212121212121212,0.04915928234227596,0.6188455008488964,0.04664681946543334 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,train,0.7206405693950177,0.01613483886390941,0.6758045729948596,0.020392361643785422,0.6704934703489617,0.018492881103792894 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,test,0.65,0.044826759865062744,0.6011396011396011,0.05299898326396332,0.6005942275042444,0.048482903364188534 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,train,0.806049822064057,0.01586605385718758,0.7836041019771587,0.018747907844913043,0.7729073003639477,0.018443188174583243 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,test,0.68,0.04218994666979326,0.6381727725011307,0.048826657384470014,0.634974533106961,0.04568274126796049 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,train,0.8042704626334519,0.015936168359664236,0.7835191618108471,0.018404863871851033,0.774071397987583,0.018308198841238837 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,test,0.65,0.04496649419289878,0.6072270227808326,0.05096491938924456,0.6056876061120543,0.04803200805484872 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,train,0.7864768683274022,0.015192588872605555,0.7602116281715781,0.017793953208465246,0.7500535217298223,0.017222362274503716 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,test,0.71,0.04098270854885021,0.6640018537828757,0.05126500819571108,0.6591680814940577,0.046375501435369434 +flat_mae,patch,logistic,ppmi_dx,60,0.005994842503189409,train,0.7170818505338078,0.01631074025656799,0.6726753237238777,0.020372333079626586,0.667603296938557,0.01856767354825718 +flat_mae,patch,logistic,ppmi_dx,60,0.005994842503189409,test,0.62,0.04304172394316938,0.5634191176470589,0.050738982370252456,0.566213921901528,0.046203899182167954 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,train,0.7206405693950177,0.01611250355660363,0.6727456151087274,0.020663190465840215,0.6678842860201242,0.018487854091814478 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,test,0.68,0.03767681515202684,0.6259934548854604,0.04811676358013127,0.6247877758913413,0.04269656583528458 +flat_mae,patch,logistic,ppmi_dx,62,0.000774263682681127,train,0.6832740213523132,0.015048193666431656,0.6066839663442636,0.021422319196067262,0.614054806251338,0.01726208206780293 +flat_mae,patch,logistic,ppmi_dx,62,0.000774263682681127,test,0.63,0.0352262629298085,0.5250930560903607,0.05089078457638085,0.548811544991511,0.038995777979749 +flat_mae,patch,logistic,ppmi_dx,63,0.3593813663804626,train,0.9145907473309609,0.011254064528639732,0.907460515663378,0.01250762149743199,0.8984558980946264,0.013418519866580277 +flat_mae,patch,logistic,ppmi_dx,63,0.3593813663804626,test,0.66,0.04194710478686223,0.6155585707824514,0.049604786964150625,0.6137521222410866,0.0461046913699586 +flat_mae,patch,logistic,ppmi_dx,64,0.005994842503189409,train,0.7348754448398577,0.015504737731952267,0.6894209977783465,0.019795252504359262,0.6829238921001927,0.01777835753179589 +flat_mae,patch,logistic,ppmi_dx,64,0.005994842503189409,test,0.65,0.040583425188123294,0.5792763553311696,0.05108577355447291,0.5853140916808149,0.04405450590318645 +flat_mae,patch,logistic,ppmi_dx,65,0.046415888336127774,train,0.806049822064057,0.01589983495866768,0.7846932499165247,0.018429595962791114,0.7746467565831727,0.018209754358907537 +flat_mae,patch,logistic,ppmi_dx,65,0.046415888336127774,test,0.6,0.04537754510768514,0.5477159656264134,0.05111232277454663,0.5500848896434635,0.04742729091557596 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,train,0.7295373665480427,0.016366421912369863,0.6875585205992509,0.020306891487240967,0.6811978163134232,0.018544942705137256 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,test,0.57,0.045578394004177014,0.5017958521608157,0.05119310695587717,0.5106112054329371,0.04661836669279137 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,train,0.8167259786476868,0.016102051039203126,0.7970471812887641,0.018706018010982605,0.7867961892528367,0.01867577910951707 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,test,0.6,0.04714798405022213,0.5659722222222222,0.05165425926743109,0.565365025466893,0.05057734557142793 +flat_mae,patch,logistic,ppmi_dx,68,0.000774263682681127,train,0.6779359430604982,0.014896188434734127,0.5927780780239796,0.022058842630380528,0.6045011774780561,0.017213913105251197 +flat_mae,patch,logistic,ppmi_dx,68,0.000774263682681127,test,0.69,0.03853008694513937,0.627359057579036,0.051986317480952206,0.6277589134125636,0.04407866318133792 +flat_mae,patch,logistic,ppmi_dx,69,0.3593813663804626,train,0.9217081850533808,0.01103763833948279,0.9158739878886848,0.012050041637958664,0.9094546135731107,0.01280909227526331 +flat_mae,patch,logistic,ppmi_dx,69,0.3593813663804626,test,0.63,0.04820471346248207,0.6053333333333333,0.05188304485146367,0.6048387096774194,0.05170277084265787 +flat_mae,patch,logistic,ppmi_dx,70,0.046415888336127774,train,0.8042704626334519,0.01514830206099393,0.7813384267119412,0.017897915965342598,0.770592485549133,0.017626941226316577 +flat_mae,patch,logistic,ppmi_dx,70,0.046415888336127774,test,0.65,0.04554858504937338,0.612789025334661,0.05137421292303487,0.6107809847198642,0.049077106756407564 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,train,0.7313167259786477,0.016031943945017935,0.6832124472250946,0.02065177782178386,0.6774245343609505,0.018363782228525534 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,test,0.61,0.041944129505808087,0.5311936530833032,0.05027656761740102,0.5428692699490663,0.0436578525635267 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7348754448398577,0.01647009383956892,0.6884225409759819,0.021579134087620164,0.6820541639905802,0.019270239830610006 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.67,0.04467385812754479,0.6349153667441089,0.05030191060721593,0.6320033955857385,0.04824418910943202 +flat_mae,patch,logistic,ppmi_dx,73,0.3593813663804626,train,0.9163701067615658,0.012095755602920281,0.9105807478122514,0.013074752375862413,0.9059890815671163,0.013654389950257462 +flat_mae,patch,logistic,ppmi_dx,73,0.3593813663804626,test,0.57,0.04984463461597446,0.557203171661003,0.05027999974512429,0.5615449915110357,0.0515276652049769 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7455516014234875,0.017044642156505817,0.7047137193520059,0.021544920046985508,0.6968127809890816,0.019743589839147847 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.58,0.041518858365807695,0.5174632352941176,0.04813540713086425,0.5237691001697793,0.04373118281675481 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,train,0.7135231316725978,0.016065523247653857,0.6611199125103463,0.02070674466158131,0.6577552986512524,0.01819179924633627 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,test,0.68,0.03978825957490475,0.6259934548854604,0.05037358869767038,0.6247877758913413,0.04497107602041557 +flat_mae,patch,logistic,ppmi_dx,76,0.000774263682681127,train,0.6761565836298933,0.01472832413651206,0.6009332521809698,0.020820990653175848,0.6082744594305288,0.016941594993324538 +flat_mae,patch,logistic,ppmi_dx,76,0.000774263682681127,test,0.69,0.0350101413878893,0.6112852664576802,0.05028381258023231,0.6175721561969439,0.04039313169396479 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,train,0.9181494661921709,0.010622385943953962,0.9128975741239892,0.01136585525206246,0.9100433526011561,0.011757552862015885 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,test,0.57,0.047387762133276556,0.5361881134721174,0.05086333748928709,0.5360780984719864,0.049897717538456926 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,train,0.7402135231316725,0.015635254705352413,0.7025420165020809,0.019359764358839356,0.6950867052023122,0.018024006352518378 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,test,0.64,0.04194751005721317,0.5714285714285714,0.05161044178631641,0.5772495755517827,0.045057567358548135 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,train,0.7348754448398577,0.01587267444541805,0.6853315296018638,0.020721569434257816,0.6794449796617427,0.018311511636425956 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,test,0.68,0.0402858585610385,0.6190476190476191,0.05133947600692871,0.6196943972835314,0.044777281426618264 +flat_mae,patch,logistic,ppmi_dx,80,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,80,1291.5496650148827,test,0.6,0.04929996348883029,0.595959595959596,0.04915415780123267,0.6112054329371817,0.05064002684294983 +flat_mae,patch,logistic,ppmi_dx,81,0.000774263682681127,train,0.6779359430604982,0.014371012276282513,0.5960767963150475,0.02073014569819938,0.6062406336972811,0.01643482148647498 +flat_mae,patch,logistic,ppmi_dx,81,0.000774263682681127,test,0.66,0.039989498621513124,0.6026180458158018,0.04766541728883674,0.6035653650254669,0.04285285957316557 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,train,0.7259786476868327,0.01617543854106575,0.6742249725220952,0.021757292354445446,0.6696103618068936,0.019004766693457173 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,test,0.66,0.04183703622390095,0.609375,0.05155969963978729,0.6086587436332768,0.04685720355596312 +flat_mae,patch,logistic,ppmi_dx,83,2.782559402207126,train,0.998220640569395,0.0018334103739675526,0.9981184064710746,0.0019424429883194865,0.9976851851851851,0.002385131088355941 +flat_mae,patch,logistic,ppmi_dx,83,2.782559402207126,test,0.47,0.04987540877025471,0.4578005115089514,0.05061745851797269,0.4605263157894737,0.05245153367805284 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7473309608540926,0.015543455768790834,0.706326542628356,0.019711940165355185,0.6982578676942839,0.01795741928022394 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.58,0.04529856068353607,0.525101763907734,0.05066025601085856,0.5288624787775891,0.04738187986164782 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,train,0.797153024911032,0.015824215070629487,0.7733870967741936,0.018473029127612572,0.7630726825090987,0.017998455890998424 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,test,0.62,0.04139534273321095,0.5558672276764843,0.05007553005431887,0.5611205432937181,0.04438627553462632 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,train,0.7402135231316725,0.01586293533421413,0.6980540508714084,0.019985001717106262,0.6907380646542496,0.01817017636503844 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,test,0.59,0.039857119815661535,0.5071523019593701,0.049865059708747654,0.5216468590831919,0.04226859732099989 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7295373665480427,0.015604164937594483,0.6805910770105144,0.02050610286541465,0.6751097195461357,0.018134328283903364 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.64,0.04312201757803084,0.5792426367461431,0.053004143050194644,0.5823429541595926,0.04699492725343523 +flat_mae,patch,logistic,ppmi_dx,88,0.3593813663804626,train,0.9128113879003559,0.011022735151229769,0.9067756732510706,0.011874716606473532,0.9022291800470992,0.012335471253154056 +flat_mae,patch,logistic,ppmi_dx,88,0.3593813663804626,test,0.64,0.04368970588136294,0.5989304812834224,0.05002938448313633,0.597623089983022,0.04705184798042599 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,train,0.7135231316725978,0.017185266357253814,0.667544816892818,0.021469130892375766,0.6629736673089275,0.019404852240277277 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,test,0.66,0.04003916083036706,0.587178241864983,0.053408743145655024,0.5933786078098472,0.04482195037118057 +flat_mae,patch,logistic,ppmi_dx,90,0.3593813663804626,train,0.905693950177936,0.011815042554170391,0.8987679915713631,0.012850221912196778,0.8929699207878399,0.013388643816738885 +flat_mae,patch,logistic,ppmi_dx,90,0.3593813663804626,test,0.65,0.043528399924646896,0.6178622120318812,0.04745547938045911,0.615874363327674,0.04623533835102397 +flat_mae,patch,logistic,ppmi_dx,91,0.3593813663804626,train,0.9039145907473309,0.012388184048252341,0.8963312154129945,0.013556560329888036,0.8889156497538,0.014023970389161105 +flat_mae,patch,logistic,ppmi_dx,91,0.3593813663804626,test,0.65,0.0468549890619985,0.6338529134846741,0.048137792076565634,0.6362478777589134,0.04847414856514755 +flat_mae,patch,logistic,ppmi_dx,92,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,92,166.81005372000556,test,0.65,0.04877528472495061,0.6338529134846741,0.05052444789300337,0.6362478777589134,0.05100456651728048 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7384341637010676,0.016851448452668734,0.6964539632499642,0.02102757599437324,0.6892929779490473,0.019219305620358482 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.61,0.046602055748646974,0.5793334052421529,0.05029825362918144,0.5785229202037352,0.049494298391112565 +flat_mae,patch,logistic,ppmi_dx,94,0.3593813663804626,train,0.9039145907473309,0.012086265892706031,0.8967544396815676,0.013219383508119626,0.890655105973025,0.013899091774497033 +flat_mae,patch,logistic,ppmi_dx,94,0.3593813663804626,test,0.68,0.04448450966347724,0.6567996567996568,0.04783182636038274,0.6553480475382003,0.04739133670982033 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,train,0.798932384341637,0.015983066148760685,0.7750845566751076,0.018679902998160733,0.764517769214301,0.018224654168825214 +flat_mae,patch,logistic,ppmi_dx,95,0.046415888336127774,test,0.64,0.044780223313422636,0.5989304812834224,0.050904823821396286,0.597623089983022,0.04809124532873826 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,train,0.7402135231316725,0.01572845200036327,0.6980540508714084,0.02037652937573708,0.6907380646542496,0.018532703452272137 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,test,0.57,0.042834654194938926,0.50997150997151,0.049019654087023565,0.515704584040747,0.04504790708149108 +flat_mae,patch,logistic,ppmi_dx,97,0.3593813663804626,train,0.905693950177936,0.012051051682673881,0.8989685196679997,0.013095939948641357,0.8938396488974524,0.013747230377505639 +flat_mae,patch,logistic,ppmi_dx,97,0.3593813663804626,test,0.64,0.04691108184640384,0.6216897856242118,0.04820639606438912,0.6230899830220713,0.048450672974368766 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,train,0.905693950177936,0.011598694087598104,0.8985639024257346,0.01268004650988587,0.8921001926782274,0.013326167314523648 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,test,0.72,0.039147025429782024,0.6727442730247779,0.04934977870089382,0.66723259762309,0.04472554355831128 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,train,0.791814946619217,0.015613764941640138,0.7658958611481976,0.018424999770673234,0.7552585099550417,0.01780720157033566 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,test,0.65,0.04584991166839911,0.612789025334661,0.05198627067045844,0.6107809847198642,0.04938372088814832 +flat_mae,patch,logistic,ppmi_dx,100,0.005994842503189409,train,0.7348754448398577,0.017284373286565017,0.6913715387195336,0.021631562997452506,0.6846633483194177,0.019680235283410976 +flat_mae,patch,logistic,ppmi_dx,100,0.005994842503189409,test,0.69,0.039490449478323225,0.6343908479773559,0.05108376819112403,0.6328522920203735,0.044965487837163066 diff --git a/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..0d6f4085ef482c49e46192655cc1c4ffb930525a --- /dev/null +++ b/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:26:15 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n400_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n400_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:16:09 time: 4.1771 data: 3.3417 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:20 time: 0.1916 data: 0.0681 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:53 time: 0.1706 data: 0.0560 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:41 time: 0.1670 data: 0.0542 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:34 time: 0.1748 data: 0.0606 max mem: 2851 +extract (train) [100/232] eta: 0:00:28 time: 0.1681 data: 0.0552 max mem: 2851 +extract (train) [120/232] eta: 0:00:23 time: 0.1656 data: 0.0559 max mem: 2851 +extract (train) [140/232] eta: 0:00:18 time: 0.1868 data: 0.0613 max mem: 2851 +extract (train) [160/232] eta: 0:00:14 time: 0.1692 data: 0.0524 max mem: 2851 +extract (train) [180/232] eta: 0:00:10 time: 0.1976 data: 0.0664 max mem: 2851 +extract (train) [200/232] eta: 0:00:06 time: 0.1853 data: 0.0624 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1491 data: 0.0438 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1427 data: 0.0416 max mem: 2851 +extract (train) Total time: 0:00:44 (0.1923 s / it) +extract (validation) [ 0/50] eta: 0:02:53 time: 3.4606 data: 3.2986 max mem: 2851 +extract (validation) [20/50] eta: 0:00:11 time: 0.2306 data: 0.0867 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1561 data: 0.0469 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1427 data: 0.0408 max mem: 2851 +extract (validation) Total time: 0:00:12 (0.2539 s / it) +extract (test) [ 0/50] eta: 0:03:19 time: 3.9886 data: 3.8177 max mem: 2851 +extract (test) [20/50] eta: 0:00:11 time: 0.2085 data: 0.0723 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1413 data: 0.0389 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1412 data: 0.0391 max mem: 2851 +extract (test) Total time: 0:00:12 (0.2500 s / it) +feature extraction time: 0:01:09 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 0.046416 | train | 0.82028 | 0.015727 | 0.8005 | 0.018223 | 0.78951 | 0.018096 | +| flat_mae | patch | logistic | ppmi_dx | | 0.046416 | test | 0.66 | 0.039597 | 0.58718 | 0.052022 | 0.59073 | 0.044525 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.042991399139827954, "f1": 0.6296711929076422, "f1_std": 0.05070285814781705, "bacc": 0.6269100169779287, "bacc_std": 0.04756127783137477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.04370929420615254, "f1": 0.6657754010695187, "f1_std": 0.05029799244923566, "bacc": 0.6612903225806452, "bacc_std": 0.048050219798343674} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04614541797405242, "f1": 0.6179966044142615, "f1_std": 0.04903595476690983, "bacc": 0.6179966044142615, "bacc_std": 0.04905232691167993} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04355981634488374, "f1": 0.5847828526540231, "f1_std": 0.04893680912277844, "bacc": 0.5844651952461799, "bacc_std": 0.04598717493813799} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.56, "acc_std": 0.04866924696356006, "f1": 0.5280995280995281, "f1_std": 0.05021525517482673, "bacc": 0.5280135823429541, "bacc_std": 0.04955424754322252} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04685253461660319, "f1": 0.5766488413547237, "f1_std": 0.05439224794308487, "bacc": 0.5764006791171477, "bacc_std": 0.051106054770743876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.0482936807460355, "f1": 0.584, "f1_std": 0.05114997190125896, "bacc": 0.583616298811545, "bacc_std": 0.050782257417075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.66, "acc_std": 0.032059918901956065, "f1": 0.5582120582120582, "f1_std": 0.04955735471862308, "bacc": 0.5780984719864176, "bacc_std": 0.036897339886839776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.046710765354466194, "f1": 0.6515679442508711, "f1_std": 0.048747588668362124, "bacc": 0.652376910016978, "bacc_std": 0.049037342030697637} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.038705121108194455, "f1": 0.5792763553311696, "f1_std": 0.048311785710483786, "bacc": 0.5853140916808149, "bacc_std": 0.04158671378549177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.04777537440983587, "f1": 0.6266666666666667, "f1_std": 0.05055561989183651, "bacc": 0.6260611205432938, "bacc_std": 0.05042858603180044} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04326382322449092, "f1": 0.5989304812834224, "f1_std": 0.04897820171314989, "bacc": 0.597623089983022, "bacc_std": 0.04641724349419522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04190012410482814, "f1": 0.5906626839252129, "f1_std": 0.0481412748472322, "bacc": 0.5895585738539898, "bacc_std": 0.04556962223768487} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.56, "acc_std": 0.048807782166371785, "f1": 0.5164835164835164, "f1_std": 0.05229830926036382, "bacc": 0.5178268251273345, "bacc_std": 0.050322021262548575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.042326705517911505, "f1": 0.5863970588235294, "f1_std": 0.050689142819478804, "bacc": 0.5874363327674024, "bacc_std": 0.046092242555005936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 166.81005372000556, "split": "test", "acc": 0.55, "acc_std": 0.04976833933335529, "f1": 0.54226426609704, "f1_std": 0.04990079194712915, "bacc": 0.550509337860781, "bacc_std": 0.05160013476762149} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04508523483359048, "f1": 0.5555555555555556, "f1_std": 0.051378164575987, "bacc": 0.5581494057724957, "bacc_std": 0.04744566849420916} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.04601653615821164, "f1": 0.525101763907734, "f1_std": 0.051156397434389024, "bacc": 0.5288624787775891, "bacc_std": 0.047612069063633786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.04991714735439114, "f1": 0.557203171661003, "f1_std": 0.050639736163346036, "bacc": 0.5615449915110357, "bacc_std": 0.0520586155962837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.03978809369648161, "f1": 0.5952380952380952, "f1_std": 0.0504538898816794, "bacc": 0.5984719864176571, "bacc_std": 0.04378667942341389} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.040261644278394786, "f1": 0.6493688639551192, "f1_std": 0.0505019090132266, "bacc": 0.6460101867572157, "bacc_std": 0.04512126534182046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.55, "acc_std": 0.05023743624031783, "f1": 0.529239460194581, "f1_std": 0.05135623578423934, "bacc": 0.5301358234295416, "bacc_std": 0.05200645016518059} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.04774488873167472, "f1": 0.5586380832282472, "f1_std": 0.05021390360545659, "bacc": 0.5594227504244482, "bacc_std": 0.050717732200855736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.03841572074034274, "f1": 0.6033177064551027, "f1_std": 0.05141215931577883, "bacc": 0.6065365025466893, "bacc_std": 0.04363383544759328} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 21.54434690031882, "split": "test", "acc": 0.54, "acc_std": 0.04655437680820139, "f1": 0.5066495066495067, "f1_std": 0.04955665687938645, "bacc": 0.5067911714770797, "bacc_std": 0.048825406897122246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 0.000774263682681127, "split": "test", "acc": 0.64, "acc_std": 0.04216714835034497, "f1": 0.5714285714285714, "f1_std": 0.051657413526891224, "bacc": 0.5772495755517827, "bacc_std": 0.04505306532627655} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.56, "acc_std": 0.049906216045699156, "f1": 0.5452666391070691, "f1_std": 0.0511684591787432, "bacc": 0.5483870967741935, "bacc_std": 0.052257772062490916} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.038416433983387885, "f1": 0.6343908479773559, "f1_std": 0.04998619799540876, "bacc": 0.6328522920203735, "bacc_std": 0.04397644907176859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.04833630519599114, "f1": 0.570999570999571, "f1_std": 0.05158636023274216, "bacc": 0.5704584040747029, "bacc_std": 0.051118394618665734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.04060704864921851, "f1": 0.6190476190476191, "f1_std": 0.052926260378695335, "bacc": 0.6196943972835314, "bacc_std": 0.04585223555978332} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.044317039612320676, "f1": 0.5944849959448499, "f1_std": 0.052481987988483775, "bacc": 0.5955008488964346, "bacc_std": 0.04742607210320109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.044789154937328296, "f1": 0.6011396011396011, "f1_std": 0.05265009605184291, "bacc": 0.6005942275042444, "bacc_std": 0.048486715513746864} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.046463163904323174, "f1": 0.6427070197562001, "f1_std": 0.04877923375425467, "bacc": 0.6443123938879457, "bacc_std": 0.04912794789875995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.03976214782931123, "f1": 0.627359057579036, "f1_std": 0.051555161141658534, "bacc": 0.6277589134125636, "bacc_std": 0.04471209144975881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04877817134743778, "f1": 0.5967741935483871, "f1_std": 0.05065602372510524, "bacc": 0.5967741935483871, "bacc_std": 0.05027718008497745} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.046245652768665726, "f1": 0.5464100011063171, "f1_std": 0.05044095005220448, "bacc": 0.5471137521222411, "bacc_std": 0.048144747235433216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.044492246515544696, "f1": 0.6483516483516483, "f1_std": 0.049707297687495516, "bacc": 0.6451612903225806, "bacc_std": 0.04816168749207426} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.04272477501403605, "f1": 0.64349376114082, "f1_std": 0.049860446511543116, "bacc": 0.6400679117147707, "bacc_std": 0.04729238406732512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.69, "acc_std": 0.04342568364458986, "f1": 0.6656239887822242, "f1_std": 0.0472121445704808, "bacc": 0.6634125636672326, "bacc_std": 0.04662563971707049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.65, "acc_std": 0.04048938132399654, "f1": 0.5792763553311696, "f1_std": 0.05342554211671432, "bacc": 0.5853140916808149, "bacc_std": 0.04541620623356039} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04840685488647243, "f1": 0.5882166613873931, "f1_std": 0.05007008983830634, "bacc": 0.5887096774193548, "bacc_std": 0.05007994573596387} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.0389987486978749, "f1": 0.6343908479773559, "f1_std": 0.04984146926642825, "bacc": 0.6328522920203735, "bacc_std": 0.044170739244711425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.04763583105184583, "f1": 0.5543293718166383, "f1_std": 0.04972701867915571, "bacc": 0.5543293718166383, "bacc_std": 0.04947074528543621} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04044094954374835, "f1": 0.6026180458158018, "f1_std": 0.05197614084029882, "bacc": 0.6035653650254669, "bacc_std": 0.045659241611082216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.76, "acc_std": 0.03774406443402725, "f1": 0.7142857142857143, "f1_std": 0.05050911690360241, "bacc": 0.7045840407470289, "bacc_std": 0.045086534372282806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04342191151941609, "f1": 0.6212121212121212, "f1_std": 0.04901615282467326, "bacc": 0.6188455008488964, "bacc_std": 0.046256631844899046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 0.000774263682681127, "split": "test", "acc": 0.7, "acc_std": 0.03635563780213461, "f1": 0.6279761904761905, "f1_std": 0.052714950751817216, "bacc": 0.6307300509337861, "bacc_std": 0.04288382864792189} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.04580349331655829, "f1": 0.6726850385386971, "f1_std": 0.04811538857927285, "bacc": 0.6735993208828523, "bacc_std": 0.048582212897652494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.044234913812507874, "f1": 0.5906626839252129, "f1_std": 0.049334485116104734, "bacc": 0.5895585738539898, "bacc_std": 0.04712544961777677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04281238605824254, "f1": 0.6212121212121212, "f1_std": 0.04892482463785491, "bacc": 0.6188455008488964, "bacc_std": 0.04652108373423623} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.041866354032803, "f1": 0.6026180458158018, "f1_std": 0.051235713366207275, "bacc": 0.6035653650254669, "bacc_std": 0.04573628194016217} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 166.81005372000556, "split": "test", "acc": 0.61, "acc_std": 0.04801537670371857, "f1": 0.5741893219783819, "f1_std": 0.05280358208262193, "bacc": 0.5734295415959253, "bacc_std": 0.05109430898210244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 21.54434690031882, "split": "test", "acc": 0.61, "acc_std": 0.04803489981253214, "f1": 0.5953937130407718, "f1_std": 0.049728637230405104, "bacc": 0.5988964346349746, "bacc_std": 0.05089478882732174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.53, "acc_std": 0.04857605994726209, "f1": 0.48684354187138335, "f1_std": 0.05075875091920785, "bacc": 0.4885398981324278, "bacc_std": 0.04945652184394988} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.043057036591014945, "f1": 0.6212121212121212, "f1_std": 0.04915928234227596, "bacc": 0.6188455008488964, "bacc_std": 0.04664681946543334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.044826759865062744, "f1": 0.6011396011396011, "f1_std": 0.05299898326396332, "bacc": 0.6005942275042444, "bacc_std": 0.048482903364188534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.04218994666979326, "f1": 0.6381727725011307, "f1_std": 0.048826657384470014, "bacc": 0.634974533106961, "bacc_std": 0.04568274126796049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04496649419289878, "f1": 0.6072270227808326, "f1_std": 0.05096491938924456, "bacc": 0.6056876061120543, "bacc_std": 0.04803200805484872} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.71, "acc_std": 0.04098270854885021, "f1": 0.6640018537828757, "f1_std": 0.05126500819571108, "bacc": 0.6591680814940577, "bacc_std": 0.046375501435369434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04304172394316938, "f1": 0.5634191176470589, "f1_std": 0.050738982370252456, "bacc": 0.566213921901528, "bacc_std": 0.046203899182167954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.03767681515202684, "f1": 0.6259934548854604, "f1_std": 0.04811676358013127, "bacc": 0.6247877758913413, "bacc_std": 0.04269656583528458} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.000774263682681127, "split": "test", "acc": 0.63, "acc_std": 0.0352262629298085, "f1": 0.5250930560903607, "f1_std": 0.05089078457638085, "bacc": 0.548811544991511, "bacc_std": 0.038995777979749} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04194710478686223, "f1": 0.6155585707824514, "f1_std": 0.049604786964150625, "bacc": 0.6137521222410866, "bacc_std": 0.0461046913699586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.040583425188123294, "f1": 0.5792763553311696, "f1_std": 0.05108577355447291, "bacc": 0.5853140916808149, "bacc_std": 0.04405450590318645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04537754510768514, "f1": 0.5477159656264134, "f1_std": 0.05111232277454663, "bacc": 0.5500848896434635, "bacc_std": 0.04742729091557596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.045578394004177014, "f1": 0.5017958521608157, "f1_std": 0.05119310695587717, "bacc": 0.5106112054329371, "bacc_std": 0.04661836669279137} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04714798405022213, "f1": 0.5659722222222222, "f1_std": 0.05165425926743109, "bacc": 0.565365025466893, "bacc_std": 0.05057734557142793} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 0.000774263682681127, "split": "test", "acc": 0.69, "acc_std": 0.03853008694513937, "f1": 0.627359057579036, "f1_std": 0.051986317480952206, "bacc": 0.6277589134125636, "bacc_std": 0.04407866318133792} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.04820471346248207, "f1": 0.6053333333333333, "f1_std": 0.05188304485146367, "bacc": 0.6048387096774194, "bacc_std": 0.05170277084265787} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04554858504937338, "f1": 0.612789025334661, "f1_std": 0.05137421292303487, "bacc": 0.6107809847198642, "bacc_std": 0.049077106756407564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.041944129505808087, "f1": 0.5311936530833032, "f1_std": 0.05027656761740102, "bacc": 0.5428692699490663, "bacc_std": 0.0436578525635267} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04467385812754479, "f1": 0.6349153667441089, "f1_std": 0.05030191060721593, "bacc": 0.6320033955857385, "bacc_std": 0.04824418910943202} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.04984463461597446, "f1": 0.557203171661003, "f1_std": 0.05027999974512429, "bacc": 0.5615449915110357, "bacc_std": 0.0515276652049769} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.041518858365807695, "f1": 0.5174632352941176, "f1_std": 0.04813540713086425, "bacc": 0.5237691001697793, "bacc_std": 0.04373118281675481} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.03978825957490475, "f1": 0.6259934548854604, "f1_std": 0.05037358869767038, "bacc": 0.6247877758913413, "bacc_std": 0.04497107602041557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.000774263682681127, "split": "test", "acc": 0.69, "acc_std": 0.0350101413878893, "f1": 0.6112852664576802, "f1_std": 0.05028381258023231, "bacc": 0.6175721561969439, "bacc_std": 0.04039313169396479} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.047387762133276556, "f1": 0.5361881134721174, "f1_std": 0.05086333748928709, "bacc": 0.5360780984719864, "bacc_std": 0.049897717538456926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04194751005721317, "f1": 0.5714285714285714, "f1_std": 0.05161044178631641, "bacc": 0.5772495755517827, "bacc_std": 0.045057567358548135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.0402858585610385, "f1": 0.6190476190476191, "f1_std": 0.05133947600692871, "bacc": 0.6196943972835314, "bacc_std": 0.044777281426618264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 1291.5496650148827, "split": "test", "acc": 0.6, "acc_std": 0.04929996348883029, "f1": 0.595959595959596, "f1_std": 0.04915415780123267, "bacc": 0.6112054329371817, "bacc_std": 0.05064002684294983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.000774263682681127, "split": "test", "acc": 0.66, "acc_std": 0.039989498621513124, "f1": 0.6026180458158018, "f1_std": 0.04766541728883674, "bacc": 0.6035653650254669, "bacc_std": 0.04285285957316557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04183703622390095, "f1": 0.609375, "f1_std": 0.05155969963978729, "bacc": 0.6086587436332768, "bacc_std": 0.04685720355596312} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.47, "acc_std": 0.04987540877025471, "f1": 0.4578005115089514, "f1_std": 0.05061745851797269, "bacc": 0.4605263157894737, "bacc_std": 0.05245153367805284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.04529856068353607, "f1": 0.525101763907734, "f1_std": 0.05066025601085856, "bacc": 0.5288624787775891, "bacc_std": 0.04738187986164782} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04139534273321095, "f1": 0.5558672276764843, "f1_std": 0.05007553005431887, "bacc": 0.5611205432937181, "bacc_std": 0.04438627553462632} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.039857119815661535, "f1": 0.5071523019593701, "f1_std": 0.049865059708747654, "bacc": 0.5216468590831919, "bacc_std": 0.04226859732099989} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04312201757803084, "f1": 0.5792426367461431, "f1_std": 0.053004143050194644, "bacc": 0.5823429541595926, "bacc_std": 0.04699492725343523} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04368970588136294, "f1": 0.5989304812834224, "f1_std": 0.05002938448313633, "bacc": 0.597623089983022, "bacc_std": 0.04705184798042599} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04003916083036706, "f1": 0.587178241864983, "f1_std": 0.053408743145655024, "bacc": 0.5933786078098472, "bacc_std": 0.04482195037118057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.043528399924646896, "f1": 0.6178622120318812, "f1_std": 0.04745547938045911, "bacc": 0.615874363327674, "bacc_std": 0.04623533835102397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.65, "acc_std": 0.0468549890619985, "f1": 0.6338529134846741, "f1_std": 0.048137792076565634, "bacc": 0.6362478777589134, "bacc_std": 0.04847414856514755} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 166.81005372000556, "split": "test", "acc": 0.65, "acc_std": 0.04877528472495061, "f1": 0.6338529134846741, "f1_std": 0.05052444789300337, "bacc": 0.6362478777589134, "bacc_std": 0.05100456651728048} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.046602055748646974, "f1": 0.5793334052421529, "f1_std": 0.05029825362918144, "bacc": 0.5785229202037352, "bacc_std": 0.049494298391112565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.04448450966347724, "f1": 0.6567996567996568, "f1_std": 0.04783182636038274, "bacc": 0.6553480475382003, "bacc_std": 0.04739133670982033} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.044780223313422636, "f1": 0.5989304812834224, "f1_std": 0.050904823821396286, "bacc": 0.597623089983022, "bacc_std": 0.04809124532873826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.042834654194938926, "f1": 0.50997150997151, "f1_std": 0.049019654087023565, "bacc": 0.515704584040747, "bacc_std": 0.04504790708149108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04691108184640384, "f1": 0.6216897856242118, "f1_std": 0.04820639606438912, "bacc": 0.6230899830220713, "bacc_std": 0.048450672974368766} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.72, "acc_std": 0.039147025429782024, "f1": 0.6727442730247779, "f1_std": 0.04934977870089382, "bacc": 0.66723259762309, "bacc_std": 0.04472554355831128} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04584991166839911, "f1": 0.612789025334661, "f1_std": 0.05198627067045844, "bacc": 0.6107809847198642, "bacc_std": 0.04938372088814832} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.039490449478323225, "f1": 0.6343908479773559, "f1_std": 0.05108376819112403, "bacc": 0.6328522920203735, "bacc_std": 0.044965487837163066} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 18.517 | 131.74 | 0.81418 | 0.095753 | 0.78572 | 0.11732 | 0.78039 | 0.11612 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 18.517 | 131.74 | 0.6361 | 0.047798 | 0.59189 | 0.046532 | 0.59336 | 0.044029 | + + +done! total time: 0:05:16 diff --git a/data_scaling/n400_2/pretrain/config.yaml b/data_scaling/n400_2/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4fe46d46b75972e41daed2471bcc1a49619b5fca --- /dev/null +++ b/data_scaling/n400_2/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n400_2/pretrain +notes: data scaling experiment n400_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n400_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..01199}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n400_2/pretrain/log.json b/data_scaling/n400_2/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..897f2ad845c0a1174d212b51aaa1ee1c4a2899c8 --- /dev/null +++ b/data_scaling/n400_2/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05635068432345986, "train/loss": 0.9932361174106598, "eval/hcp-train-subset/loss": 0.9906312948273074, "eval/hcp-val/loss": 0.9907501272616848, "eval/nsd-val/loss": 0.9895575450312707} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.07866375012218953, "train/loss": 0.988717139377594, "eval/hcp-train-subset/loss": 0.9874634213985936, "eval/hcp-val/loss": 0.9880307182188957, "eval/nsd-val/loss": 0.9882757721408721} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.18920457504556137, "train/loss": 0.9824911705589294, "eval/hcp-train-subset/loss": 0.9788453742381065, "eval/hcp-val/loss": 0.9780855563379103, "eval/nsd-val/loss": 0.9766755075223984} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.25203070312645404, "train/loss": 0.9665275077056885, "eval/hcp-train-subset/loss": 0.9427597964963605, "eval/hcp-val/loss": 0.9418385374930597, "eval/nsd-val/loss": 0.9193775048179011} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.25995811873214575, "train/loss": 0.9253546210384369, "eval/hcp-train-subset/loss": 0.9091613936808801, "eval/hcp-val/loss": 0.9076877784344458, "eval/nsd-val/loss": 0.8719301002640878} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.17763041310597938, "train/loss": 0.8862278780460358, "eval/hcp-train-subset/loss": 0.8761211422181898, "eval/hcp-val/loss": 0.874087599016005, "eval/nsd-val/loss": 0.8397323152711315} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.12029707309683825, "train/loss": 0.8648020808696747, "eval/hcp-train-subset/loss": 0.8647705662635065, "eval/hcp-val/loss": 0.8626801265824225, "eval/nsd-val/loss": 0.8294521281796117} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.09761470623704382, "train/loss": 0.8555569929981232, "eval/hcp-train-subset/loss": 0.8605878112777587, "eval/hcp-val/loss": 0.8582754279336622, "eval/nsd-val/loss": 0.8283297448388992} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.08821544866644028, "train/loss": 0.8497183385276794, "eval/hcp-train-subset/loss": 0.8563904502699452, "eval/hcp-val/loss": 0.8542350078782728, "eval/nsd-val/loss": 0.8232548794438762} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.0828382992340674, "train/loss": 0.8477129728889465, "eval/hcp-train-subset/loss": 0.8551490422218077, "eval/hcp-val/loss": 0.8532385162768825, "eval/nsd-val/loss": 0.8235158670333124} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.08018509658688314, "train/loss": 0.8441340670871734, "eval/hcp-train-subset/loss": 0.8535443131000765, "eval/hcp-val/loss": 0.8513954483693645, "eval/nsd-val/loss": 0.8199014163786366} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.07772167760905035, "train/loss": 0.8406481398487091, "eval/hcp-train-subset/loss": 0.8515328361142066, "eval/hcp-val/loss": 0.8492292790643631, "eval/nsd-val/loss": 0.8211445702660468} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.08000346439739542, "train/loss": 0.8361996452617645, "eval/hcp-train-subset/loss": 0.851604207869499, "eval/hcp-val/loss": 0.8489707689131459, "eval/nsd-val/loss": 0.8200855024399296} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.08265432946122843, "train/loss": 0.8348379740715027, "eval/hcp-train-subset/loss": 0.8497994522894582, "eval/hcp-val/loss": 0.8475233412558033, "eval/nsd-val/loss": 0.8173866156608828} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.07928130906764004, "train/loss": 0.8344313511657715, "eval/hcp-train-subset/loss": 0.8499155929011684, "eval/hcp-val/loss": 0.8480289895688334, "eval/nsd-val/loss": 0.8205395148646447} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.08467528230132637, "train/loss": 0.8301352157497406, "eval/hcp-train-subset/loss": 0.8496170822651156, "eval/hcp-val/loss": 0.8475837707519531, "eval/nsd-val/loss": 0.8183422809646975} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.08345494114669949, "train/loss": 0.8300760833644867, "eval/hcp-train-subset/loss": 0.8498869067238223, "eval/hcp-val/loss": 0.8476115532459751, "eval/nsd-val/loss": 0.8206927680200146} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.08659024269733859, "train/loss": 0.8265652526187897, "eval/hcp-train-subset/loss": 0.8492270486970102, "eval/hcp-val/loss": 0.8467700692915148, "eval/nsd-val/loss": 0.8198115373811414} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.09107964606502507, "train/loss": 0.8232648073482514, "eval/hcp-train-subset/loss": 0.8480717347514245, "eval/hcp-val/loss": 0.8467618265459614, "eval/nsd-val/loss": 0.8197645275823532} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.09268862574431483, "train/loss": 0.8219217417049408, "eval/hcp-train-subset/loss": 0.8498235133386427, "eval/hcp-val/loss": 0.8483697137525005, "eval/nsd-val/loss": 0.821802003729728} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.09169223270034382, "train/loss": 0.8219239550304412, "eval/hcp-train-subset/loss": 0.8507208074292829, "eval/hcp-val/loss": 0.848676917053038, "eval/nsd-val/loss": 0.821632690968052} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.09615981963817292, "train/loss": 0.8171747678565979, "eval/hcp-train-subset/loss": 0.8495672124047433, "eval/hcp-val/loss": 0.8467878843507459, "eval/nsd-val/loss": 0.8194500073309867} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.09827692109719396, "train/loss": 0.8138992547130585, "eval/hcp-train-subset/loss": 0.8493031167214916, "eval/hcp-val/loss": 0.8473079714082903, "eval/nsd-val/loss": 0.819362913408587} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.10131598125771166, "train/loss": 0.8129368589401245, "eval/hcp-train-subset/loss": 0.8493087080217177, "eval/hcp-val/loss": 0.8467657393024813, "eval/nsd-val/loss": 0.8184849216092017} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.1001485963609193, "train/loss": 0.8124135359764099, "eval/hcp-train-subset/loss": 0.8494963424821054, "eval/hcp-val/loss": 0.8475409765397349, "eval/nsd-val/loss": 0.8210751183571354} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.10163678466314809, "train/loss": 0.8108221112823486, "eval/hcp-train-subset/loss": 0.8492146326649573, "eval/hcp-val/loss": 0.8465155939902028, "eval/nsd-val/loss": 0.821913460569997} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.10421671002755054, "train/loss": 0.8083383344650269, "eval/hcp-train-subset/loss": 0.8499036412085256, "eval/hcp-val/loss": 0.8476196921640827, "eval/nsd-val/loss": 0.8234078403442137} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.10769301561192358, "train/loss": 0.8053927421379089, "eval/hcp-train-subset/loss": 0.8496210430898974, "eval/hcp-val/loss": 0.8477713427236003, "eval/nsd-val/loss": 0.8268291181133639} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.10793547592604015, "train/loss": 0.8049360713291168, "eval/hcp-train-subset/loss": 0.8497130793909873, "eval/hcp-val/loss": 0.8478488864437226, "eval/nsd-val/loss": 0.8201313172617266} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.1073543113374918, "train/loss": 0.8043116555023193, "eval/hcp-train-subset/loss": 0.8479224693390631, "eval/hcp-val/loss": 0.8456348997931327, "eval/nsd-val/loss": 0.8209854114440179} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.1123005234442111, "train/loss": 0.8003253763389587, "eval/hcp-train-subset/loss": 0.8501418271372395, "eval/hcp-val/loss": 0.8490340959641242, "eval/nsd-val/loss": 0.8235022944788779} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.11506957301909644, "train/loss": 0.797425700559616, "eval/hcp-train-subset/loss": 0.8501046049979425, "eval/hcp-val/loss": 0.8478668947373668, "eval/nsd-val/loss": 0.8264337749250473} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.11685629358736013, "train/loss": 0.7956889285469055, "eval/hcp-train-subset/loss": 0.8510858964535498, "eval/hcp-val/loss": 0.8484696543985798, "eval/nsd-val/loss": 0.8210352774589292} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.1191470552344357, "train/loss": 0.7964073915195465, "eval/hcp-train-subset/loss": 0.8513738051537545, "eval/hcp-val/loss": 0.8491514209778078, "eval/nsd-val/loss": 0.8239549927173122} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.12189270039466264, "train/loss": 0.7935317193412781, "eval/hcp-train-subset/loss": 0.8491838660932356, "eval/hcp-val/loss": 0.8477869139563653, "eval/nsd-val/loss": 0.8205884041324738} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.12358998172354817, "train/loss": 0.7907526597690582, "eval/hcp-train-subset/loss": 0.8498920432982906, "eval/hcp-val/loss": 0.8478847165261546, "eval/nsd-val/loss": 0.821757230066484} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.1265676138605374, "train/loss": 0.7866825600624084, "eval/hcp-train-subset/loss": 0.8491038853122342, "eval/hcp-val/loss": 0.8479302227497101, "eval/nsd-val/loss": 0.823501076429121} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.1258621008109144, "train/loss": 0.7869554843521118, "eval/hcp-train-subset/loss": 0.8500762533756995, "eval/hcp-val/loss": 0.8487050129521277, "eval/nsd-val/loss": 0.8274364490662852} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.1232751765945466, "train/loss": 0.7920317204856873, "eval/hcp-train-subset/loss": 0.849446520689995, "eval/hcp-val/loss": 0.8473763850427443, "eval/nsd-val/loss": 0.8218359437681013} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.12864076458132337, "train/loss": 0.7855114776229858, "eval/hcp-train-subset/loss": 0.8500234551968113, "eval/hcp-val/loss": 0.8478061422224967, "eval/nsd-val/loss": 0.8245024190795037} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.13270794438880712, "train/loss": 0.7802533388233185, "eval/hcp-train-subset/loss": 0.8509843128342782, "eval/hcp-val/loss": 0.8486421752360559, "eval/nsd-val/loss": 0.8244385536639921} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.1326127517929667, "train/loss": 0.7826643078231812, "eval/hcp-train-subset/loss": 0.8516142445225869, "eval/hcp-val/loss": 0.8492815234968739, "eval/nsd-val/loss": 0.8242420371501676} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.13537501466726448, "train/loss": 0.7782075979328156, "eval/hcp-train-subset/loss": 0.848774992650555, "eval/hcp-val/loss": 0.8471933334104477, "eval/nsd-val/loss": 0.8235553666468589} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.13744881478775897, "train/loss": 0.7788818356990814, "eval/hcp-train-subset/loss": 0.8516915190604425, "eval/hcp-val/loss": 0.8493912268069482, "eval/nsd-val/loss": 0.8229732926814787} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.13641463244565202, "train/loss": 0.7839728309822083, "eval/hcp-train-subset/loss": 0.8502550788464085, "eval/hcp-val/loss": 0.8482731907598434, "eval/nsd-val/loss": 0.8225354311927673} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.14331406819372988, "train/loss": 0.7740284731578827, "eval/hcp-train-subset/loss": 0.850404953764331, "eval/hcp-val/loss": 0.848980269124431, "eval/nsd-val/loss": 0.8230697156921509} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.14098283999850125, "train/loss": 0.7784574715137482, "eval/hcp-train-subset/loss": 0.8519063822684749, "eval/hcp-val/loss": 0.8492936511193553, "eval/nsd-val/loss": 0.8251897019724692} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.14600561219648456, "train/loss": 0.7725044074916839, "eval/hcp-train-subset/loss": 0.8506675208768537, "eval/hcp-val/loss": 0.8486616419207665, "eval/nsd-val/loss": 0.8247537497551211} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.15140051445977407, "train/loss": 0.7684958643817902, "eval/hcp-train-subset/loss": 0.8514625285902331, "eval/hcp-val/loss": 0.8498099715478958, "eval/nsd-val/loss": 0.8287082131831877} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.1495206797788833, "train/loss": 0.7705097451591492, "eval/hcp-train-subset/loss": 0.85235839793759, "eval/hcp-val/loss": 0.8497707065074674, "eval/nsd-val/loss": 0.8277650033274004} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.15048401103868947, "train/loss": 0.7703633936405182, "eval/hcp-train-subset/loss": 0.8533451682136904, "eval/hcp-val/loss": 0.8510381752444852, "eval/nsd-val/loss": 0.8245495029034153} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.15032640206848155, "train/loss": 0.7702743682098389, "eval/hcp-train-subset/loss": 0.8541311179437945, "eval/hcp-val/loss": 0.8509108549164187, "eval/nsd-val/loss": 0.8248226421494638} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.15640313917634124, "train/loss": 0.7654408138847351, "eval/hcp-train-subset/loss": 0.8529393336465282, "eval/hcp-val/loss": 0.8505341276045768, "eval/nsd-val/loss": 0.8251894827811949} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.15359744290642802, "train/loss": 0.7721607199192048, "eval/hcp-train-subset/loss": 0.8532282892734774, "eval/hcp-val/loss": 0.8516511128794763, "eval/nsd-val/loss": 0.825623934307406} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.15692090576463633, "train/loss": 0.7665882638835907, "eval/hcp-train-subset/loss": 0.8532217587194135, "eval/hcp-val/loss": 0.8512643566054683, "eval/nsd-val/loss": 0.8246326167737285} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.16362492530874995, "train/loss": 0.7602403983116149, "eval/hcp-train-subset/loss": 0.8527176783930871, "eval/hcp-val/loss": 0.8513278240157712, "eval/nsd-val/loss": 0.8258843758413869} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.16344790136091, "train/loss": 0.7622082487106323, "eval/hcp-train-subset/loss": 0.8552103513671506, "eval/hcp-val/loss": 0.8539175246992419, "eval/nsd-val/loss": 0.827966695831668} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.16375779301963514, "train/loss": 0.7668319429206848, "eval/hcp-train-subset/loss": 0.8523464856609222, "eval/hcp-val/loss": 0.8510613768331466, "eval/nsd-val/loss": 0.8243618636362015} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.16940796238140093, "train/loss": 0.7608862261676789, "eval/hcp-train-subset/loss": 0.8551082197696932, "eval/hcp-val/loss": 0.8528601709873446, "eval/nsd-val/loss": 0.8276094544318414} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.16784347036064015, "train/loss": 0.7620065269470215, "eval/hcp-train-subset/loss": 0.8523883088942497, "eval/hcp-val/loss": 0.8507286252514008, "eval/nsd-val/loss": 0.8247964882081554} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.169396733264933, "train/loss": 0.7631460801887512, "eval/hcp-train-subset/loss": 0.8562375624333659, "eval/hcp-val/loss": 0.8529415640138811, "eval/nsd-val/loss": 0.8265001139333171} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.17213782241139505, "train/loss": 0.7617701811218262, "eval/hcp-train-subset/loss": 0.8571944794347209, "eval/hcp-val/loss": 0.8554210134090916, "eval/nsd-val/loss": 0.8320573924049255} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.17499683083181553, "train/loss": 0.759325956363678, "eval/hcp-train-subset/loss": 0.8544335884432639, "eval/hcp-val/loss": 0.851645311040263, "eval/nsd-val/loss": 0.828973900887274} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.17980426243108852, "train/loss": 0.7536037030887603, "eval/hcp-train-subset/loss": 0.8548633802321649, "eval/hcp-val/loss": 0.8524973536691358, "eval/nsd-val/loss": 0.8294216144469476} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.17692845260015314, "train/loss": 0.7607378728199005, "eval/hcp-train-subset/loss": 0.8536307898259932, "eval/hcp-val/loss": 0.8511412778208333, "eval/nsd-val/loss": 0.826415785858708} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.1796252549090512, "train/loss": 0.7585319431686401, "eval/hcp-train-subset/loss": 0.8539082840565713, "eval/hcp-val/loss": 0.8520021140575409, "eval/nsd-val/loss": 0.8281115514616813} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.18120754116760246, "train/loss": 0.7606888244152069, "eval/hcp-train-subset/loss": 0.8527631413552069, "eval/hcp-val/loss": 0.8513341969059359, "eval/nsd-val/loss": 0.8301582538312481} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.18581511920721064, "train/loss": 0.7535261880683899, "eval/hcp-train-subset/loss": 0.8549658406165338, "eval/hcp-val/loss": 0.853772904603712, "eval/nsd-val/loss": 0.8320912891818631} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.18759153839486928, "train/loss": 0.754447957983017, "eval/hcp-train-subset/loss": 0.8549116821058335, "eval/hcp-val/loss": 0.8536504045609505, "eval/nsd-val/loss": 0.8277922974478814} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.19255375672010722, "train/loss": 0.7517802131462097, "eval/hcp-train-subset/loss": 0.855097682245316, "eval/hcp-val/loss": 0.8532117211049602, "eval/nsd-val/loss": 0.8339734077453613} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.188908314659104, "train/loss": 0.7541222350692749, "eval/hcp-train-subset/loss": 0.8553568566999128, "eval/hcp-val/loss": 0.854283538556868, "eval/nsd-val/loss": 0.8311366102387828} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.18841699307821114, "train/loss": 0.7547305200099945, "eval/hcp-train-subset/loss": 0.8557137000945306, "eval/hcp-val/loss": 0.8538425449402102, "eval/nsd-val/loss": 0.8326558326521227} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.1930091792345524, "train/loss": 0.7538053363990783, "eval/hcp-train-subset/loss": 0.8560904418268511, "eval/hcp-val/loss": 0.8547302647944419, "eval/nsd-val/loss": 0.8331084376381289} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.1941799668204063, "train/loss": 0.7524051087093353, "eval/hcp-train-subset/loss": 0.8562128476558193, "eval/hcp-val/loss": 0.8561235416320062, "eval/nsd-val/loss": 0.8305958567127105} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.19974286267475755, "train/loss": 0.7485844553756714, "eval/hcp-train-subset/loss": 0.856697634343178, "eval/hcp-val/loss": 0.8558589248887954, "eval/nsd-val/loss": 0.8320134849317612} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.19949260560730964, "train/loss": 0.7513493559646607, "eval/hcp-train-subset/loss": 0.8559862615600708, "eval/hcp-val/loss": 0.8543095415638339, "eval/nsd-val/loss": 0.8308407750821882} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.19934886127886828, "train/loss": 0.7510187508964539, "eval/hcp-train-subset/loss": 0.8572418257113426, "eval/hcp-val/loss": 0.8554319950842089, "eval/nsd-val/loss": 0.8333123085960266} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.20003685860237821, "train/loss": 0.7544321667671203, "eval/hcp-train-subset/loss": 0.8560902207128464, "eval/hcp-val/loss": 0.8554654554013283, "eval/nsd-val/loss": 0.831211615954676} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.20408483137507352, "train/loss": 0.7474359239006042, "eval/hcp-train-subset/loss": 0.8557065792622105, "eval/hcp-val/loss": 0.8542823253139373, "eval/nsd-val/loss": 0.8304501637335746} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.20662996451586255, "train/loss": 0.7461285776138306, "eval/hcp-train-subset/loss": 0.8567891947684749, "eval/hcp-val/loss": 0.8543242148814663, "eval/nsd-val/loss": 0.8331241357711053} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.20363455915047451, "train/loss": 0.7508656202793121, "eval/hcp-train-subset/loss": 0.8562845058979527, "eval/hcp-val/loss": 0.8546986310712753, "eval/nsd-val/loss": 0.831471860408783} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.2036687437189736, "train/loss": 0.7511858002567291, "eval/hcp-train-subset/loss": 0.8574726341232177, "eval/hcp-val/loss": 0.8552569256674859, "eval/nsd-val/loss": 0.8312046124089149} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.20496703196887647, "train/loss": 0.7514018508148194, "eval/hcp-train-subset/loss": 0.8578659007626195, "eval/hcp-val/loss": 0.8564570200058722, "eval/nsd-val/loss": 0.8321936861161263} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.20670216574690067, "train/loss": 0.7485760294628143, "eval/hcp-train-subset/loss": 0.8575369802213484, "eval/hcp-val/loss": 0.8564222003183057, "eval/nsd-val/loss": 0.8328486873257545} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.2067251171518437, "train/loss": 0.7521237924575805, "eval/hcp-train-subset/loss": 0.8571438837435937, "eval/hcp-val/loss": 0.8552941353090348, "eval/nsd-val/loss": 0.8303472745803094} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.21214783106299734, "train/loss": 0.7465244474601745, "eval/hcp-train-subset/loss": 0.8579679208417093, "eval/hcp-val/loss": 0.8559206839530699, "eval/nsd-val/loss": 0.8320329064323057} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.20559060781459457, "train/loss": 0.7505450461006165, "eval/hcp-train-subset/loss": 0.8579278694045159, "eval/hcp-val/loss": 0.8570122103537282, "eval/nsd-val/loss": 0.8329966654700618} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.21066897420186392, "train/loss": 0.7488291010379792, "eval/hcp-train-subset/loss": 0.8573428546228716, "eval/hcp-val/loss": 0.8568273859639322, "eval/nsd-val/loss": 0.8317155001624938} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.21222653118432838, "train/loss": 0.7459420511245728, "eval/hcp-train-subset/loss": 0.8587126126212459, "eval/hcp-val/loss": 0.8568132481267375, "eval/nsd-val/loss": 0.8337275299333757} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.20969840123886946, "train/loss": 0.7521753520202636, "eval/hcp-train-subset/loss": 0.8578509697991032, "eval/hcp-val/loss": 0.8569792076464622, "eval/nsd-val/loss": 0.8333796762651012} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.2128521234443498, "train/loss": 0.746775191898346, "eval/hcp-train-subset/loss": 0.857924162380157, "eval/hcp-val/loss": 0.8563059424200365, "eval/nsd-val/loss": 0.8336504149821496} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.21496859530502938, "train/loss": 0.7488382335662842, "eval/hcp-train-subset/loss": 0.8572587313190583, "eval/hcp-val/loss": 0.85598444265704, "eval/nsd-val/loss": 0.8328481355021077} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.21310120334350308, "train/loss": 0.7490423098564148, "eval/hcp-train-subset/loss": 0.8576381408399151, "eval/hcp-val/loss": 0.8557057313380703, "eval/nsd-val/loss": 0.8336334382334063} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.21154950002372913, "train/loss": 0.7529990596675873, "eval/hcp-train-subset/loss": 0.8581858911821919, "eval/hcp-val/loss": 0.8565669501981428, "eval/nsd-val/loss": 0.8329493047729615} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.21539692161902882, "train/loss": 0.7503788470172882, "eval/hcp-train-subset/loss": 0.8573839702913838, "eval/hcp-val/loss": 0.8556105581022078, "eval/nsd-val/loss": 0.833456386481562} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.2141248466668796, "train/loss": 0.7515652469444275, "eval/hcp-train-subset/loss": 0.8573988329979682, "eval/hcp-val/loss": 0.8557549686201157, "eval/nsd-val/loss": 0.832778924895871} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.21430213378251256, "train/loss": 0.750100710811615, "eval/hcp-train-subset/loss": 0.8576985232291683, "eval/hcp-val/loss": 0.8560283376324561, "eval/nsd-val/loss": 0.8329095427067049} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.21669787533458412, "train/loss": 0.7498219616222381, "eval/hcp-train-subset/loss": 0.8573736625332986, "eval/hcp-val/loss": 0.8565660599739321, "eval/nsd-val/loss": 0.8324574635874841} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.21459521643731566, "train/loss": 0.7475621801948548, "eval/hcp-train-subset/loss": 0.8581377190928305, "eval/hcp-val/loss": 0.8563805770489478, "eval/nsd-val/loss": 0.8331276082223461} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.21497020919815601, "train/loss": 0.750729683189392, "eval/hcp-train-subset/loss": 0.8574908221921613, "eval/hcp-val/loss": 0.8557311575258931, "eval/nsd-val/loss": 0.8334302094674879} diff --git a/data_scaling/n400_2/pretrain/log.txt b/data_scaling/n400_2/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..c4c73c17fe6f4386bde58f0eb69e96e3a92f30ce --- /dev/null +++ b/data_scaling/n400_2/pretrain/log.txt @@ -0,0 +1,8241 @@ +pretraining fmri mae +start: 2026-01-17 20:36:04 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n400_2/pretrain +notes: data scaling experiment n400_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n400_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..01199}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00800..01199}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 13:40:56 lr: 0.000000 grad: 0.0150 (0.0150) loss: 0.9963 (0.9963) time: 7.8811 data: 6.7721 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:21:27 lr: 0.000000 grad: 0.0129 (0.0156) loss: 0.9960 (0.9959) time: 0.1612 data: 0.0748 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:19:00 lr: 0.000001 grad: 0.0138 (0.0147) loss: 0.9955 (0.9959) time: 0.1990 data: 0.1027 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:17:56 lr: 0.000001 grad: 0.0134 (0.0143) loss: 0.9957 (0.9958) time: 0.1755 data: 0.0831 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:17:13 lr: 0.000002 grad: 0.0125 (0.0140) loss: 0.9964 (0.9959) time: 0.1558 data: 0.0565 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:16:37 lr: 0.000002 grad: 0.0128 (0.0139) loss: 0.9957 (0.9959) time: 0.1738 data: 0.0854 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:16:02 lr: 0.000002 grad: 0.0126 (0.0138) loss: 0.9957 (0.9959) time: 0.1646 data: 0.0824 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:15:42 lr: 0.000003 grad: 0.0131 (0.0137) loss: 0.9962 (0.9959) time: 0.1763 data: 0.0811 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:15:09 lr: 0.000003 grad: 0.0131 (0.0136) loss: 0.9953 (0.9959) time: 0.1507 data: 0.0487 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:14:41 lr: 0.000004 grad: 0.0134 (0.0136) loss: 0.9956 (0.9959) time: 0.1421 data: 0.0420 max mem: 9377 +Train: [0] [1000/6250] eta: 0:14:18 lr: 0.000004 grad: 0.0135 (0.0136) loss: 0.9954 (0.9958) time: 0.1435 data: 0.0427 max mem: 9377 +Train: [0] [1100/6250] eta: 0:13:53 lr: 0.000004 grad: 0.0156 (0.0137) loss: 0.9954 (0.9958) time: 0.1599 data: 0.0620 max mem: 9377 +Train: [0] [1200/6250] eta: 0:13:30 lr: 0.000005 grad: 0.0180 (0.0140) loss: 0.9959 (0.9958) time: 0.1406 data: 0.0573 max mem: 9377 +Train: [0] [1300/6250] eta: 0:13:09 lr: 0.000005 grad: 0.0193 (0.0144) loss: 0.9958 (0.9958) time: 0.1670 data: 0.0774 max mem: 9377 +Train: [0] [1400/6250] eta: 0:12:49 lr: 0.000006 grad: 0.0219 (0.0149) loss: 0.9956 (0.9958) time: 0.1420 data: 0.0430 max mem: 9377 +Train: [0] [1500/6250] eta: 0:12:27 lr: 0.000006 grad: 0.0300 (0.0161) loss: 0.9951 (0.9958) time: 0.1457 data: 0.0508 max mem: 9377 +Train: [0] [1600/6250] eta: 0:12:07 lr: 0.000006 grad: 0.0472 (0.0175) loss: 0.9947 (0.9957) time: 0.1388 data: 0.0444 max mem: 9377 +Train: [0] [1700/6250] eta: 0:11:47 lr: 0.000007 grad: 0.0503 (0.0192) loss: 0.9955 (0.9957) time: 0.1160 data: 0.0193 max mem: 9377 +Train: [0] [1800/6250] eta: 0:11:29 lr: 0.000007 grad: 0.0403 (0.0205) loss: 0.9953 (0.9957) time: 0.1568 data: 0.0669 max mem: 9377 +Train: [0] [1900/6250] eta: 0:11:10 lr: 0.000008 grad: 0.0534 (0.0222) loss: 0.9953 (0.9957) time: 0.1346 data: 0.0460 max mem: 9377 +Train: [0] [2000/6250] eta: 0:10:52 lr: 0.000008 grad: 0.0517 (0.0238) loss: 0.9953 (0.9956) time: 0.1523 data: 0.0679 max mem: 9377 +Train: [0] [2100/6250] eta: 0:10:35 lr: 0.000008 grad: 0.0447 (0.0254) loss: 0.9948 (0.9956) time: 0.1290 data: 0.0468 max mem: 9377 +Train: [0] [2200/6250] eta: 0:10:18 lr: 0.000009 grad: 0.0367 (0.0268) loss: 0.9949 (0.9955) time: 0.1377 data: 0.0331 max mem: 9377 +Train: [0] [2300/6250] eta: 0:10:02 lr: 0.000009 grad: 0.0378 (0.0276) loss: 0.9943 (0.9955) time: 0.1413 data: 0.0471 max mem: 9377 +Train: [0] [2400/6250] eta: 0:09:45 lr: 0.000010 grad: 0.0459 (0.0288) loss: 0.9951 (0.9955) time: 0.1475 data: 0.0581 max mem: 9377 +Train: [0] [2500/6250] eta: 0:09:28 lr: 0.000010 grad: 0.0399 (0.0299) loss: 0.9940 (0.9954) time: 0.1254 data: 0.0325 max mem: 9377 +Train: [0] [2600/6250] eta: 0:09:12 lr: 0.000010 grad: 0.0559 (0.0309) loss: 0.9946 (0.9954) time: 0.1457 data: 0.0582 max mem: 9377 +Train: [0] [2700/6250] eta: 0:08:56 lr: 0.000011 grad: 0.0424 (0.0316) loss: 0.9946 (0.9954) time: 0.1598 data: 0.0703 max mem: 9377 +Train: [0] [2800/6250] eta: 0:08:41 lr: 0.000011 grad: 0.0556 (0.0324) loss: 0.9949 (0.9953) time: 0.1528 data: 0.0686 max mem: 9377 +Train: [0] [2900/6250] eta: 0:08:25 lr: 0.000012 grad: 0.0559 (0.0331) loss: 0.9935 (0.9953) time: 0.1388 data: 0.0478 max mem: 9377 +Train: [0] [3000/6250] eta: 0:08:09 lr: 0.000012 grad: 0.0461 (0.0339) loss: 0.9937 (0.9953) time: 0.1399 data: 0.0445 max mem: 9377 +Train: [0] [3100/6250] eta: 0:07:54 lr: 0.000012 grad: 0.0522 (0.0348) loss: 0.9933 (0.9952) time: 0.1515 data: 0.0609 max mem: 9377 +Train: [0] [3200/6250] eta: 0:07:38 lr: 0.000013 grad: 0.0689 (0.0358) loss: 0.9928 (0.9952) time: 0.1531 data: 0.0683 max mem: 9377 +Train: [0] [3300/6250] eta: 0:07:23 lr: 0.000013 grad: 0.0734 (0.0370) loss: 0.9933 (0.9951) time: 0.1515 data: 0.0663 max mem: 9377 +Train: [0] [3400/6250] eta: 0:07:07 lr: 0.000014 grad: 0.0852 (0.0380) loss: 0.9921 (0.9950) time: 0.1292 data: 0.0386 max mem: 9377 +Train: [0] [3500/6250] eta: 0:06:51 lr: 0.000014 grad: 0.0532 (0.0390) loss: 0.9936 (0.9950) time: 0.1416 data: 0.0592 max mem: 9377 +Train: [0] [3600/6250] eta: 0:06:36 lr: 0.000014 grad: 0.0691 (0.0400) loss: 0.9939 (0.9949) time: 0.1496 data: 0.0644 max mem: 9377 +Train: [0] [3700/6250] eta: 0:06:22 lr: 0.000015 grad: 0.0844 (0.0411) loss: 0.9922 (0.9948) time: 0.1620 data: 0.0807 max mem: 9377 +Train: [0] [3800/6250] eta: 0:06:07 lr: 0.000015 grad: 0.0601 (0.0420) loss: 0.9929 (0.9947) time: 0.1679 data: 0.0876 max mem: 9377 +Train: [0] [3900/6250] eta: 0:05:52 lr: 0.000016 grad: 0.0614 (0.0430) loss: 0.9920 (0.9947) time: 0.1430 data: 0.0537 max mem: 9377 +Train: [0] [4000/6250] eta: 0:05:37 lr: 0.000016 grad: 0.0938 (0.0442) loss: 0.9905 (0.9946) time: 0.1319 data: 0.0489 max mem: 9377 +Train: [0] [4100/6250] eta: 0:05:22 lr: 0.000016 grad: 0.0816 (0.0451) loss: 0.9903 (0.9945) time: 0.1362 data: 0.0426 max mem: 9377 +Train: [0] [4200/6250] eta: 0:05:07 lr: 0.000017 grad: 0.0908 (0.0461) loss: 0.9911 (0.9944) time: 0.1547 data: 0.0676 max mem: 9377 +Train: [0] [4300/6250] eta: 0:04:52 lr: 0.000017 grad: 0.0650 (0.0470) loss: 0.9932 (0.9943) time: 0.1535 data: 0.0724 max mem: 9377 +Train: [0] [4400/6250] eta: 0:04:38 lr: 0.000018 grad: 0.0807 (0.0477) loss: 0.9907 (0.9943) time: 0.1416 data: 0.0606 max mem: 9377 +Train: [0] [4500/6250] eta: 0:04:23 lr: 0.000018 grad: 0.0788 (0.0483) loss: 0.9905 (0.9942) time: 0.1519 data: 0.0647 max mem: 9377 +Train: [0] [4600/6250] eta: 0:04:08 lr: 0.000018 grad: 0.0796 (0.0490) loss: 0.9904 (0.9941) time: 0.1494 data: 0.0641 max mem: 9377 +Train: [0] [4700/6250] eta: 0:03:53 lr: 0.000019 grad: 0.0783 (0.0496) loss: 0.9923 (0.9941) time: 0.1549 data: 0.0599 max mem: 9377 +Train: [0] [4800/6250] eta: 0:03:38 lr: 0.000019 grad: 0.0786 (0.0503) loss: 0.9902 (0.9940) time: 0.1328 data: 0.0496 max mem: 9377 +Train: [0] [4900/6250] eta: 0:03:23 lr: 0.000020 grad: 0.0704 (0.0509) loss: 0.9908 (0.9939) time: 0.1724 data: 0.0886 max mem: 9377 +Train: [0] [5000/6250] eta: 0:03:08 lr: 0.000020 grad: 0.0653 (0.0515) loss: 0.9919 (0.9939) time: 0.1508 data: 0.0604 max mem: 9377 +Train: [0] [5100/6250] eta: 0:02:53 lr: 0.000020 grad: 0.0801 (0.0520) loss: 0.9919 (0.9938) time: 0.1366 data: 0.0450 max mem: 9377 +Train: [0] [5200/6250] eta: 0:02:38 lr: 0.000021 grad: 0.0846 (0.0526) loss: 0.9900 (0.9938) time: 0.1473 data: 0.0653 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:23 lr: 0.000021 grad: 0.0809 (0.0531) loss: 0.9887 (0.9937) time: 0.1464 data: 0.0515 max mem: 9377 +Train: [0] [5400/6250] eta: 0:02:08 lr: 0.000022 grad: 0.0616 (0.0536) loss: 0.9922 (0.9936) time: 0.1972 data: 0.0679 max mem: 9377 +Train: [0] [5500/6250] eta: 0:01:53 lr: 0.000022 grad: 0.0723 (0.0540) loss: 0.9901 (0.9936) time: 0.1666 data: 0.0720 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:38 lr: 0.000022 grad: 0.0676 (0.0543) loss: 0.9915 (0.9935) time: 0.1420 data: 0.0613 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:23 lr: 0.000023 grad: 0.0594 (0.0546) loss: 0.9915 (0.9935) time: 0.1462 data: 0.0571 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:08 lr: 0.000023 grad: 0.0751 (0.0550) loss: 0.9910 (0.9935) time: 0.1592 data: 0.0702 max mem: 9377 +Train: [0] [5900/6250] eta: 0:00:53 lr: 0.000024 grad: 0.0660 (0.0553) loss: 0.9898 (0.9934) time: 0.1628 data: 0.0795 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:38 lr: 0.000024 grad: 0.0673 (0.0556) loss: 0.9909 (0.9934) time: 0.1982 data: 0.1140 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:22 lr: 0.000024 grad: 0.0681 (0.0559) loss: 0.9902 (0.9933) time: 0.1837 data: 0.1030 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.0739 (0.0562) loss: 0.9908 (0.9933) time: 0.1514 data: 0.0679 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0783 (0.0564) loss: 0.9906 (0.9932) time: 0.1435 data: 0.0585 max mem: 9377 +Train: [0] Total time: 0:15:59 (0.1535 s / it) +Averaged stats: lr: 0.000025 grad: 0.0783 (0.0564) loss: 0.9906 (0.9932) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:04:38 loss: 0.9886 (0.9886) time: 4.4912 data: 4.4609 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9911 (0.9906) time: 0.1400 data: 0.1095 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:14 (0.2279 s / it) +Averaged stats (hcp-train-subset): loss: 0.9911 (0.9906) +Eval (hcp-val): [0] [ 0/62] eta: 0:03:16 loss: 0.9864 (0.9864) time: 3.1717 data: 3.1092 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9910 (0.9908) time: 0.1450 data: 0.1195 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (hcp-val): loss: 0.9910 (0.9908) +Eval (nsd-val): [0] [ 0/62] eta: 0:05:24 loss: 0.9901 (0.9901) time: 5.2348 data: 5.1970 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9897 (0.9896) time: 0.1710 data: 0.1435 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:18 (0.2931 s / it) +Averaged stats (nsd-val): loss: 0.9897 (0.9896) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 14:47:47 lr: 0.000025 grad: 0.0488 (0.0488) loss: 0.9956 (0.9956) time: 8.5228 data: 8.4116 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:26:49 lr: 0.000025 grad: 0.0939 (0.1000) loss: 0.9890 (0.9901) time: 0.2057 data: 0.0985 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:24:22 lr: 0.000026 grad: 0.0803 (0.1018) loss: 0.9901 (0.9891) time: 0.2212 data: 0.1118 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:22:57 lr: 0.000026 grad: 0.0776 (0.0994) loss: 0.9884 (0.9890) time: 0.1748 data: 0.0551 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:21:44 lr: 0.000027 grad: 0.0780 (0.0955) loss: 0.9894 (0.9891) time: 0.1994 data: 0.0995 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:20:42 lr: 0.000027 grad: 0.0839 (0.0931) loss: 0.9897 (0.9892) time: 0.1850 data: 0.0890 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:19:53 lr: 0.000027 grad: 0.0745 (0.0908) loss: 0.9890 (0.9892) time: 0.1920 data: 0.1014 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:19:11 lr: 0.000028 grad: 0.0664 (0.0892) loss: 0.9892 (0.9893) time: 0.1787 data: 0.0903 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:18:34 lr: 0.000028 grad: 0.0640 (0.0873) loss: 0.9907 (0.9893) time: 0.1857 data: 0.0936 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:17:56 lr: 0.000029 grad: 0.0610 (0.0857) loss: 0.9905 (0.9894) time: 0.1833 data: 0.0837 max mem: 9377 +Train: [1] [1000/6250] eta: 0:17:16 lr: 0.000029 grad: 0.0676 (0.0844) loss: 0.9906 (0.9895) time: 0.1474 data: 0.0644 max mem: 9377 +Train: [1] [1100/6250] eta: 0:17:01 lr: 0.000029 grad: 0.0638 (0.0834) loss: 0.9914 (0.9896) time: 0.1131 data: 0.0005 max mem: 9377 +Train: [1] [1200/6250] eta: 0:16:30 lr: 0.000030 grad: 0.0719 (0.0827) loss: 0.9900 (0.9896) time: 0.1910 data: 0.1061 max mem: 9377 +Train: [1] [1300/6250] eta: 0:16:00 lr: 0.000030 grad: 0.0782 (0.0822) loss: 0.9887 (0.9896) time: 0.1867 data: 0.0980 max mem: 9377 +Train: [1] [1400/6250] eta: 0:15:29 lr: 0.000031 grad: 0.0645 (0.0819) loss: 0.9902 (0.9896) time: 0.1399 data: 0.0425 max mem: 9377 +Train: [1] [1500/6250] eta: 0:15:03 lr: 0.000031 grad: 0.0782 (0.0817) loss: 0.9884 (0.9896) time: 0.1693 data: 0.0831 max mem: 9377 +Train: [1] [1600/6250] eta: 0:14:34 lr: 0.000031 grad: 0.0683 (0.0815) loss: 0.9893 (0.9896) time: 0.1443 data: 0.0532 max mem: 9377 +Train: [1] [1700/6250] eta: 0:14:09 lr: 0.000032 grad: 0.0687 (0.0814) loss: 0.9902 (0.9895) time: 0.1412 data: 0.0485 max mem: 9377 +Train: [1] [1800/6250] eta: 0:13:42 lr: 0.000032 grad: 0.0779 (0.0812) loss: 0.9888 (0.9895) time: 0.1383 data: 0.0473 max mem: 9377 +Train: [1] [1900/6250] eta: 0:13:20 lr: 0.000033 grad: 0.0682 (0.0810) loss: 0.9887 (0.9895) time: 0.1510 data: 0.0651 max mem: 9377 +Train: [1] [2000/6250] eta: 0:12:58 lr: 0.000033 grad: 0.0756 (0.0809) loss: 0.9896 (0.9894) time: 0.1499 data: 0.0620 max mem: 9377 +Train: [1] [2100/6250] eta: 0:12:36 lr: 0.000033 grad: 0.0727 (0.0810) loss: 0.9890 (0.9894) time: 0.1741 data: 0.0910 max mem: 9377 +Train: [1] [2200/6250] eta: 0:12:15 lr: 0.000034 grad: 0.0644 (0.0810) loss: 0.9903 (0.9894) time: 0.1536 data: 0.0656 max mem: 9377 +Train: [1] [2300/6250] eta: 0:11:54 lr: 0.000034 grad: 0.0661 (0.0806) loss: 0.9887 (0.9894) time: 0.1645 data: 0.0689 max mem: 9377 +Train: [1] [2400/6250] eta: 0:11:33 lr: 0.000035 grad: 0.0756 (0.0804) loss: 0.9897 (0.9894) time: 0.1622 data: 0.0590 max mem: 9377 +Train: [1] [2500/6250] eta: 0:11:13 lr: 0.000035 grad: 0.0573 (0.0802) loss: 0.9900 (0.9894) time: 0.1456 data: 0.0498 max mem: 9377 +Train: [1] [2600/6250] eta: 0:10:54 lr: 0.000035 grad: 0.0621 (0.0800) loss: 0.9914 (0.9893) time: 0.1652 data: 0.0756 max mem: 9377 +Train: [1] [2700/6250] eta: 0:10:35 lr: 0.000036 grad: 0.0635 (0.0798) loss: 0.9896 (0.9893) time: 0.1572 data: 0.0618 max mem: 9377 +Train: [1] [2800/6250] eta: 0:10:16 lr: 0.000036 grad: 0.0673 (0.0796) loss: 0.9885 (0.9893) time: 0.1692 data: 0.0756 max mem: 9377 +Train: [1] [2900/6250] eta: 0:09:56 lr: 0.000037 grad: 0.0705 (0.0794) loss: 0.9897 (0.9893) time: 0.1695 data: 0.0832 max mem: 9377 +Train: [1] [3000/6250] eta: 0:09:37 lr: 0.000037 grad: 0.0611 (0.0793) loss: 0.9895 (0.9893) time: 0.1923 data: 0.1114 max mem: 9377 +Train: [1] [3100/6250] eta: 0:09:18 lr: 0.000037 grad: 0.0671 (0.0791) loss: 0.9910 (0.9893) time: 0.1739 data: 0.0855 max mem: 9377 +Train: [1] [3200/6250] eta: 0:09:00 lr: 0.000038 grad: 0.0665 (0.0790) loss: 0.9892 (0.9892) time: 0.1744 data: 0.0899 max mem: 9377 +Train: [1] [3300/6250] eta: 0:08:42 lr: 0.000038 grad: 0.0717 (0.0788) loss: 0.9895 (0.9892) time: 0.1964 data: 0.1045 max mem: 9377 +Train: [1] [3400/6250] eta: 0:08:23 lr: 0.000039 grad: 0.0717 (0.0787) loss: 0.9897 (0.9892) time: 0.1527 data: 0.0667 max mem: 9377 +Train: [1] [3500/6250] eta: 0:08:04 lr: 0.000039 grad: 0.0729 (0.0786) loss: 0.9896 (0.9892) time: 0.1663 data: 0.0806 max mem: 9377 +Train: [1] [3600/6250] eta: 0:07:45 lr: 0.000039 grad: 0.0766 (0.0785) loss: 0.9884 (0.9892) time: 0.1540 data: 0.0687 max mem: 9377 +Train: [1] [3700/6250] eta: 0:07:26 lr: 0.000040 grad: 0.0619 (0.0785) loss: 0.9893 (0.9892) time: 0.1523 data: 0.0606 max mem: 9377 +Train: [1] [3800/6250] eta: 0:07:08 lr: 0.000040 grad: 0.0682 (0.0784) loss: 0.9890 (0.9892) time: 0.1649 data: 0.0712 max mem: 9377 +Train: [1] [3900/6250] eta: 0:06:49 lr: 0.000041 grad: 0.0756 (0.0783) loss: 0.9876 (0.9892) time: 0.1600 data: 0.0671 max mem: 9377 +Train: [1] [4000/6250] eta: 0:06:31 lr: 0.000041 grad: 0.0699 (0.0783) loss: 0.9882 (0.9891) time: 0.1385 data: 0.0392 max mem: 9377 +Train: [1] [4100/6250] eta: 0:06:13 lr: 0.000041 grad: 0.0654 (0.0782) loss: 0.9883 (0.9891) time: 0.1529 data: 0.0624 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:55 lr: 0.000042 grad: 0.0757 (0.0783) loss: 0.9886 (0.9891) time: 0.1704 data: 0.0776 max mem: 9377 +Train: [1] [4300/6250] eta: 0:05:37 lr: 0.000042 grad: 0.0772 (0.0783) loss: 0.9876 (0.9891) time: 0.1461 data: 0.0530 max mem: 9377 +Train: [1] [4400/6250] eta: 0:05:19 lr: 0.000043 grad: 0.0769 (0.0783) loss: 0.9888 (0.9890) time: 0.1689 data: 0.0862 max mem: 9377 +Train: [1] [4500/6250] eta: 0:05:01 lr: 0.000043 grad: 0.0707 (0.0783) loss: 0.9898 (0.9890) time: 0.1616 data: 0.0720 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:44 lr: 0.000043 grad: 0.0690 (0.0783) loss: 0.9886 (0.9890) time: 0.1754 data: 0.0866 max mem: 9377 +Train: [1] [4700/6250] eta: 0:04:27 lr: 0.000044 grad: 0.0692 (0.0783) loss: 0.9895 (0.9890) time: 0.1507 data: 0.0569 max mem: 9377 +Train: [1] [4800/6250] eta: 0:04:09 lr: 0.000044 grad: 0.0768 (0.0783) loss: 0.9888 (0.9890) time: 0.1542 data: 0.0709 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:52 lr: 0.000045 grad: 0.0667 (0.0783) loss: 0.9883 (0.9890) time: 0.3057 data: 0.2234 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:34 lr: 0.000045 grad: 0.0689 (0.0782) loss: 0.9889 (0.9890) time: 0.1445 data: 0.0548 max mem: 9377 +Train: [1] [5100/6250] eta: 0:03:17 lr: 0.000045 grad: 0.0717 (0.0782) loss: 0.9873 (0.9889) time: 0.1464 data: 0.0628 max mem: 9377 +Train: [1] [5200/6250] eta: 0:02:59 lr: 0.000046 grad: 0.0767 (0.0782) loss: 0.9869 (0.9889) time: 0.1298 data: 0.0283 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:42 lr: 0.000046 grad: 0.0709 (0.0782) loss: 0.9881 (0.9889) time: 0.1650 data: 0.0804 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:25 lr: 0.000047 grad: 0.0795 (0.0783) loss: 0.9879 (0.9889) time: 0.1464 data: 0.0531 max mem: 9377 +Train: [1] [5500/6250] eta: 0:02:07 lr: 0.000047 grad: 0.0840 (0.0784) loss: 0.9877 (0.9889) time: 0.1590 data: 0.0722 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:50 lr: 0.000047 grad: 0.0724 (0.0784) loss: 0.9871 (0.9889) time: 0.1658 data: 0.0860 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:33 lr: 0.000048 grad: 0.0751 (0.0784) loss: 0.9897 (0.9888) time: 0.1657 data: 0.0836 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:16 lr: 0.000048 grad: 0.0743 (0.0785) loss: 0.9870 (0.9888) time: 0.1390 data: 0.0453 max mem: 9377 +Train: [1] [5900/6250] eta: 0:00:59 lr: 0.000049 grad: 0.0706 (0.0786) loss: 0.9877 (0.9888) time: 0.1919 data: 0.1098 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:42 lr: 0.000049 grad: 0.0880 (0.0786) loss: 0.9868 (0.9888) time: 0.1791 data: 0.0908 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:25 lr: 0.000049 grad: 0.0755 (0.0786) loss: 0.9874 (0.9888) time: 0.2015 data: 0.1002 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:08 lr: 0.000050 grad: 0.0735 (0.0786) loss: 0.9872 (0.9887) time: 0.1807 data: 0.0911 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0703 (0.0787) loss: 0.9875 (0.9887) time: 0.2089 data: 0.1135 max mem: 9377 +Train: [1] Total time: 0:18:00 (0.1728 s / it) +Averaged stats: lr: 0.000050 grad: 0.0703 (0.0787) loss: 0.9875 (0.9887) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:06:11 loss: 0.9902 (0.9902) time: 5.9955 data: 5.9648 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9885 (0.9875) time: 0.1344 data: 0.1087 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:16 (0.2650 s / it) +Averaged stats (hcp-train-subset): loss: 0.9885 (0.9875) +Eval (hcp-val): [1] [ 0/62] eta: 0:06:08 loss: 0.9822 (0.9822) time: 5.9487 data: 5.9199 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9886 (0.9880) time: 0.1697 data: 0.1417 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:17 (0.2889 s / it) +Averaged stats (hcp-val): loss: 0.9886 (0.9880) +Eval (nsd-val): [1] [ 0/62] eta: 0:03:57 loss: 0.9881 (0.9881) time: 3.8312 data: 3.7596 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9898 (0.9883) time: 0.1391 data: 0.1109 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:17 (0.2822 s / it) +Averaged stats (nsd-val): loss: 0.9898 (0.9883) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 13:15:32 lr: 0.000050 grad: 0.0517 (0.0517) loss: 0.9945 (0.9945) time: 7.6372 data: 7.5300 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:25:47 lr: 0.000050 grad: 0.0658 (0.0802) loss: 0.9894 (0.9877) time: 0.2028 data: 0.1075 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:22:49 lr: 0.000051 grad: 0.0669 (0.0847) loss: 0.9865 (0.9864) time: 0.1905 data: 0.0857 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:22:08 lr: 0.000051 grad: 0.0712 (0.0858) loss: 0.9875 (0.9860) time: 0.2128 data: 0.0922 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:21:24 lr: 0.000052 grad: 0.0913 (0.0868) loss: 0.9852 (0.9858) time: 0.1771 data: 0.0455 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:20:41 lr: 0.000052 grad: 0.0901 (0.0884) loss: 0.9835 (0.9855) time: 0.2234 data: 0.1232 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:19:40 lr: 0.000052 grad: 0.0919 (0.0894) loss: 0.9834 (0.9853) time: 0.1649 data: 0.0768 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:18:51 lr: 0.000053 grad: 0.0763 (0.0903) loss: 0.9858 (0.9854) time: 0.1577 data: 0.0542 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:18:14 lr: 0.000053 grad: 0.1024 (0.0912) loss: 0.9860 (0.9853) time: 0.1768 data: 0.0844 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:17:42 lr: 0.000054 grad: 0.0855 (0.0910) loss: 0.9868 (0.9855) time: 0.1809 data: 0.0892 max mem: 9377 +Train: [2] [1000/6250] eta: 0:17:08 lr: 0.000054 grad: 0.0872 (0.0919) loss: 0.9870 (0.9855) time: 0.1436 data: 0.0438 max mem: 9377 +Train: [2] [1100/6250] eta: 0:16:40 lr: 0.000054 grad: 0.0822 (0.0924) loss: 0.9859 (0.9855) time: 0.1951 data: 0.1119 max mem: 9377 +Train: [2] [1200/6250] eta: 0:16:04 lr: 0.000055 grad: 0.0792 (0.0929) loss: 0.9859 (0.9854) time: 0.1623 data: 0.0787 max mem: 9377 +Train: [2] [1300/6250] eta: 0:15:35 lr: 0.000055 grad: 0.0811 (0.0936) loss: 0.9870 (0.9855) time: 0.1340 data: 0.0404 max mem: 9377 +Train: [2] [1400/6250] eta: 0:15:07 lr: 0.000056 grad: 0.0889 (0.0946) loss: 0.9862 (0.9855) time: 0.1680 data: 0.0879 max mem: 9377 +Train: [2] [1500/6250] eta: 0:14:43 lr: 0.000056 grad: 0.0988 (0.0958) loss: 0.9842 (0.9855) time: 0.1747 data: 0.0894 max mem: 9377 +Train: [2] [1600/6250] eta: 0:14:17 lr: 0.000056 grad: 0.1113 (0.0975) loss: 0.9861 (0.9855) time: 0.1483 data: 0.0647 max mem: 9377 +Train: [2] [1700/6250] eta: 0:13:53 lr: 0.000057 grad: 0.1447 (0.1016) loss: 0.9848 (0.9854) time: 0.1623 data: 0.0782 max mem: 9377 +Train: [2] [1800/6250] eta: 0:13:29 lr: 0.000057 grad: 0.1101 (0.1057) loss: 0.9852 (0.9853) time: 0.1629 data: 0.0776 max mem: 9377 +Train: [2] [1900/6250] eta: 0:13:07 lr: 0.000058 grad: 0.2137 (0.1093) loss: 0.9849 (0.9853) time: 0.1586 data: 0.0685 max mem: 9377 +Train: [2] [2000/6250] eta: 0:12:46 lr: 0.000058 grad: 0.2358 (0.1141) loss: 0.9879 (0.9853) time: 0.1570 data: 0.0624 max mem: 9377 +Train: [2] [2100/6250] eta: 0:12:24 lr: 0.000058 grad: 0.1744 (0.1181) loss: 0.9853 (0.9853) time: 0.1673 data: 0.0694 max mem: 9377 +Train: [2] [2200/6250] eta: 0:12:03 lr: 0.000059 grad: 0.1913 (0.1218) loss: 0.9838 (0.9852) time: 0.1770 data: 0.0884 max mem: 9377 +Train: [2] [2300/6250] eta: 0:11:42 lr: 0.000059 grad: 0.1562 (0.1267) loss: 0.9849 (0.9851) time: 0.1812 data: 0.0946 max mem: 9377 +Train: [2] [2400/6250] eta: 0:11:21 lr: 0.000060 grad: 0.1492 (0.1313) loss: 0.9840 (0.9851) time: 0.1505 data: 0.0581 max mem: 9377 +Train: [2] [2500/6250] eta: 0:11:01 lr: 0.000060 grad: 0.1537 (0.1357) loss: 0.9854 (0.9851) time: 0.1406 data: 0.0479 max mem: 9377 +Train: [2] [2600/6250] eta: 0:10:41 lr: 0.000060 grad: 0.1749 (0.1387) loss: 0.9847 (0.9850) time: 0.1440 data: 0.0600 max mem: 9377 +Train: [2] [2700/6250] eta: 0:10:22 lr: 0.000061 grad: 0.1591 (0.1414) loss: 0.9815 (0.9849) time: 0.1425 data: 0.0574 max mem: 9377 +Train: [2] [2800/6250] eta: 0:10:03 lr: 0.000061 grad: 0.1346 (0.1434) loss: 0.9821 (0.9848) time: 0.1565 data: 0.0670 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:44 lr: 0.000062 grad: 0.1842 (0.1463) loss: 0.9840 (0.9847) time: 0.1730 data: 0.0882 max mem: 9377 +Train: [2] [3000/6250] eta: 0:09:26 lr: 0.000062 grad: 0.1635 (0.1484) loss: 0.9816 (0.9846) time: 0.1798 data: 0.0976 max mem: 9377 +Train: [2] [3100/6250] eta: 0:09:06 lr: 0.000062 grad: 0.2447 (0.1518) loss: 0.9840 (0.9845) time: 0.1458 data: 0.0565 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:47 lr: 0.000063 grad: 0.1703 (0.1542) loss: 0.9812 (0.9844) time: 0.1704 data: 0.0861 max mem: 9377 +Train: [2] [3300/6250] eta: 0:08:28 lr: 0.000063 grad: 0.1581 (0.1561) loss: 0.9835 (0.9843) time: 0.1681 data: 0.0837 max mem: 9377 +Train: [2] [3400/6250] eta: 0:08:10 lr: 0.000064 grad: 0.1980 (0.1582) loss: 0.9828 (0.9843) time: 0.1570 data: 0.0729 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:51 lr: 0.000064 grad: 0.2312 (0.1609) loss: 0.9799 (0.9842) time: 0.1292 data: 0.0456 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:34 lr: 0.000064 grad: 0.1835 (0.1632) loss: 0.9820 (0.9842) time: 0.1670 data: 0.0802 max mem: 9377 +Train: [2] [3700/6250] eta: 0:07:16 lr: 0.000065 grad: 0.3244 (0.1665) loss: 0.9818 (0.9841) time: 0.1452 data: 0.0468 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:58 lr: 0.000065 grad: 0.1983 (0.1682) loss: 0.9843 (0.9841) time: 0.1478 data: 0.0602 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:39 lr: 0.000066 grad: 0.2878 (0.1699) loss: 0.9808 (0.9840) time: 0.1414 data: 0.0530 max mem: 9377 +Train: [2] [4000/6250] eta: 0:06:22 lr: 0.000066 grad: 0.2758 (0.1719) loss: 0.9827 (0.9840) time: 0.1531 data: 0.0587 max mem: 9377 +Train: [2] [4100/6250] eta: 0:06:04 lr: 0.000066 grad: 0.1303 (0.1723) loss: 0.9829 (0.9839) time: 0.1632 data: 0.0779 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:47 lr: 0.000067 grad: 0.1166 (0.1738) loss: 0.9829 (0.9839) time: 0.1554 data: 0.0731 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:29 lr: 0.000067 grad: 0.1268 (0.1749) loss: 0.9821 (0.9838) time: 0.1636 data: 0.0728 max mem: 9377 +Train: [2] [4400/6250] eta: 0:05:12 lr: 0.000068 grad: 0.2283 (0.1764) loss: 0.9790 (0.9838) time: 0.1585 data: 0.0629 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:54 lr: 0.000068 grad: 0.2034 (0.1773) loss: 0.9836 (0.9837) time: 0.1672 data: 0.0747 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:37 lr: 0.000068 grad: 0.1972 (0.1786) loss: 0.9808 (0.9837) time: 0.1441 data: 0.0532 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:20 lr: 0.000069 grad: 0.2040 (0.1796) loss: 0.9814 (0.9836) time: 0.1493 data: 0.0552 max mem: 9377 +Train: [2] [4800/6250] eta: 0:04:03 lr: 0.000069 grad: 0.2012 (0.1803) loss: 0.9821 (0.9835) time: 0.1578 data: 0.0716 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:46 lr: 0.000070 grad: 0.1570 (0.1807) loss: 0.9804 (0.9835) time: 0.1969 data: 0.1136 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:29 lr: 0.000070 grad: 0.1685 (0.1812) loss: 0.9798 (0.9834) time: 0.1665 data: 0.0775 max mem: 9377 +Train: [2] [5100/6250] eta: 0:03:12 lr: 0.000070 grad: 0.1942 (0.1818) loss: 0.9809 (0.9833) time: 0.1586 data: 0.0700 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:55 lr: 0.000071 grad: 0.1566 (0.1832) loss: 0.9804 (0.9833) time: 0.1686 data: 0.0866 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:38 lr: 0.000071 grad: 0.1162 (0.1832) loss: 0.9809 (0.9832) time: 0.1960 data: 0.1143 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:21 lr: 0.000072 grad: 0.1825 (0.1845) loss: 0.9806 (0.9831) time: 0.1603 data: 0.0657 max mem: 9377 +Train: [2] [5500/6250] eta: 0:02:05 lr: 0.000072 grad: 0.1625 (0.1849) loss: 0.9763 (0.9830) time: 0.1521 data: 0.0563 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:48 lr: 0.000072 grad: 0.1725 (0.1853) loss: 0.9811 (0.9830) time: 0.1518 data: 0.0517 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:31 lr: 0.000073 grad: 0.1295 (0.1858) loss: 0.9764 (0.9829) time: 0.1743 data: 0.0804 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:14 lr: 0.000073 grad: 0.1575 (0.1863) loss: 0.9807 (0.9828) time: 0.1624 data: 0.0773 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:58 lr: 0.000074 grad: 0.1883 (0.1873) loss: 0.9783 (0.9828) time: 0.2337 data: 0.1529 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:41 lr: 0.000074 grad: 0.1409 (0.1875) loss: 0.9811 (0.9827) time: 0.2433 data: 0.1718 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:25 lr: 0.000074 grad: 0.3045 (0.1883) loss: 0.9791 (0.9826) time: 0.2297 data: 0.1493 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.2259 (0.1886) loss: 0.9804 (0.9825) time: 0.1767 data: 0.0942 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1858 (0.1892) loss: 0.9765 (0.9825) time: 0.1653 data: 0.0797 max mem: 9377 +Train: [2] Total time: 0:17:34 (0.1688 s / it) +Averaged stats: lr: 0.000075 grad: 0.1858 (0.1892) loss: 0.9765 (0.9825) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:03:47 loss: 0.9816 (0.9816) time: 3.6764 data: 3.6048 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9786 (0.9788) time: 0.1451 data: 0.1198 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:15 (0.2426 s / it) +Averaged stats (hcp-train-subset): loss: 0.9786 (0.9788) +Eval (hcp-val): [2] [ 0/62] eta: 0:05:59 loss: 0.9705 (0.9705) time: 5.7978 data: 5.7672 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9788 (0.9781) time: 0.1021 data: 0.0765 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:20 (0.3292 s / it) +Averaged stats (hcp-val): loss: 0.9788 (0.9781) +Eval (nsd-val): [2] [ 0/62] eta: 0:06:00 loss: 0.9734 (0.9734) time: 5.8174 data: 5.7867 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9783 (0.9767) time: 0.1735 data: 0.1479 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:16 (0.2708 s / it) +Averaged stats (nsd-val): loss: 0.9783 (0.9767) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 9:11:13 lr: 0.000075 grad: 0.2882 (0.2882) loss: 0.9566 (0.9566) time: 5.2918 data: 5.0053 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:21:45 lr: 0.000075 grad: 0.2097 (0.2241) loss: 0.9828 (0.9801) time: 0.1756 data: 0.0652 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:19:28 lr: 0.000076 grad: 0.2615 (0.2216) loss: 0.9826 (0.9805) time: 0.1715 data: 0.0776 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:18:40 lr: 0.000076 grad: 0.1328 (0.2094) loss: 0.9795 (0.9807) time: 0.1766 data: 0.0788 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:18:05 lr: 0.000077 grad: 0.1709 (0.2059) loss: 0.9799 (0.9801) time: 0.1766 data: 0.0701 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:18:07 lr: 0.000077 grad: 0.1495 (0.1997) loss: 0.9770 (0.9796) time: 0.2110 data: 0.0934 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:17:29 lr: 0.000077 grad: 0.1265 (0.1958) loss: 0.9772 (0.9792) time: 0.1729 data: 0.0721 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:17:03 lr: 0.000078 grad: 0.2015 (0.1999) loss: 0.9782 (0.9790) time: 0.1595 data: 0.0630 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:16:39 lr: 0.000078 grad: 0.1494 (0.2022) loss: 0.9779 (0.9787) time: 0.1859 data: 0.0949 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:16:14 lr: 0.000079 grad: 0.1570 (0.2004) loss: 0.9751 (0.9783) time: 0.1708 data: 0.0821 max mem: 9377 +Train: [3] [1000/6250] eta: 0:15:40 lr: 0.000079 grad: 0.1552 (0.2015) loss: 0.9783 (0.9781) time: 0.1567 data: 0.0763 max mem: 9377 +Train: [3] [1100/6250] eta: 0:15:13 lr: 0.000079 grad: 0.2165 (0.2046) loss: 0.9776 (0.9779) time: 0.1440 data: 0.0489 max mem: 9377 +Train: [3] [1200/6250] eta: 0:14:51 lr: 0.000080 grad: 0.1426 (0.2051) loss: 0.9738 (0.9777) time: 0.1621 data: 0.0729 max mem: 9377 +Train: [3] [1300/6250] eta: 0:14:29 lr: 0.000080 grad: 0.1933 (0.2065) loss: 0.9750 (0.9775) time: 0.1699 data: 0.0810 max mem: 9377 +Train: [3] [1400/6250] eta: 0:14:06 lr: 0.000081 grad: 0.1950 (0.2096) loss: 0.9769 (0.9773) time: 0.1554 data: 0.0707 max mem: 9377 +Train: [3] [1500/6250] eta: 0:13:42 lr: 0.000081 grad: 0.2069 (0.2137) loss: 0.9729 (0.9772) time: 0.1569 data: 0.0658 max mem: 9377 +Train: [3] [1600/6250] eta: 0:13:20 lr: 0.000081 grad: 0.1773 (0.2155) loss: 0.9737 (0.9770) time: 0.1488 data: 0.0612 max mem: 9377 +Train: [3] [1700/6250] eta: 0:12:57 lr: 0.000082 grad: 0.2556 (0.2167) loss: 0.9766 (0.9768) time: 0.1431 data: 0.0520 max mem: 9377 +Train: [3] [1800/6250] eta: 0:12:38 lr: 0.000082 grad: 0.1593 (0.2166) loss: 0.9764 (0.9766) time: 0.1806 data: 0.0981 max mem: 9377 +Train: [3] [1900/6250] eta: 0:12:18 lr: 0.000083 grad: 0.1737 (0.2162) loss: 0.9762 (0.9765) time: 0.1816 data: 0.0885 max mem: 9377 +Train: [3] [2000/6250] eta: 0:11:58 lr: 0.000083 grad: 0.2340 (0.2175) loss: 0.9743 (0.9764) time: 0.1538 data: 0.0626 max mem: 9377 +Train: [3] [2100/6250] eta: 0:11:40 lr: 0.000083 grad: 0.2982 (0.2183) loss: 0.9783 (0.9763) time: 0.1449 data: 0.0586 max mem: 9377 +Train: [3] [2200/6250] eta: 0:11:21 lr: 0.000084 grad: 0.2263 (0.2191) loss: 0.9772 (0.9762) time: 0.1518 data: 0.0634 max mem: 9377 +Train: [3] [2300/6250] eta: 0:11:03 lr: 0.000084 grad: 0.2187 (0.2210) loss: 0.9724 (0.9760) time: 0.1656 data: 0.0725 max mem: 9377 +Train: [3] [2400/6250] eta: 0:10:44 lr: 0.000085 grad: 0.1437 (0.2217) loss: 0.9747 (0.9759) time: 0.1676 data: 0.0766 max mem: 9377 +Train: [3] [2500/6250] eta: 0:10:27 lr: 0.000085 grad: 0.1844 (0.2209) loss: 0.9718 (0.9757) time: 0.1787 data: 0.0883 max mem: 9377 +Train: [3] [2600/6250] eta: 0:10:10 lr: 0.000085 grad: 0.1660 (0.2204) loss: 0.9713 (0.9756) time: 0.1495 data: 0.0667 max mem: 9377 +Train: [3] [2700/6250] eta: 0:09:52 lr: 0.000086 grad: 0.1720 (0.2205) loss: 0.9695 (0.9754) time: 0.1618 data: 0.0814 max mem: 9377 +Train: [3] [2800/6250] eta: 0:09:34 lr: 0.000086 grad: 0.1748 (0.2202) loss: 0.9745 (0.9752) time: 0.1615 data: 0.0737 max mem: 9377 +Train: [3] [2900/6250] eta: 0:09:18 lr: 0.000087 grad: 0.1766 (0.2206) loss: 0.9729 (0.9751) time: 0.1845 data: 0.0992 max mem: 9377 +Train: [3] [3000/6250] eta: 0:09:00 lr: 0.000087 grad: 0.2414 (0.2221) loss: 0.9685 (0.9749) time: 0.1429 data: 0.0523 max mem: 9377 +Train: [3] [3100/6250] eta: 0:08:42 lr: 0.000087 grad: 0.1879 (0.2231) loss: 0.9695 (0.9748) time: 0.1568 data: 0.0618 max mem: 9377 +Train: [3] [3200/6250] eta: 0:08:25 lr: 0.000088 grad: 0.1776 (0.2230) loss: 0.9693 (0.9746) time: 0.1687 data: 0.0826 max mem: 9377 +Train: [3] [3300/6250] eta: 0:08:08 lr: 0.000088 grad: 0.2959 (0.2238) loss: 0.9690 (0.9745) time: 0.1848 data: 0.0912 max mem: 9377 +Train: [3] [3400/6250] eta: 0:07:51 lr: 0.000089 grad: 0.1773 (0.2231) loss: 0.9691 (0.9743) time: 0.1576 data: 0.0714 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:34 lr: 0.000089 grad: 0.2687 (0.2243) loss: 0.9696 (0.9742) time: 0.1461 data: 0.0536 max mem: 9377 +Train: [3] [3600/6250] eta: 0:07:17 lr: 0.000089 grad: 0.2699 (0.2244) loss: 0.9678 (0.9740) time: 0.1551 data: 0.0652 max mem: 9377 +Train: [3] [3700/6250] eta: 0:07:00 lr: 0.000090 grad: 0.2364 (0.2256) loss: 0.9690 (0.9739) time: 0.1514 data: 0.0570 max mem: 9377 +Train: [3] [3800/6250] eta: 0:06:42 lr: 0.000090 grad: 0.2149 (0.2261) loss: 0.9681 (0.9737) time: 0.1451 data: 0.0583 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:25 lr: 0.000091 grad: 0.2661 (0.2278) loss: 0.9681 (0.9736) time: 0.1548 data: 0.0691 max mem: 9377 +Train: [3] [4000/6250] eta: 0:06:08 lr: 0.000091 grad: 0.1876 (0.2274) loss: 0.9689 (0.9735) time: 0.1501 data: 0.0685 max mem: 9377 +Train: [3] [4100/6250] eta: 0:05:52 lr: 0.000091 grad: 0.2136 (0.2281) loss: 0.9691 (0.9734) time: 0.1653 data: 0.0833 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:35 lr: 0.000092 grad: 0.1807 (0.2289) loss: 0.9652 (0.9732) time: 0.1335 data: 0.0494 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:18 lr: 0.000092 grad: 0.2113 (0.2295) loss: 0.9642 (0.9730) time: 0.1621 data: 0.0714 max mem: 9377 +Train: [3] [4400/6250] eta: 0:05:02 lr: 0.000093 grad: 0.2403 (0.2305) loss: 0.9647 (0.9729) time: 0.1624 data: 0.0817 max mem: 9377 +Train: [3] [4500/6250] eta: 0:04:48 lr: 0.000093 grad: 0.2160 (0.2310) loss: 0.9640 (0.9727) time: 0.3636 data: 0.2809 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:31 lr: 0.000093 grad: 0.2033 (0.2313) loss: 0.9622 (0.9724) time: 0.1705 data: 0.0872 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:14 lr: 0.000094 grad: 0.2853 (0.2321) loss: 0.9576 (0.9721) time: 0.1397 data: 0.0582 max mem: 9377 +Train: [3] [4800/6250] eta: 0:03:57 lr: 0.000094 grad: 0.2224 (0.2330) loss: 0.9546 (0.9719) time: 0.1586 data: 0.0633 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:41 lr: 0.000095 grad: 0.1922 (0.2339) loss: 0.9555 (0.9716) time: 0.1716 data: 0.0866 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:25 lr: 0.000095 grad: 0.3458 (0.2358) loss: 0.9577 (0.9712) time: 0.1862 data: 0.0972 max mem: 9377 +Train: [3] [5100/6250] eta: 0:03:08 lr: 0.000095 grad: 0.2677 (0.2372) loss: 0.9539 (0.9709) time: 0.1434 data: 0.0584 max mem: 9377 +Train: [3] [5200/6250] eta: 0:02:52 lr: 0.000096 grad: 0.3607 (0.2397) loss: 0.9550 (0.9705) time: 0.1514 data: 0.0646 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:35 lr: 0.000096 grad: 0.4159 (0.2411) loss: 0.9564 (0.9702) time: 0.1773 data: 0.0929 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:18 lr: 0.000097 grad: 0.2333 (0.2428) loss: 0.9500 (0.9698) time: 0.1580 data: 0.0687 max mem: 9377 +Train: [3] [5500/6250] eta: 0:02:02 lr: 0.000097 grad: 0.2347 (0.2441) loss: 0.9471 (0.9695) time: 0.1468 data: 0.0642 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:46 lr: 0.000097 grad: 0.2708 (0.2453) loss: 0.9439 (0.9691) time: 0.1605 data: 0.0683 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:29 lr: 0.000098 grad: 0.3443 (0.2466) loss: 0.9464 (0.9687) time: 0.1643 data: 0.0819 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:13 lr: 0.000098 grad: 0.2569 (0.2478) loss: 0.9428 (0.9683) time: 0.1761 data: 0.0881 max mem: 9377 +Train: [3] [5900/6250] eta: 0:00:57 lr: 0.000099 grad: 0.2977 (0.2485) loss: 0.9430 (0.9679) time: 0.2304 data: 0.1478 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:41 lr: 0.000099 grad: 0.2627 (0.2492) loss: 0.9459 (0.9675) time: 0.1927 data: 0.1148 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:24 lr: 0.000099 grad: 0.2950 (0.2510) loss: 0.9427 (0.9671) time: 0.1361 data: 0.0618 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.2580 (0.2517) loss: 0.9427 (0.9667) time: 0.1685 data: 0.0932 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.2752 (0.2520) loss: 0.9426 (0.9665) time: 0.1799 data: 0.0857 max mem: 9377 +Train: [3] Total time: 0:17:20 (0.1664 s / it) +Averaged stats: lr: 0.000100 grad: 0.2752 (0.2520) loss: 0.9426 (0.9665) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:04:53 loss: 0.9427 (0.9427) time: 4.7375 data: 4.6957 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9421 (0.9428) time: 0.1425 data: 0.1173 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:16 (0.2593 s / it) +Averaged stats (hcp-train-subset): loss: 0.9421 (0.9428) +Eval (hcp-val): [3] [ 0/62] eta: 0:06:05 loss: 0.9391 (0.9391) time: 5.8957 data: 5.8665 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9406 (0.9418) time: 0.1552 data: 0.1282 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:16 (0.2669 s / it) +Averaged stats (hcp-val): loss: 0.9406 (0.9418) +Eval (nsd-val): [3] [ 0/62] eta: 0:05:56 loss: 0.9071 (0.9071) time: 5.7547 data: 5.7235 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9195 (0.9194) time: 0.1615 data: 0.1355 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:15 (0.2528 s / it) +Averaged stats (nsd-val): loss: 0.9195 (0.9194) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 11:56:12 lr: 0.000100 grad: 0.3683 (0.3683) loss: 0.9430 (0.9430) time: 6.8756 data: 6.7839 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:22:48 lr: 0.000100 grad: 0.2510 (0.4078) loss: 0.9449 (0.9450) time: 0.1805 data: 0.0792 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:21:19 lr: 0.000101 grad: 0.3379 (0.3677) loss: 0.9438 (0.9431) time: 0.1978 data: 0.0999 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:20:35 lr: 0.000101 grad: 0.3333 (0.3681) loss: 0.9409 (0.9428) time: 0.2174 data: 0.1080 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:19:41 lr: 0.000102 grad: 0.3679 (0.3586) loss: 0.9406 (0.9424) time: 0.2189 data: 0.1088 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:19:03 lr: 0.000102 grad: 0.3359 (0.3527) loss: 0.9378 (0.9420) time: 0.1918 data: 0.0843 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:18:26 lr: 0.000102 grad: 0.3229 (0.3420) loss: 0.9400 (0.9416) time: 0.2056 data: 0.0875 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:17:48 lr: 0.000103 grad: 0.2945 (0.3401) loss: 0.9362 (0.9414) time: 0.1620 data: 0.0564 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:17:18 lr: 0.000103 grad: 0.2428 (0.3328) loss: 0.9363 (0.9409) time: 0.1606 data: 0.0645 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:16:50 lr: 0.000104 grad: 0.2118 (0.3251) loss: 0.9381 (0.9403) time: 0.1859 data: 0.0917 max mem: 9377 +Train: [4] [1000/6250] eta: 0:16:19 lr: 0.000104 grad: 0.2809 (0.3253) loss: 0.9331 (0.9399) time: 0.1440 data: 0.0551 max mem: 9377 +Train: [4] [1100/6250] eta: 0:15:49 lr: 0.000104 grad: 0.2336 (0.3240) loss: 0.9340 (0.9396) time: 0.1871 data: 0.1050 max mem: 9377 +Train: [4] [1200/6250] eta: 0:15:19 lr: 0.000105 grad: 0.3202 (0.3204) loss: 0.9354 (0.9392) time: 0.1572 data: 0.0724 max mem: 9377 +Train: [4] [1300/6250] eta: 0:14:49 lr: 0.000105 grad: 0.3844 (0.3229) loss: 0.9369 (0.9389) time: 0.1364 data: 0.0442 max mem: 9377 +Train: [4] [1400/6250] eta: 0:14:24 lr: 0.000106 grad: 0.2813 (0.3204) loss: 0.9341 (0.9386) time: 0.1869 data: 0.0972 max mem: 9377 +Train: [4] [1500/6250] eta: 0:13:59 lr: 0.000106 grad: 0.2795 (0.3174) loss: 0.9373 (0.9383) time: 0.1631 data: 0.0767 max mem: 9377 +Train: [4] [1600/6250] eta: 0:13:36 lr: 0.000106 grad: 0.2990 (0.3155) loss: 0.9330 (0.9380) time: 0.1513 data: 0.0683 max mem: 9377 +Train: [4] [1700/6250] eta: 0:13:15 lr: 0.000107 grad: 0.2853 (0.3134) loss: 0.9326 (0.9377) time: 0.1649 data: 0.0769 max mem: 9377 +Train: [4] [1800/6250] eta: 0:12:54 lr: 0.000107 grad: 0.2987 (0.3147) loss: 0.9323 (0.9374) time: 0.1674 data: 0.0743 max mem: 9377 +Train: [4] [1900/6250] eta: 0:12:34 lr: 0.000108 grad: 0.2142 (0.3132) loss: 0.9310 (0.9370) time: 0.1455 data: 0.0503 max mem: 9377 +Train: [4] [2000/6250] eta: 0:12:12 lr: 0.000108 grad: 0.2059 (0.3120) loss: 0.9350 (0.9367) time: 0.1499 data: 0.0607 max mem: 9377 +Train: [4] [2100/6250] eta: 0:11:51 lr: 0.000108 grad: 0.3573 (0.3113) loss: 0.9314 (0.9365) time: 0.1625 data: 0.0722 max mem: 9377 +Train: [4] [2200/6250] eta: 0:11:30 lr: 0.000109 grad: 0.2612 (0.3094) loss: 0.9306 (0.9363) time: 0.1477 data: 0.0574 max mem: 9377 +Train: [4] [2300/6250] eta: 0:11:13 lr: 0.000109 grad: 0.2136 (0.3069) loss: 0.9296 (0.9360) time: 0.1692 data: 0.0775 max mem: 9377 +Train: [4] [2400/6250] eta: 0:10:53 lr: 0.000110 grad: 0.3406 (0.3047) loss: 0.9306 (0.9358) time: 0.1439 data: 0.0441 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:35 lr: 0.000110 grad: 0.2399 (0.3029) loss: 0.9277 (0.9356) time: 0.1731 data: 0.0796 max mem: 9377 +Train: [4] [2600/6250] eta: 0:10:15 lr: 0.000110 grad: 0.2195 (0.3011) loss: 0.9241 (0.9352) time: 0.1419 data: 0.0473 max mem: 9377 +Train: [4] [2700/6250] eta: 0:09:58 lr: 0.000111 grad: 0.2626 (0.3002) loss: 0.9276 (0.9350) time: 0.1681 data: 0.0785 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:39 lr: 0.000111 grad: 0.2200 (0.2991) loss: 0.9264 (0.9347) time: 0.1354 data: 0.0513 max mem: 9377 +Train: [4] [2900/6250] eta: 0:09:22 lr: 0.000112 grad: 0.2414 (0.2981) loss: 0.9231 (0.9344) time: 0.1448 data: 0.0612 max mem: 9377 +Train: [4] [3000/6250] eta: 0:09:04 lr: 0.000112 grad: 0.1980 (0.2973) loss: 0.9248 (0.9340) time: 0.1709 data: 0.0885 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:47 lr: 0.000112 grad: 0.2786 (0.2957) loss: 0.9217 (0.9337) time: 0.1677 data: 0.0752 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:29 lr: 0.000113 grad: 0.2322 (0.2950) loss: 0.9262 (0.9334) time: 0.1523 data: 0.0602 max mem: 9377 +Train: [4] [3300/6250] eta: 0:08:11 lr: 0.000113 grad: 0.1977 (0.2936) loss: 0.9222 (0.9331) time: 0.1616 data: 0.0714 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:53 lr: 0.000114 grad: 0.2446 (0.2926) loss: 0.9193 (0.9327) time: 0.1694 data: 0.0751 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:36 lr: 0.000114 grad: 0.2375 (0.2914) loss: 0.9250 (0.9324) time: 0.1577 data: 0.0726 max mem: 9377 +Train: [4] [3600/6250] eta: 0:07:20 lr: 0.000114 grad: 0.2658 (0.2906) loss: 0.9211 (0.9321) time: 0.1734 data: 0.0832 max mem: 9377 +Train: [4] [3700/6250] eta: 0:07:03 lr: 0.000115 grad: 0.2523 (0.2896) loss: 0.9250 (0.9318) time: 0.1669 data: 0.0871 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:47 lr: 0.000115 grad: 0.2261 (0.2884) loss: 0.9232 (0.9316) time: 0.1897 data: 0.1071 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:30 lr: 0.000116 grad: 0.2033 (0.2871) loss: 0.9252 (0.9314) time: 0.1891 data: 0.1033 max mem: 9377 +Train: [4] [4000/6250] eta: 0:06:14 lr: 0.000116 grad: 0.1649 (0.2854) loss: 0.9220 (0.9311) time: 0.1608 data: 0.0796 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:57 lr: 0.000116 grad: 0.2422 (0.2844) loss: 0.9201 (0.9309) time: 0.1634 data: 0.0793 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:40 lr: 0.000117 grad: 0.2593 (0.2832) loss: 0.9208 (0.9306) time: 0.1713 data: 0.0869 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:24 lr: 0.000117 grad: 0.2335 (0.2816) loss: 0.9202 (0.9304) time: 0.1919 data: 0.0963 max mem: 9377 +Train: [4] [4400/6250] eta: 0:05:07 lr: 0.000118 grad: 0.1790 (0.2801) loss: 0.9198 (0.9302) time: 0.1722 data: 0.0877 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:50 lr: 0.000118 grad: 0.1958 (0.2787) loss: 0.9174 (0.9300) time: 0.1665 data: 0.0867 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:33 lr: 0.000118 grad: 0.2387 (0.2777) loss: 0.9164 (0.9297) time: 0.1108 data: 0.0175 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:17 lr: 0.000119 grad: 0.2243 (0.2769) loss: 0.9212 (0.9295) time: 0.1535 data: 0.0678 max mem: 9377 +Train: [4] [4800/6250] eta: 0:04:00 lr: 0.000119 grad: 0.2498 (0.2758) loss: 0.9190 (0.9292) time: 0.1491 data: 0.0526 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:43 lr: 0.000120 grad: 0.2110 (0.2745) loss: 0.9165 (0.9289) time: 0.1370 data: 0.0365 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:26 lr: 0.000120 grad: 0.2155 (0.2735) loss: 0.9163 (0.9286) time: 0.1703 data: 0.0819 max mem: 9377 +Train: [4] [5100/6250] eta: 0:03:09 lr: 0.000120 grad: 0.2038 (0.2725) loss: 0.9123 (0.9283) time: 0.1568 data: 0.0655 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:53 lr: 0.000121 grad: 0.2121 (0.2714) loss: 0.9098 (0.9280) time: 0.1624 data: 0.0749 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:36 lr: 0.000121 grad: 0.2238 (0.2710) loss: 0.9155 (0.9278) time: 0.1637 data: 0.0669 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:20 lr: 0.000122 grad: 0.1895 (0.2696) loss: 0.9134 (0.9275) time: 0.1557 data: 0.0569 max mem: 9377 +Train: [4] [5500/6250] eta: 0:02:03 lr: 0.000122 grad: 0.1751 (0.2688) loss: 0.9164 (0.9273) time: 0.1687 data: 0.0804 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:47 lr: 0.000122 grad: 0.2107 (0.2677) loss: 0.9115 (0.9270) time: 0.1764 data: 0.0886 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:30 lr: 0.000123 grad: 0.2355 (0.2668) loss: 0.9142 (0.9268) time: 0.1731 data: 0.0847 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:14 lr: 0.000123 grad: 0.1799 (0.2656) loss: 0.9090 (0.9265) time: 0.1584 data: 0.0636 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:58 lr: 0.000124 grad: 0.1443 (0.2643) loss: 0.9142 (0.9262) time: 0.1618 data: 0.0779 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:41 lr: 0.000124 grad: 0.1891 (0.2632) loss: 0.9114 (0.9260) time: 0.1534 data: 0.0733 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.1497 (0.2619) loss: 0.9105 (0.9257) time: 0.1572 data: 0.0827 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1716 (0.2606) loss: 0.9114 (0.9255) time: 0.1361 data: 0.0546 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1605 (0.2600) loss: 0.9091 (0.9254) time: 0.1212 data: 0.0415 max mem: 9377 +Train: [4] Total time: 0:17:21 (0.1666 s / it) +Averaged stats: lr: 0.000125 grad: 0.1605 (0.2600) loss: 0.9091 (0.9254) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:04:09 loss: 0.9081 (0.9081) time: 4.0189 data: 3.9346 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9094 (0.9092) time: 0.1389 data: 0.1134 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (hcp-train-subset): loss: 0.9094 (0.9092) +Making plots (hcp-train-subset): example=27 +Eval (hcp-val): [4] [ 0/62] eta: 0:05:16 loss: 0.9052 (0.9052) time: 5.0981 data: 5.0188 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9084 (0.9077) time: 0.1664 data: 0.1402 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:16 (0.2687 s / it) +Averaged stats (hcp-val): loss: 0.9084 (0.9077) +Making plots (hcp-val): example=1 +Eval (nsd-val): [4] [ 0/62] eta: 0:05:25 loss: 0.8621 (0.8621) time: 5.2545 data: 5.2236 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.8727 (0.8719) time: 0.1543 data: 0.1287 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:15 (0.2427 s / it) +Averaged stats (nsd-val): loss: 0.8727 (0.8719) +Making plots (nsd-val): example=28 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 10:25:38 lr: 0.000125 grad: 0.2033 (0.2033) loss: 0.8974 (0.8974) time: 6.0061 data: 5.8525 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:22:14 lr: 0.000125 grad: 0.1901 (0.2692) loss: 0.9013 (0.9067) time: 0.1631 data: 0.0856 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:19:56 lr: 0.000125 grad: 0.1670 (0.2572) loss: 0.9062 (0.9054) time: 0.1745 data: 0.0742 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:18:57 lr: 0.000125 grad: 0.1856 (0.2348) loss: 0.9048 (0.9058) time: 0.1752 data: 0.0742 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:18:00 lr: 0.000125 grad: 0.2108 (0.2299) loss: 0.9021 (0.9052) time: 0.1631 data: 0.0638 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:17:31 lr: 0.000125 grad: 0.1688 (0.2250) loss: 0.8979 (0.9046) time: 0.1867 data: 0.0806 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:17:00 lr: 0.000125 grad: 0.1708 (0.2229) loss: 0.9013 (0.9041) time: 0.1769 data: 0.0715 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:16:34 lr: 0.000125 grad: 0.2087 (0.2191) loss: 0.9035 (0.9038) time: 0.1516 data: 0.0575 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:16:07 lr: 0.000125 grad: 0.2020 (0.2173) loss: 0.9000 (0.9034) time: 0.1752 data: 0.0779 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:15:42 lr: 0.000125 grad: 0.2100 (0.2198) loss: 0.8937 (0.9031) time: 0.1750 data: 0.0800 max mem: 9377 +Train: [5] [1000/6250] eta: 0:15:22 lr: 0.000125 grad: 0.1880 (0.2164) loss: 0.8980 (0.9027) time: 0.1973 data: 0.1074 max mem: 9377 +Train: [5] [1100/6250] eta: 0:14:55 lr: 0.000125 grad: 0.1906 (0.2145) loss: 0.8967 (0.9023) time: 0.1690 data: 0.0756 max mem: 9377 +Train: [5] [1200/6250] eta: 0:14:30 lr: 0.000125 grad: 0.1826 (0.2154) loss: 0.8938 (0.9019) time: 0.1581 data: 0.0713 max mem: 9377 +Train: [5] [1300/6250] eta: 0:14:06 lr: 0.000125 grad: 0.1733 (0.2157) loss: 0.8962 (0.9014) time: 0.1521 data: 0.0627 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:42 lr: 0.000125 grad: 0.2040 (0.2151) loss: 0.8921 (0.9009) time: 0.1500 data: 0.0660 max mem: 9377 +Train: [5] [1500/6250] eta: 0:13:19 lr: 0.000125 grad: 0.1674 (0.2133) loss: 0.8934 (0.9004) time: 0.1569 data: 0.0740 max mem: 9377 +Train: [5] [1600/6250] eta: 0:12:57 lr: 0.000125 grad: 0.1879 (0.2130) loss: 0.8917 (0.9000) time: 0.1166 data: 0.0335 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:38 lr: 0.000125 grad: 0.1457 (0.2138) loss: 0.8918 (0.8994) time: 0.1672 data: 0.0788 max mem: 9377 +Train: [5] [1800/6250] eta: 0:12:19 lr: 0.000125 grad: 0.1703 (0.2133) loss: 0.8899 (0.8990) time: 0.1484 data: 0.0572 max mem: 9377 +Train: [5] [1900/6250] eta: 0:12:00 lr: 0.000125 grad: 0.1887 (0.2129) loss: 0.8865 (0.8985) time: 0.1506 data: 0.0635 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:41 lr: 0.000125 grad: 0.1698 (0.2119) loss: 0.8906 (0.8980) time: 0.1264 data: 0.0403 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:23 lr: 0.000125 grad: 0.1635 (0.2105) loss: 0.8852 (0.8975) time: 0.1357 data: 0.0472 max mem: 9377 +Train: [5] [2200/6250] eta: 0:11:07 lr: 0.000125 grad: 0.1467 (0.2095) loss: 0.8848 (0.8970) time: 0.1410 data: 0.0435 max mem: 9377 +Train: [5] [2300/6250] eta: 0:10:50 lr: 0.000125 grad: 0.1938 (0.2082) loss: 0.8862 (0.8966) time: 0.1141 data: 0.0259 max mem: 9377 +Train: [5] [2400/6250] eta: 0:10:32 lr: 0.000125 grad: 0.1790 (0.2094) loss: 0.8835 (0.8962) time: 0.1513 data: 0.0665 max mem: 9377 +Train: [5] [2500/6250] eta: 0:10:17 lr: 0.000125 grad: 0.2163 (0.2084) loss: 0.8844 (0.8957) time: 0.1799 data: 0.0931 max mem: 9377 +Train: [5] [2600/6250] eta: 0:10:00 lr: 0.000125 grad: 0.1486 (0.2071) loss: 0.8845 (0.8953) time: 0.1534 data: 0.0630 max mem: 9377 +Train: [5] [2700/6250] eta: 0:09:43 lr: 0.000125 grad: 0.1536 (0.2058) loss: 0.8831 (0.8950) time: 0.1421 data: 0.0543 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:27 lr: 0.000125 grad: 0.1652 (0.2043) loss: 0.8813 (0.8947) time: 0.1660 data: 0.0766 max mem: 9377 +Train: [5] [2900/6250] eta: 0:09:09 lr: 0.000125 grad: 0.2018 (0.2038) loss: 0.8863 (0.8943) time: 0.1537 data: 0.0518 max mem: 9377 +Train: [5] [3000/6250] eta: 0:08:52 lr: 0.000125 grad: 0.1442 (0.2033) loss: 0.8799 (0.8940) time: 0.1470 data: 0.0475 max mem: 9377 +Train: [5] [3100/6250] eta: 0:08:36 lr: 0.000125 grad: 0.1535 (0.2021) loss: 0.8845 (0.8938) time: 0.1210 data: 0.0246 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:19 lr: 0.000125 grad: 0.1582 (0.2011) loss: 0.8830 (0.8934) time: 0.1831 data: 0.0966 max mem: 9377 +Train: [5] [3300/6250] eta: 0:08:02 lr: 0.000125 grad: 0.1844 (0.2006) loss: 0.8874 (0.8931) time: 0.1652 data: 0.0713 max mem: 9377 +Train: [5] [3400/6250] eta: 0:07:45 lr: 0.000125 grad: 0.1495 (0.1991) loss: 0.8817 (0.8929) time: 0.1777 data: 0.0948 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:28 lr: 0.000125 grad: 0.1237 (0.1987) loss: 0.8865 (0.8926) time: 0.1353 data: 0.0488 max mem: 9377 +Train: [5] [3600/6250] eta: 0:07:12 lr: 0.000125 grad: 0.1605 (0.1979) loss: 0.8813 (0.8923) time: 0.1816 data: 0.1022 max mem: 9377 +Train: [5] [3700/6250] eta: 0:06:55 lr: 0.000125 grad: 0.1615 (0.1977) loss: 0.8848 (0.8920) time: 0.1497 data: 0.0532 max mem: 9377 +Train: [5] [3800/6250] eta: 0:06:38 lr: 0.000125 grad: 0.1506 (0.1973) loss: 0.8791 (0.8917) time: 0.1747 data: 0.0943 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:22 lr: 0.000125 grad: 0.1346 (0.1965) loss: 0.8774 (0.8915) time: 0.1668 data: 0.0733 max mem: 9377 +Train: [5] [4000/6250] eta: 0:06:05 lr: 0.000125 grad: 0.1630 (0.1966) loss: 0.8823 (0.8913) time: 0.1338 data: 0.0404 max mem: 9377 +Train: [5] [4100/6250] eta: 0:05:48 lr: 0.000125 grad: 0.1594 (0.1959) loss: 0.8817 (0.8911) time: 0.1617 data: 0.0776 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:32 lr: 0.000125 grad: 0.1397 (0.1946) loss: 0.8851 (0.8909) time: 0.1616 data: 0.0701 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:16 lr: 0.000125 grad: 0.1291 (0.1937) loss: 0.8795 (0.8907) time: 0.1829 data: 0.0997 max mem: 9377 +Train: [5] [4400/6250] eta: 0:04:59 lr: 0.000125 grad: 0.1227 (0.1929) loss: 0.8830 (0.8904) time: 0.1602 data: 0.0792 max mem: 9377 +Train: [5] [4500/6250] eta: 0:04:43 lr: 0.000125 grad: 0.1620 (0.1922) loss: 0.8826 (0.8902) time: 0.1384 data: 0.0505 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:26 lr: 0.000125 grad: 0.2063 (0.1916) loss: 0.8867 (0.8900) time: 0.1710 data: 0.0766 max mem: 9377 +Train: [5] [4700/6250] eta: 0:04:10 lr: 0.000125 grad: 0.1331 (0.1906) loss: 0.8773 (0.8898) time: 0.1437 data: 0.0635 max mem: 9377 +Train: [5] [4800/6250] eta: 0:03:54 lr: 0.000125 grad: 0.1202 (0.1899) loss: 0.8777 (0.8896) time: 0.1636 data: 0.0839 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:37 lr: 0.000125 grad: 0.1228 (0.1888) loss: 0.8772 (0.8893) time: 0.1593 data: 0.0794 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:21 lr: 0.000125 grad: 0.1365 (0.1877) loss: 0.8791 (0.8891) time: 0.1529 data: 0.0713 max mem: 9377 +Train: [5] [5100/6250] eta: 0:03:05 lr: 0.000125 grad: 0.1436 (0.1867) loss: 0.8788 (0.8889) time: 0.1414 data: 0.0482 max mem: 9377 +Train: [5] [5200/6250] eta: 0:02:49 lr: 0.000125 grad: 0.1197 (0.1859) loss: 0.8749 (0.8886) time: 0.1770 data: 0.0897 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:33 lr: 0.000125 grad: 0.1195 (0.1850) loss: 0.8726 (0.8884) time: 0.1501 data: 0.0586 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:17 lr: 0.000125 grad: 0.1258 (0.1842) loss: 0.8747 (0.8881) time: 0.1804 data: 0.0967 max mem: 9377 +Train: [5] [5500/6250] eta: 0:02:00 lr: 0.000125 grad: 0.1702 (0.1835) loss: 0.8774 (0.8879) time: 0.1095 data: 0.0158 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:44 lr: 0.000125 grad: 0.1164 (0.1827) loss: 0.8754 (0.8877) time: 0.1479 data: 0.0576 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:28 lr: 0.000125 grad: 0.1238 (0.1817) loss: 0.8759 (0.8875) time: 0.1625 data: 0.0762 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.1152 (0.1808) loss: 0.8748 (0.8873) time: 0.1860 data: 0.0847 max mem: 9377 +Train: [5] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.1164 (0.1801) loss: 0.8711 (0.8870) time: 0.1609 data: 0.0762 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1109 (0.1794) loss: 0.8729 (0.8868) time: 0.1577 data: 0.0757 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.1448 (0.1787) loss: 0.8683 (0.8866) time: 0.1597 data: 0.0768 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1196 (0.1779) loss: 0.8750 (0.8863) time: 0.1817 data: 0.0962 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1096 (0.1776) loss: 0.8747 (0.8862) time: 0.1981 data: 0.1123 max mem: 9377 +Train: [5] Total time: 0:17:05 (0.1640 s / it) +Averaged stats: lr: 0.000125 grad: 0.1096 (0.1776) loss: 0.8747 (0.8862) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:05:19 loss: 0.8763 (0.8763) time: 5.1526 data: 5.1203 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8759 (0.8761) time: 0.1276 data: 0.1007 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (hcp-train-subset): loss: 0.8759 (0.8761) +Eval (hcp-val): [5] [ 0/62] eta: 0:04:37 loss: 0.8691 (0.8691) time: 4.4823 data: 4.4056 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8735 (0.8741) time: 0.1360 data: 0.1109 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:14 (0.2389 s / it) +Averaged stats (hcp-val): loss: 0.8735 (0.8741) +Eval (nsd-val): [5] [ 0/62] eta: 0:03:54 loss: 0.8303 (0.8303) time: 3.7807 data: 3.6762 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8406 (0.8397) time: 0.1315 data: 0.1039 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:13 (0.2193 s / it) +Averaged stats (nsd-val): loss: 0.8406 (0.8397) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 12:06:25 lr: 0.000125 grad: 0.3214 (0.3214) loss: 0.8790 (0.8790) time: 6.9737 data: 6.8123 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:21:55 lr: 0.000125 grad: 0.1213 (0.1517) loss: 0.8707 (0.8735) time: 0.1580 data: 0.0578 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:19:42 lr: 0.000125 grad: 0.1208 (0.1494) loss: 0.8629 (0.8728) time: 0.1589 data: 0.0550 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:18:50 lr: 0.000125 grad: 0.1405 (0.1549) loss: 0.8781 (0.8725) time: 0.1734 data: 0.0683 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:18:10 lr: 0.000125 grad: 0.1172 (0.1510) loss: 0.8707 (0.8724) time: 0.1600 data: 0.0517 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:17:29 lr: 0.000125 grad: 0.1115 (0.1478) loss: 0.8709 (0.8718) time: 0.1675 data: 0.0694 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:16:52 lr: 0.000125 grad: 0.1084 (0.1466) loss: 0.8732 (0.8714) time: 0.1762 data: 0.0850 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:16:17 lr: 0.000125 grad: 0.1124 (0.1427) loss: 0.8708 (0.8709) time: 0.1648 data: 0.0721 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:15:53 lr: 0.000125 grad: 0.1196 (0.1419) loss: 0.8701 (0.8708) time: 0.1689 data: 0.0836 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:15:36 lr: 0.000125 grad: 0.1195 (0.1407) loss: 0.8618 (0.8705) time: 0.2154 data: 0.1306 max mem: 9377 +Train: [6] [1000/6250] eta: 0:15:13 lr: 0.000125 grad: 0.1124 (0.1400) loss: 0.8712 (0.8703) time: 0.1927 data: 0.0997 max mem: 9377 +Train: [6] [1100/6250] eta: 0:14:44 lr: 0.000125 grad: 0.1201 (0.1390) loss: 0.8670 (0.8703) time: 0.1703 data: 0.0908 max mem: 9377 +Train: [6] [1200/6250] eta: 0:14:21 lr: 0.000125 grad: 0.1310 (0.1389) loss: 0.8696 (0.8701) time: 0.1636 data: 0.0773 max mem: 9377 +Train: [6] [1300/6250] eta: 0:13:59 lr: 0.000125 grad: 0.1194 (0.1381) loss: 0.8647 (0.8699) time: 0.1495 data: 0.0619 max mem: 9377 +Train: [6] [1400/6250] eta: 0:13:38 lr: 0.000125 grad: 0.1073 (0.1383) loss: 0.8701 (0.8698) time: 0.1537 data: 0.0679 max mem: 9377 +Train: [6] [1500/6250] eta: 0:13:19 lr: 0.000125 grad: 0.1095 (0.1371) loss: 0.8685 (0.8697) time: 0.1818 data: 0.0932 max mem: 9377 +Train: [6] [1600/6250] eta: 0:13:00 lr: 0.000125 grad: 0.1209 (0.1370) loss: 0.8679 (0.8696) time: 0.1576 data: 0.0739 max mem: 9377 +Train: [6] [1700/6250] eta: 0:12:39 lr: 0.000125 grad: 0.1167 (0.1369) loss: 0.8636 (0.8694) time: 0.1388 data: 0.0519 max mem: 9377 +Train: [6] [1800/6250] eta: 0:12:18 lr: 0.000125 grad: 0.1170 (0.1362) loss: 0.8672 (0.8693) time: 0.1421 data: 0.0611 max mem: 9377 +Train: [6] [1900/6250] eta: 0:12:00 lr: 0.000125 grad: 0.1063 (0.1357) loss: 0.8665 (0.8692) time: 0.1559 data: 0.0648 max mem: 9377 +Train: [6] [2000/6250] eta: 0:11:42 lr: 0.000125 grad: 0.1083 (0.1357) loss: 0.8721 (0.8691) time: 0.1258 data: 0.0290 max mem: 9377 +Train: [6] [2100/6250] eta: 0:11:23 lr: 0.000125 grad: 0.1140 (0.1355) loss: 0.8647 (0.8690) time: 0.1518 data: 0.0654 max mem: 9377 +Train: [6] [2200/6250] eta: 0:11:05 lr: 0.000125 grad: 0.1135 (0.1348) loss: 0.8671 (0.8689) time: 0.1641 data: 0.0802 max mem: 9377 +Train: [6] [2300/6250] eta: 0:10:48 lr: 0.000125 grad: 0.1100 (0.1345) loss: 0.8634 (0.8688) time: 0.1551 data: 0.0672 max mem: 9377 +Train: [6] [2400/6250] eta: 0:10:30 lr: 0.000125 grad: 0.1019 (0.1335) loss: 0.8704 (0.8687) time: 0.1500 data: 0.0645 max mem: 9377 +Train: [6] [2500/6250] eta: 0:10:12 lr: 0.000125 grad: 0.0906 (0.1332) loss: 0.8646 (0.8686) time: 0.1620 data: 0.0851 max mem: 9377 +Train: [6] [2600/6250] eta: 0:09:54 lr: 0.000125 grad: 0.1239 (0.1330) loss: 0.8651 (0.8684) time: 0.1603 data: 0.0751 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:38 lr: 0.000125 grad: 0.1729 (0.1334) loss: 0.8647 (0.8683) time: 0.1734 data: 0.0898 max mem: 9377 +Train: [6] [2800/6250] eta: 0:09:22 lr: 0.000125 grad: 0.1060 (0.1329) loss: 0.8640 (0.8682) time: 0.1875 data: 0.1070 max mem: 9377 +Train: [6] [2900/6250] eta: 0:09:05 lr: 0.000125 grad: 0.1264 (0.1325) loss: 0.8698 (0.8681) time: 0.1581 data: 0.0717 max mem: 9377 +Train: [6] [3000/6250] eta: 0:08:48 lr: 0.000125 grad: 0.1379 (0.1321) loss: 0.8665 (0.8680) time: 0.1652 data: 0.0819 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:33 lr: 0.000125 grad: 0.1021 (0.1317) loss: 0.8674 (0.8678) time: 0.2044 data: 0.1223 max mem: 9377 +Train: [6] [3200/6250] eta: 0:08:16 lr: 0.000125 grad: 0.1167 (0.1317) loss: 0.8603 (0.8677) time: 0.1696 data: 0.0829 max mem: 9377 +Train: [6] [3300/6250] eta: 0:08:01 lr: 0.000125 grad: 0.1287 (0.1315) loss: 0.8626 (0.8676) time: 0.1405 data: 0.0569 max mem: 9377 +Train: [6] [3400/6250] eta: 0:07:44 lr: 0.000125 grad: 0.1048 (0.1309) loss: 0.8620 (0.8675) time: 0.1670 data: 0.0788 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:27 lr: 0.000125 grad: 0.1025 (0.1306) loss: 0.8638 (0.8674) time: 0.1500 data: 0.0670 max mem: 9377 +Train: [6] [3600/6250] eta: 0:07:11 lr: 0.000125 grad: 0.1024 (0.1301) loss: 0.8588 (0.8673) time: 0.1806 data: 0.0960 max mem: 9377 +Train: [6] [3700/6250] eta: 0:06:54 lr: 0.000125 grad: 0.1093 (0.1296) loss: 0.8661 (0.8672) time: 0.1744 data: 0.0896 max mem: 9377 +Train: [6] [3800/6250] eta: 0:06:37 lr: 0.000125 grad: 0.0917 (0.1291) loss: 0.8607 (0.8671) time: 0.1443 data: 0.0514 max mem: 9377 +Train: [6] [3900/6250] eta: 0:06:21 lr: 0.000125 grad: 0.0907 (0.1286) loss: 0.8649 (0.8670) time: 0.1508 data: 0.0564 max mem: 9377 +Train: [6] [4000/6250] eta: 0:06:04 lr: 0.000125 grad: 0.1103 (0.1286) loss: 0.8653 (0.8669) time: 0.1484 data: 0.0598 max mem: 9377 +Train: [6] [4100/6250] eta: 0:05:47 lr: 0.000125 grad: 0.1007 (0.1280) loss: 0.8593 (0.8668) time: 0.1337 data: 0.0392 max mem: 9377 +Train: [6] [4200/6250] eta: 0:05:31 lr: 0.000125 grad: 0.1015 (0.1274) loss: 0.8597 (0.8667) time: 0.1644 data: 0.0799 max mem: 9377 +Train: [6] [4300/6250] eta: 0:05:14 lr: 0.000125 grad: 0.0996 (0.1271) loss: 0.8673 (0.8666) time: 0.1475 data: 0.0608 max mem: 9377 +Train: [6] [4400/6250] eta: 0:04:58 lr: 0.000125 grad: 0.1088 (0.1267) loss: 0.8641 (0.8665) time: 0.1577 data: 0.0805 max mem: 9377 +Train: [6] [4500/6250] eta: 0:04:41 lr: 0.000125 grad: 0.1103 (0.1264) loss: 0.8636 (0.8664) time: 0.1842 data: 0.1033 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:25 lr: 0.000125 grad: 0.0924 (0.1259) loss: 0.8572 (0.8663) time: 0.1382 data: 0.0521 max mem: 9377 +Train: [6] [4700/6250] eta: 0:04:09 lr: 0.000125 grad: 0.0975 (0.1253) loss: 0.8673 (0.8662) time: 0.1553 data: 0.0685 max mem: 9377 +Train: [6] [4800/6250] eta: 0:03:52 lr: 0.000125 grad: 0.0898 (0.1248) loss: 0.8637 (0.8662) time: 0.1567 data: 0.0715 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:37 lr: 0.000125 grad: 0.0934 (0.1244) loss: 0.8617 (0.8661) time: 0.1966 data: 0.1041 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:21 lr: 0.000125 grad: 0.1064 (0.1242) loss: 0.8602 (0.8660) time: 0.1693 data: 0.0790 max mem: 9377 +Train: [6] [5100/6250] eta: 0:03:05 lr: 0.000125 grad: 0.0977 (0.1239) loss: 0.8617 (0.8659) time: 0.1742 data: 0.0916 max mem: 9377 +Train: [6] [5200/6250] eta: 0:02:48 lr: 0.000125 grad: 0.0874 (0.1234) loss: 0.8621 (0.8658) time: 0.1542 data: 0.0653 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:32 lr: 0.000125 grad: 0.1082 (0.1231) loss: 0.8599 (0.8657) time: 0.1672 data: 0.0729 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:16 lr: 0.000125 grad: 0.1043 (0.1231) loss: 0.8551 (0.8656) time: 0.1949 data: 0.1104 max mem: 9377 +Train: [6] [5500/6250] eta: 0:02:00 lr: 0.000125 grad: 0.1035 (0.1227) loss: 0.8604 (0.8654) time: 0.1426 data: 0.0555 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:44 lr: 0.000125 grad: 0.0977 (0.1222) loss: 0.8602 (0.8653) time: 0.1745 data: 0.0843 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:28 lr: 0.000125 grad: 0.0930 (0.1220) loss: 0.8636 (0.8653) time: 0.3031 data: 0.2328 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.0930 (0.1218) loss: 0.8618 (0.8652) time: 0.1453 data: 0.0666 max mem: 9377 +Train: [6] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.1049 (0.1214) loss: 0.8610 (0.8652) time: 0.1511 data: 0.0730 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1045 (0.1211) loss: 0.8614 (0.8651) time: 0.1359 data: 0.0513 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.1046 (0.1208) loss: 0.8599 (0.8650) time: 0.1985 data: 0.0989 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.0958 (0.1205) loss: 0.8605 (0.8649) time: 0.1716 data: 0.0856 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0903 (0.1203) loss: 0.8573 (0.8648) time: 0.1660 data: 0.0718 max mem: 9377 +Train: [6] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000125 grad: 0.0903 (0.1203) loss: 0.8573 (0.8648) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:04:45 loss: 0.8664 (0.8664) time: 4.6032 data: 4.5263 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8645 (0.8648) time: 0.1478 data: 0.1224 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:15 (0.2445 s / it) +Averaged stats (hcp-train-subset): loss: 0.8645 (0.8648) +Eval (hcp-val): [6] [ 0/62] eta: 0:03:31 loss: 0.8619 (0.8619) time: 3.4051 data: 3.3170 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8627 (0.8627) time: 0.1276 data: 0.1020 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:15 (0.2500 s / it) +Averaged stats (hcp-val): loss: 0.8627 (0.8627) +Eval (nsd-val): [6] [ 0/62] eta: 0:05:55 loss: 0.8225 (0.8225) time: 5.7342 data: 5.7038 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8315 (0.8295) time: 0.1321 data: 0.1070 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:13 (0.2230 s / it) +Averaged stats (nsd-val): loss: 0.8315 (0.8295) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 9:03:43 lr: 0.000125 grad: 0.1299 (0.1299) loss: 0.8642 (0.8642) time: 5.2198 data: 5.0067 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:21:09 lr: 0.000125 grad: 0.0886 (0.1015) loss: 0.8708 (0.8713) time: 0.1527 data: 0.0663 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:18:52 lr: 0.000125 grad: 0.1021 (0.1149) loss: 0.8576 (0.8660) time: 0.1836 data: 0.0669 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:18:38 lr: 0.000125 grad: 0.0908 (0.1125) loss: 0.8562 (0.8631) time: 0.1911 data: 0.0640 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:17:58 lr: 0.000125 grad: 0.1002 (0.1107) loss: 0.8543 (0.8611) time: 0.1637 data: 0.0596 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:17:13 lr: 0.000125 grad: 0.0911 (0.1085) loss: 0.8541 (0.8598) time: 0.1682 data: 0.0738 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:16:42 lr: 0.000125 grad: 0.0948 (0.1075) loss: 0.8538 (0.8589) time: 0.1935 data: 0.0778 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:16:10 lr: 0.000125 grad: 0.0919 (0.1080) loss: 0.8576 (0.8584) time: 0.1522 data: 0.0596 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:15:45 lr: 0.000125 grad: 0.0880 (0.1065) loss: 0.8568 (0.8583) time: 0.1615 data: 0.0775 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:15:30 lr: 0.000125 grad: 0.0916 (0.1053) loss: 0.8661 (0.8583) time: 0.1852 data: 0.0890 max mem: 9377 +Train: [7] [1000/6250] eta: 0:15:13 lr: 0.000125 grad: 0.0993 (0.1053) loss: 0.8620 (0.8586) time: 0.1683 data: 0.0776 max mem: 9377 +Train: [7] [1100/6250] eta: 0:14:57 lr: 0.000125 grad: 0.0867 (0.1059) loss: 0.8576 (0.8588) time: 0.1787 data: 0.0804 max mem: 9377 +Train: [7] [1200/6250] eta: 0:14:34 lr: 0.000125 grad: 0.1047 (0.1056) loss: 0.8612 (0.8589) time: 0.1704 data: 0.0816 max mem: 9377 +Train: [7] [1300/6250] eta: 0:14:17 lr: 0.000125 grad: 0.0861 (0.1046) loss: 0.8578 (0.8590) time: 0.1704 data: 0.0839 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:56 lr: 0.000125 grad: 0.0887 (0.1036) loss: 0.8609 (0.8590) time: 0.1705 data: 0.0817 max mem: 9377 +Train: [7] [1500/6250] eta: 0:13:36 lr: 0.000125 grad: 0.0920 (0.1032) loss: 0.8640 (0.8590) time: 0.1719 data: 0.0872 max mem: 9377 +Train: [7] [1600/6250] eta: 0:13:14 lr: 0.000125 grad: 0.0832 (0.1022) loss: 0.8610 (0.8591) time: 0.1586 data: 0.0742 max mem: 9377 +Train: [7] [1700/6250] eta: 0:12:55 lr: 0.000125 grad: 0.1064 (0.1024) loss: 0.8579 (0.8592) time: 0.1688 data: 0.0784 max mem: 9377 +Train: [7] [1800/6250] eta: 0:12:35 lr: 0.000125 grad: 0.0878 (0.1017) loss: 0.8619 (0.8592) time: 0.1664 data: 0.0816 max mem: 9377 +Train: [7] [1900/6250] eta: 0:12:16 lr: 0.000125 grad: 0.0858 (0.1015) loss: 0.8622 (0.8592) time: 0.1513 data: 0.0676 max mem: 9377 +Train: [7] [2000/6250] eta: 0:11:57 lr: 0.000125 grad: 0.0949 (0.1011) loss: 0.8586 (0.8593) time: 0.1514 data: 0.0564 max mem: 9377 +Train: [7] [2100/6250] eta: 0:11:37 lr: 0.000125 grad: 0.0828 (0.1008) loss: 0.8557 (0.8594) time: 0.1689 data: 0.0760 max mem: 9377 +Train: [7] [2200/6250] eta: 0:11:19 lr: 0.000125 grad: 0.0862 (0.1005) loss: 0.8531 (0.8593) time: 0.1505 data: 0.0606 max mem: 9377 +Train: [7] [2300/6250] eta: 0:10:59 lr: 0.000125 grad: 0.0859 (0.1003) loss: 0.8590 (0.8592) time: 0.1554 data: 0.0663 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:40 lr: 0.000125 grad: 0.0830 (0.0999) loss: 0.8580 (0.8591) time: 0.1530 data: 0.0647 max mem: 9377 +Train: [7] [2500/6250] eta: 0:10:23 lr: 0.000125 grad: 0.0864 (0.1001) loss: 0.8582 (0.8591) time: 0.1712 data: 0.0859 max mem: 9377 +Train: [7] [2600/6250] eta: 0:10:06 lr: 0.000125 grad: 0.0893 (0.1000) loss: 0.8567 (0.8590) time: 0.1696 data: 0.0900 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:51 lr: 0.000125 grad: 0.0904 (0.0999) loss: 0.8535 (0.8590) time: 0.2263 data: 0.1422 max mem: 9377 +Train: [7] [2800/6250] eta: 0:09:32 lr: 0.000125 grad: 0.0916 (0.0998) loss: 0.8574 (0.8589) time: 0.1337 data: 0.0470 max mem: 9377 +Train: [7] [2900/6250] eta: 0:09:15 lr: 0.000125 grad: 0.0918 (0.0996) loss: 0.8561 (0.8588) time: 0.1679 data: 0.0841 max mem: 9377 +Train: [7] [3000/6250] eta: 0:08:57 lr: 0.000125 grad: 0.0833 (0.0998) loss: 0.8586 (0.8586) time: 0.1546 data: 0.0604 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:40 lr: 0.000125 grad: 0.0922 (0.1002) loss: 0.8561 (0.8585) time: 0.1709 data: 0.0913 max mem: 9377 +Train: [7] [3200/6250] eta: 0:08:23 lr: 0.000125 grad: 0.0897 (0.1001) loss: 0.8560 (0.8584) time: 0.1649 data: 0.0710 max mem: 9377 +Train: [7] [3300/6250] eta: 0:08:06 lr: 0.000125 grad: 0.0878 (0.1001) loss: 0.8548 (0.8584) time: 0.1464 data: 0.0597 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:49 lr: 0.000125 grad: 0.0953 (0.1000) loss: 0.8527 (0.8583) time: 0.1735 data: 0.0879 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:33 lr: 0.000125 grad: 0.0895 (0.1000) loss: 0.8601 (0.8582) time: 0.1777 data: 0.0869 max mem: 9377 +Train: [7] [3600/6250] eta: 0:07:16 lr: 0.000125 grad: 0.0907 (0.0998) loss: 0.8540 (0.8581) time: 0.1445 data: 0.0547 max mem: 9377 +Train: [7] [3700/6250] eta: 0:07:00 lr: 0.000125 grad: 0.0909 (0.0996) loss: 0.8584 (0.8581) time: 0.1550 data: 0.0687 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:43 lr: 0.000125 grad: 0.0833 (0.0997) loss: 0.8524 (0.8579) time: 0.1292 data: 0.0440 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:26 lr: 0.000125 grad: 0.0847 (0.0994) loss: 0.8577 (0.8578) time: 0.1192 data: 0.0348 max mem: 9377 +Train: [7] [4000/6250] eta: 0:06:09 lr: 0.000125 grad: 0.1008 (0.0994) loss: 0.8543 (0.8577) time: 0.1936 data: 0.1023 max mem: 9377 +Train: [7] [4100/6250] eta: 0:05:53 lr: 0.000125 grad: 0.0816 (0.0992) loss: 0.8505 (0.8576) time: 0.1598 data: 0.0748 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:37 lr: 0.000125 grad: 0.0872 (0.0991) loss: 0.8549 (0.8575) time: 0.1383 data: 0.0441 max mem: 9377 +Train: [7] [4300/6250] eta: 0:05:20 lr: 0.000125 grad: 0.0846 (0.0990) loss: 0.8510 (0.8574) time: 0.2096 data: 0.1273 max mem: 9377 +Train: [7] [4400/6250] eta: 0:05:03 lr: 0.000125 grad: 0.0878 (0.0988) loss: 0.8503 (0.8573) time: 0.1610 data: 0.0692 max mem: 9377 +Train: [7] [4500/6250] eta: 0:04:47 lr: 0.000125 grad: 0.0858 (0.0987) loss: 0.8527 (0.8572) time: 0.1641 data: 0.0855 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:30 lr: 0.000125 grad: 0.0900 (0.0987) loss: 0.8533 (0.8571) time: 0.1857 data: 0.0957 max mem: 9377 +Train: [7] [4700/6250] eta: 0:04:14 lr: 0.000125 grad: 0.0832 (0.0987) loss: 0.8512 (0.8570) time: 0.1789 data: 0.0927 max mem: 9377 +Train: [7] [4800/6250] eta: 0:03:57 lr: 0.000125 grad: 0.0811 (0.0985) loss: 0.8531 (0.8569) time: 0.1551 data: 0.0638 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:41 lr: 0.000125 grad: 0.0913 (0.0986) loss: 0.8561 (0.8568) time: 0.1333 data: 0.0380 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:25 lr: 0.000125 grad: 0.0936 (0.0987) loss: 0.8544 (0.8568) time: 0.1752 data: 0.0894 max mem: 9377 +Train: [7] [5100/6250] eta: 0:03:08 lr: 0.000125 grad: 0.0850 (0.0986) loss: 0.8541 (0.8567) time: 0.1701 data: 0.0821 max mem: 9377 +Train: [7] [5200/6250] eta: 0:02:52 lr: 0.000125 grad: 0.0854 (0.0984) loss: 0.8561 (0.8567) time: 0.1377 data: 0.0474 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:35 lr: 0.000125 grad: 0.0913 (0.0983) loss: 0.8520 (0.8566) time: 0.1608 data: 0.0732 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:19 lr: 0.000125 grad: 0.0857 (0.0982) loss: 0.8516 (0.8565) time: 0.1703 data: 0.0876 max mem: 9377 +Train: [7] [5500/6250] eta: 0:02:03 lr: 0.000125 grad: 0.0795 (0.0979) loss: 0.8536 (0.8565) time: 0.1647 data: 0.0771 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:46 lr: 0.000125 grad: 0.1009 (0.0980) loss: 0.8524 (0.8563) time: 0.1426 data: 0.0579 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:30 lr: 0.000125 grad: 0.0833 (0.0979) loss: 0.8438 (0.8562) time: 0.1730 data: 0.0810 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:14 lr: 0.000125 grad: 0.0878 (0.0979) loss: 0.8534 (0.8561) time: 0.1660 data: 0.0773 max mem: 9377 +Train: [7] [5900/6250] eta: 0:00:57 lr: 0.000125 grad: 0.0863 (0.0979) loss: 0.8480 (0.8559) time: 0.1610 data: 0.0775 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:41 lr: 0.000125 grad: 0.0919 (0.0978) loss: 0.8473 (0.8558) time: 0.1522 data: 0.0668 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.0824 (0.0978) loss: 0.8508 (0.8557) time: 0.1561 data: 0.0683 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.0757 (0.0976) loss: 0.8515 (0.8556) time: 0.1761 data: 0.0827 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0806 (0.0976) loss: 0.8542 (0.8556) time: 0.1590 data: 0.0621 max mem: 9377 +Train: [7] Total time: 0:17:18 (0.1662 s / it) +Averaged stats: lr: 0.000125 grad: 0.0806 (0.0976) loss: 0.8542 (0.8556) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:03:59 loss: 0.8623 (0.8623) time: 3.8633 data: 3.7947 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8605 (0.8606) time: 0.1567 data: 0.1275 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:15 (0.2436 s / it) +Averaged stats (hcp-train-subset): loss: 0.8605 (0.8606) +Eval (hcp-val): [7] [ 0/62] eta: 0:04:05 loss: 0.8550 (0.8550) time: 3.9605 data: 3.8833 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8576 (0.8583) time: 0.2037 data: 0.1790 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:16 (0.2632 s / it) +Averaged stats (hcp-val): loss: 0.8576 (0.8583) +Eval (nsd-val): [7] [ 0/62] eta: 0:05:55 loss: 0.8162 (0.8162) time: 5.7288 data: 5.6987 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8289 (0.8283) time: 0.1280 data: 0.1011 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:14 (0.2379 s / it) +Averaged stats (nsd-val): loss: 0.8289 (0.8283) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 11:13:43 lr: 0.000125 grad: 0.0834 (0.0834) loss: 0.8700 (0.8700) time: 6.4678 data: 6.3294 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:23:21 lr: 0.000125 grad: 0.0861 (0.0962) loss: 0.8576 (0.8647) time: 0.1692 data: 0.0721 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:19:58 lr: 0.000125 grad: 0.0866 (0.0939) loss: 0.8587 (0.8597) time: 0.1892 data: 0.0873 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:19:13 lr: 0.000125 grad: 0.0787 (0.0945) loss: 0.8571 (0.8587) time: 0.1874 data: 0.0823 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:18:37 lr: 0.000125 grad: 0.0826 (0.0928) loss: 0.8597 (0.8579) time: 0.1586 data: 0.0462 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:17:46 lr: 0.000125 grad: 0.0867 (0.0914) loss: 0.8502 (0.8567) time: 0.1653 data: 0.0640 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:17:09 lr: 0.000125 grad: 0.0873 (0.0916) loss: 0.8514 (0.8560) time: 0.1720 data: 0.0897 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:16:40 lr: 0.000125 grad: 0.0817 (0.0905) loss: 0.8488 (0.8553) time: 0.2102 data: 0.1229 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:16:08 lr: 0.000125 grad: 0.0936 (0.0901) loss: 0.8538 (0.8551) time: 0.1662 data: 0.0747 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:15:49 lr: 0.000125 grad: 0.0878 (0.0900) loss: 0.8574 (0.8548) time: 0.1990 data: 0.1056 max mem: 9377 +Train: [8] [1000/6250] eta: 0:15:23 lr: 0.000125 grad: 0.0893 (0.0899) loss: 0.8469 (0.8545) time: 0.1200 data: 0.0355 max mem: 9377 +Train: [8] [1100/6250] eta: 0:15:02 lr: 0.000125 grad: 0.0828 (0.0896) loss: 0.8505 (0.8542) time: 0.1698 data: 0.0818 max mem: 9377 +Train: [8] [1200/6250] eta: 0:14:37 lr: 0.000125 grad: 0.0914 (0.0896) loss: 0.8552 (0.8539) time: 0.1683 data: 0.0804 max mem: 9377 +Train: [8] [1300/6250] eta: 0:14:14 lr: 0.000125 grad: 0.0799 (0.0893) loss: 0.8528 (0.8537) time: 0.1628 data: 0.0728 max mem: 9377 +Train: [8] [1400/6250] eta: 0:13:53 lr: 0.000125 grad: 0.0878 (0.0890) loss: 0.8504 (0.8536) time: 0.1580 data: 0.0726 max mem: 9377 +Train: [8] [1500/6250] eta: 0:13:33 lr: 0.000125 grad: 0.1033 (0.0892) loss: 0.8478 (0.8533) time: 0.1705 data: 0.0881 max mem: 9377 +Train: [8] [1600/6250] eta: 0:13:14 lr: 0.000125 grad: 0.0778 (0.0891) loss: 0.8508 (0.8530) time: 0.1644 data: 0.0810 max mem: 9377 +Train: [8] [1700/6250] eta: 0:12:55 lr: 0.000125 grad: 0.0895 (0.0895) loss: 0.8456 (0.8526) time: 0.1636 data: 0.0826 max mem: 9377 +Train: [8] [1800/6250] eta: 0:12:34 lr: 0.000125 grad: 0.0839 (0.0896) loss: 0.8520 (0.8524) time: 0.1544 data: 0.0688 max mem: 9377 +Train: [8] [1900/6250] eta: 0:12:16 lr: 0.000125 grad: 0.1006 (0.0902) loss: 0.8444 (0.8521) time: 0.2050 data: 0.1081 max mem: 9377 +Train: [8] [2000/6250] eta: 0:11:56 lr: 0.000125 grad: 0.0834 (0.0906) loss: 0.8507 (0.8518) time: 0.1852 data: 0.1013 max mem: 9377 +Train: [8] [2100/6250] eta: 0:11:38 lr: 0.000125 grad: 0.0852 (0.0907) loss: 0.8474 (0.8516) time: 0.2120 data: 0.1221 max mem: 9377 +Train: [8] [2200/6250] eta: 0:11:19 lr: 0.000125 grad: 0.0791 (0.0905) loss: 0.8467 (0.8515) time: 0.1179 data: 0.0317 max mem: 9377 +Train: [8] [2300/6250] eta: 0:10:59 lr: 0.000125 grad: 0.0825 (0.0903) loss: 0.8491 (0.8514) time: 0.1600 data: 0.0672 max mem: 9377 +Train: [8] [2400/6250] eta: 0:10:40 lr: 0.000125 grad: 0.0815 (0.0902) loss: 0.8467 (0.8512) time: 0.1460 data: 0.0552 max mem: 9377 +Train: [8] [2500/6250] eta: 0:10:24 lr: 0.000125 grad: 0.0822 (0.0903) loss: 0.8442 (0.8510) time: 0.2065 data: 0.1231 max mem: 9377 +Train: [8] [2600/6250] eta: 0:10:05 lr: 0.000125 grad: 0.0870 (0.0909) loss: 0.8532 (0.8508) time: 0.1496 data: 0.0649 max mem: 9377 +Train: [8] [2700/6250] eta: 0:09:48 lr: 0.000125 grad: 0.0821 (0.0908) loss: 0.8446 (0.8507) time: 0.1827 data: 0.0939 max mem: 9377 +Train: [8] [2800/6250] eta: 0:09:29 lr: 0.000125 grad: 0.0852 (0.0909) loss: 0.8469 (0.8505) time: 0.1477 data: 0.0613 max mem: 9377 +Train: [8] [2900/6250] eta: 0:09:12 lr: 0.000125 grad: 0.0930 (0.0910) loss: 0.8449 (0.8503) time: 0.1562 data: 0.0636 max mem: 9377 +Train: [8] [3000/6250] eta: 0:08:56 lr: 0.000125 grad: 0.0842 (0.0908) loss: 0.8418 (0.8502) time: 0.1677 data: 0.0812 max mem: 9377 +Train: [8] [3100/6250] eta: 0:08:40 lr: 0.000125 grad: 0.0929 (0.0907) loss: 0.8444 (0.8501) time: 0.1702 data: 0.0783 max mem: 9377 +Train: [8] [3200/6250] eta: 0:08:22 lr: 0.000125 grad: 0.0784 (0.0908) loss: 0.8536 (0.8500) time: 0.1630 data: 0.0800 max mem: 9377 +Train: [8] [3300/6250] eta: 0:08:06 lr: 0.000125 grad: 0.0870 (0.0907) loss: 0.8494 (0.8499) time: 0.1558 data: 0.0686 max mem: 9377 +Train: [8] [3400/6250] eta: 0:07:50 lr: 0.000125 grad: 0.0904 (0.0907) loss: 0.8431 (0.8498) time: 0.1887 data: 0.1020 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:32 lr: 0.000125 grad: 0.0750 (0.0904) loss: 0.8474 (0.8498) time: 0.1365 data: 0.0465 max mem: 9377 +Train: [8] [3600/6250] eta: 0:07:15 lr: 0.000125 grad: 0.0847 (0.0903) loss: 0.8515 (0.8498) time: 0.1580 data: 0.0677 max mem: 9377 +Train: [8] [3700/6250] eta: 0:06:58 lr: 0.000125 grad: 0.0775 (0.0904) loss: 0.8469 (0.8499) time: 0.1275 data: 0.0385 max mem: 9377 +Train: [8] [3800/6250] eta: 0:06:41 lr: 0.000125 grad: 0.0812 (0.0902) loss: 0.8532 (0.8499) time: 0.1266 data: 0.0348 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:24 lr: 0.000125 grad: 0.0806 (0.0902) loss: 0.8532 (0.8499) time: 0.1485 data: 0.0625 max mem: 9377 +Train: [8] [4000/6250] eta: 0:06:07 lr: 0.000125 grad: 0.0822 (0.0901) loss: 0.8433 (0.8499) time: 0.1672 data: 0.0874 max mem: 9377 +Train: [8] [4100/6250] eta: 0:05:50 lr: 0.000125 grad: 0.0824 (0.0899) loss: 0.8424 (0.8498) time: 0.1470 data: 0.0561 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:33 lr: 0.000125 grad: 0.0821 (0.0901) loss: 0.8459 (0.8497) time: 0.1636 data: 0.0755 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:17 lr: 0.000125 grad: 0.0772 (0.0901) loss: 0.8504 (0.8498) time: 0.1905 data: 0.1025 max mem: 9377 +Train: [8] [4400/6250] eta: 0:05:01 lr: 0.000125 grad: 0.0784 (0.0901) loss: 0.8486 (0.8497) time: 0.1838 data: 0.0949 max mem: 9377 +Train: [8] [4500/6250] eta: 0:04:44 lr: 0.000125 grad: 0.0766 (0.0899) loss: 0.8516 (0.8497) time: 0.1456 data: 0.0583 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:28 lr: 0.000125 grad: 0.0802 (0.0898) loss: 0.8491 (0.8497) time: 0.1648 data: 0.0734 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:11 lr: 0.000125 grad: 0.0792 (0.0897) loss: 0.8502 (0.8497) time: 0.1630 data: 0.0763 max mem: 9377 +Train: [8] [4800/6250] eta: 0:03:55 lr: 0.000125 grad: 0.0814 (0.0898) loss: 0.8440 (0.8497) time: 0.1561 data: 0.0654 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:39 lr: 0.000125 grad: 0.0765 (0.0897) loss: 0.8528 (0.8496) time: 0.1666 data: 0.0790 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:23 lr: 0.000125 grad: 0.0838 (0.0896) loss: 0.8494 (0.8497) time: 0.1614 data: 0.0739 max mem: 9377 +Train: [8] [5100/6250] eta: 0:03:06 lr: 0.000125 grad: 0.0784 (0.0895) loss: 0.8519 (0.8497) time: 0.1666 data: 0.0811 max mem: 9377 +Train: [8] [5200/6250] eta: 0:02:50 lr: 0.000124 grad: 0.0783 (0.0894) loss: 0.8482 (0.8497) time: 0.1522 data: 0.0565 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:34 lr: 0.000124 grad: 0.0859 (0.0893) loss: 0.8419 (0.8497) time: 0.1661 data: 0.0731 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:18 lr: 0.000124 grad: 0.0808 (0.0892) loss: 0.8518 (0.8497) time: 0.1654 data: 0.0837 max mem: 9377 +Train: [8] [5500/6250] eta: 0:02:01 lr: 0.000124 grad: 0.0766 (0.0891) loss: 0.8509 (0.8497) time: 0.1512 data: 0.0637 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:45 lr: 0.000124 grad: 0.0816 (0.0890) loss: 0.8534 (0.8497) time: 0.2349 data: 0.1580 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:29 lr: 0.000124 grad: 0.0818 (0.0888) loss: 0.8511 (0.8497) time: 0.1747 data: 0.0852 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.0752 (0.0887) loss: 0.8497 (0.8498) time: 0.1736 data: 0.0854 max mem: 9377 +Train: [8] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.0769 (0.0886) loss: 0.8483 (0.8498) time: 0.1539 data: 0.0743 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0858 (0.0885) loss: 0.8510 (0.8498) time: 0.1922 data: 0.1010 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0775 (0.0884) loss: 0.8487 (0.8497) time: 0.1512 data: 0.0576 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0808 (0.0883) loss: 0.8516 (0.8497) time: 0.1554 data: 0.0632 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0786 (0.0882) loss: 0.8445 (0.8497) time: 0.1713 data: 0.0851 max mem: 9377 +Train: [8] Total time: 0:17:07 (0.1644 s / it) +Averaged stats: lr: 0.000124 grad: 0.0786 (0.0882) loss: 0.8445 (0.8497) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:05:38 loss: 0.8551 (0.8551) time: 5.4608 data: 5.4280 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8587 (0.8564) time: 0.1420 data: 0.1164 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:15 (0.2471 s / it) +Averaged stats (hcp-train-subset): loss: 0.8587 (0.8564) +Eval (hcp-val): [8] [ 0/62] eta: 0:03:27 loss: 0.8511 (0.8511) time: 3.3505 data: 3.2886 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8532 (0.8542) time: 0.2275 data: 0.2010 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:16 (0.2692 s / it) +Averaged stats (hcp-val): loss: 0.8532 (0.8542) +Eval (nsd-val): [8] [ 0/62] eta: 0:06:03 loss: 0.8090 (0.8090) time: 5.8594 data: 5.8292 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8211 (0.8233) time: 0.1091 data: 0.0823 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (nsd-val): loss: 0.8211 (0.8233) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 10:02:35 lr: 0.000124 grad: 0.0596 (0.0596) loss: 0.8731 (0.8731) time: 5.7849 data: 5.6816 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:21:51 lr: 0.000124 grad: 0.0784 (0.0812) loss: 0.8502 (0.8566) time: 0.1600 data: 0.0683 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:19:41 lr: 0.000124 grad: 0.0835 (0.0815) loss: 0.8523 (0.8542) time: 0.2027 data: 0.1011 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:18:33 lr: 0.000124 grad: 0.0728 (0.0806) loss: 0.8614 (0.8543) time: 0.1682 data: 0.0658 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:17:52 lr: 0.000124 grad: 0.0743 (0.0806) loss: 0.8544 (0.8542) time: 0.1627 data: 0.0461 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:17:17 lr: 0.000124 grad: 0.0731 (0.0802) loss: 0.8546 (0.8540) time: 0.1626 data: 0.0785 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:16:46 lr: 0.000124 grad: 0.0727 (0.0799) loss: 0.8496 (0.8537) time: 0.1621 data: 0.0668 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:16:16 lr: 0.000124 grad: 0.0742 (0.0794) loss: 0.8545 (0.8536) time: 0.1375 data: 0.0510 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:15:55 lr: 0.000124 grad: 0.0751 (0.0788) loss: 0.8521 (0.8535) time: 0.1424 data: 0.0499 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:15:31 lr: 0.000124 grad: 0.0764 (0.0789) loss: 0.8496 (0.8531) time: 0.1803 data: 0.0923 max mem: 9377 +Train: [9] [1000/6250] eta: 0:15:10 lr: 0.000124 grad: 0.0824 (0.0789) loss: 0.8502 (0.8529) time: 0.1604 data: 0.0804 max mem: 9377 +Train: [9] [1100/6250] eta: 0:14:50 lr: 0.000124 grad: 0.0748 (0.0791) loss: 0.8521 (0.8527) time: 0.1522 data: 0.0592 max mem: 9377 +Train: [9] [1200/6250] eta: 0:14:25 lr: 0.000124 grad: 0.0746 (0.0790) loss: 0.8489 (0.8525) time: 0.1577 data: 0.0731 max mem: 9377 +Train: [9] [1300/6250] eta: 0:14:08 lr: 0.000124 grad: 0.0756 (0.0788) loss: 0.8511 (0.8524) time: 0.2115 data: 0.1308 max mem: 9377 +Train: [9] [1400/6250] eta: 0:13:51 lr: 0.000124 grad: 0.0780 (0.0795) loss: 0.8524 (0.8522) time: 0.1651 data: 0.0796 max mem: 9377 +Train: [9] [1500/6250] eta: 0:13:47 lr: 0.000124 grad: 0.0740 (0.0797) loss: 0.8503 (0.8521) time: 0.3710 data: 0.2960 max mem: 9377 +Train: [9] [1600/6250] eta: 0:13:21 lr: 0.000124 grad: 0.0796 (0.0801) loss: 0.8517 (0.8520) time: 0.1709 data: 0.0842 max mem: 9377 +Train: [9] [1700/6250] eta: 0:13:01 lr: 0.000124 grad: 0.0804 (0.0803) loss: 0.8501 (0.8519) time: 0.1489 data: 0.0677 max mem: 9377 +Train: [9] [1800/6250] eta: 0:12:46 lr: 0.000124 grad: 0.0825 (0.0805) loss: 0.8485 (0.8518) time: 0.0986 data: 0.0002 max mem: 9377 +Train: [9] [1900/6250] eta: 0:12:28 lr: 0.000124 grad: 0.0761 (0.0805) loss: 0.8515 (0.8516) time: 0.1632 data: 0.0823 max mem: 9377 +Train: [9] [2000/6250] eta: 0:12:10 lr: 0.000124 grad: 0.0749 (0.0806) loss: 0.8541 (0.8514) time: 0.1598 data: 0.0759 max mem: 9377 +Train: [9] [2100/6250] eta: 0:11:54 lr: 0.000124 grad: 0.0795 (0.0808) loss: 0.8478 (0.8513) time: 0.1824 data: 0.0986 max mem: 9377 +Train: [9] [2200/6250] eta: 0:11:38 lr: 0.000124 grad: 0.0833 (0.0808) loss: 0.8477 (0.8511) time: 0.1704 data: 0.0900 max mem: 9377 +Train: [9] [2300/6250] eta: 0:11:20 lr: 0.000124 grad: 0.0787 (0.0811) loss: 0.8448 (0.8508) time: 0.1621 data: 0.0737 max mem: 9377 +Train: [9] [2400/6250] eta: 0:11:02 lr: 0.000124 grad: 0.0799 (0.0815) loss: 0.8459 (0.8506) time: 0.1587 data: 0.0626 max mem: 9377 +Train: [9] [2500/6250] eta: 0:10:43 lr: 0.000124 grad: 0.0797 (0.0815) loss: 0.8454 (0.8505) time: 0.2086 data: 0.1245 max mem: 9377 +Train: [9] [2600/6250] eta: 0:10:22 lr: 0.000124 grad: 0.0772 (0.0816) loss: 0.8460 (0.8503) time: 0.1589 data: 0.0771 max mem: 9377 +Train: [9] [2700/6250] eta: 0:10:02 lr: 0.000124 grad: 0.0804 (0.0817) loss: 0.8439 (0.8502) time: 0.1496 data: 0.0628 max mem: 9377 +Train: [9] [2800/6250] eta: 0:09:44 lr: 0.000124 grad: 0.0780 (0.0817) loss: 0.8484 (0.8501) time: 0.1585 data: 0.0605 max mem: 9377 +Train: [9] [2900/6250] eta: 0:09:27 lr: 0.000124 grad: 0.0759 (0.0818) loss: 0.8461 (0.8499) time: 0.1882 data: 0.1008 max mem: 9377 +Train: [9] [3000/6250] eta: 0:09:09 lr: 0.000124 grad: 0.0772 (0.0821) loss: 0.8471 (0.8498) time: 0.1710 data: 0.0817 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:51 lr: 0.000124 grad: 0.0859 (0.0822) loss: 0.8425 (0.8496) time: 0.1503 data: 0.0589 max mem: 9377 +Train: [9] [3200/6250] eta: 0:08:34 lr: 0.000124 grad: 0.0785 (0.0823) loss: 0.8471 (0.8495) time: 0.2274 data: 0.1384 max mem: 9377 +Train: [9] [3300/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0738 (0.0824) loss: 0.8461 (0.8494) time: 0.1466 data: 0.0556 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:57 lr: 0.000124 grad: 0.0810 (0.0825) loss: 0.8428 (0.8493) time: 0.1297 data: 0.0440 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:40 lr: 0.000124 grad: 0.0769 (0.0826) loss: 0.8383 (0.8492) time: 0.1698 data: 0.0831 max mem: 9377 +Train: [9] [3600/6250] eta: 0:07:21 lr: 0.000124 grad: 0.0745 (0.0825) loss: 0.8445 (0.8491) time: 0.1412 data: 0.0574 max mem: 9377 +Train: [9] [3700/6250] eta: 0:07:04 lr: 0.000124 grad: 0.0732 (0.0826) loss: 0.8479 (0.8491) time: 0.1321 data: 0.0432 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:47 lr: 0.000124 grad: 0.0789 (0.0826) loss: 0.8490 (0.8491) time: 0.1605 data: 0.0770 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:30 lr: 0.000124 grad: 0.0825 (0.0827) loss: 0.8403 (0.8490) time: 0.1650 data: 0.0823 max mem: 9377 +Train: [9] [4000/6250] eta: 0:06:12 lr: 0.000124 grad: 0.0799 (0.0827) loss: 0.8494 (0.8489) time: 0.1692 data: 0.0890 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:55 lr: 0.000124 grad: 0.0800 (0.0826) loss: 0.8477 (0.8489) time: 0.1510 data: 0.0659 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:38 lr: 0.000124 grad: 0.0782 (0.0826) loss: 0.8437 (0.8489) time: 0.1535 data: 0.0650 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:21 lr: 0.000124 grad: 0.0746 (0.0827) loss: 0.8417 (0.8488) time: 0.1290 data: 0.0443 max mem: 9377 +Train: [9] [4400/6250] eta: 0:05:04 lr: 0.000124 grad: 0.0757 (0.0827) loss: 0.8437 (0.8488) time: 0.1454 data: 0.0550 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:48 lr: 0.000124 grad: 0.0777 (0.0826) loss: 0.8482 (0.8487) time: 0.1687 data: 0.0765 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:31 lr: 0.000124 grad: 0.0803 (0.0825) loss: 0.8493 (0.8486) time: 0.1628 data: 0.0724 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:14 lr: 0.000124 grad: 0.0820 (0.0826) loss: 0.8474 (0.8485) time: 0.1600 data: 0.0714 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:58 lr: 0.000124 grad: 0.0867 (0.0827) loss: 0.8466 (0.8485) time: 0.1740 data: 0.0908 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:41 lr: 0.000124 grad: 0.0871 (0.0829) loss: 0.8481 (0.8484) time: 0.1677 data: 0.0844 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:25 lr: 0.000124 grad: 0.0825 (0.0829) loss: 0.8421 (0.8483) time: 0.1568 data: 0.0730 max mem: 9377 +Train: [9] [5100/6250] eta: 0:03:08 lr: 0.000124 grad: 0.0791 (0.0829) loss: 0.8438 (0.8483) time: 0.1745 data: 0.0903 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:52 lr: 0.000124 grad: 0.0783 (0.0829) loss: 0.8445 (0.8483) time: 0.1742 data: 0.0818 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:35 lr: 0.000124 grad: 0.0812 (0.0828) loss: 0.8478 (0.8482) time: 0.1232 data: 0.0382 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:19 lr: 0.000124 grad: 0.0875 (0.0829) loss: 0.8391 (0.8481) time: 0.1678 data: 0.0875 max mem: 9377 +Train: [9] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.0811 (0.0829) loss: 0.8493 (0.8481) time: 0.1450 data: 0.0564 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:46 lr: 0.000124 grad: 0.0789 (0.0829) loss: 0.8397 (0.8480) time: 0.2003 data: 0.1092 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:30 lr: 0.000124 grad: 0.0797 (0.0829) loss: 0.8440 (0.8479) time: 0.1892 data: 0.1120 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:14 lr: 0.000124 grad: 0.0811 (0.0829) loss: 0.8455 (0.8478) time: 0.2373 data: 0.1534 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.0762 (0.0829) loss: 0.8436 (0.8478) time: 0.1524 data: 0.0754 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0736 (0.0830) loss: 0.8447 (0.8478) time: 0.1557 data: 0.0789 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0743 (0.0829) loss: 0.8486 (0.8478) time: 0.1696 data: 0.0819 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0799 (0.0829) loss: 0.8441 (0.8477) time: 0.1909 data: 0.0968 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0766 (0.0828) loss: 0.8467 (0.8477) time: 0.1702 data: 0.0768 max mem: 9377 +Train: [9] Total time: 0:17:13 (0.1654 s / it) +Averaged stats: lr: 0.000124 grad: 0.0766 (0.0828) loss: 0.8467 (0.8477) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:05:27 loss: 0.8503 (0.8503) time: 5.2815 data: 5.2498 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8537 (0.8551) time: 0.1362 data: 0.1088 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:15 (0.2448 s / it) +Averaged stats (hcp-train-subset): loss: 0.8537 (0.8551) +Making plots (hcp-train-subset): example=43 +Eval (hcp-val): [9] [ 0/62] eta: 0:06:14 loss: 0.8483 (0.8483) time: 6.0331 data: 6.0022 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8532 (0.8532) time: 0.1789 data: 0.1540 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:16 (0.2726 s / it) +Averaged stats (hcp-val): loss: 0.8532 (0.8532) +Making plots (hcp-val): example=26 +Eval (nsd-val): [9] [ 0/62] eta: 0:03:37 loss: 0.8121 (0.8121) time: 3.5133 data: 3.4410 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8213 (0.8235) time: 0.1050 data: 0.0799 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (nsd-val): loss: 0.8213 (0.8235) +Making plots (nsd-val): example=36 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 10:18:54 lr: 0.000124 grad: 0.0942 (0.0942) loss: 0.8686 (0.8686) time: 5.9416 data: 5.7136 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:22:12 lr: 0.000124 grad: 0.0848 (0.0893) loss: 0.8406 (0.8518) time: 0.1615 data: 0.0699 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:19:15 lr: 0.000124 grad: 0.0822 (0.0880) loss: 0.8375 (0.8476) time: 0.1708 data: 0.0664 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:18:16 lr: 0.000124 grad: 0.0758 (0.0875) loss: 0.8465 (0.8455) time: 0.1679 data: 0.0632 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:17:36 lr: 0.000124 grad: 0.0807 (0.0875) loss: 0.8446 (0.8445) time: 0.1756 data: 0.0707 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:17:04 lr: 0.000124 grad: 0.0859 (0.0866) loss: 0.8489 (0.8445) time: 0.1711 data: 0.0493 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:16:45 lr: 0.000124 grad: 0.0814 (0.0859) loss: 0.8423 (0.8446) time: 0.1859 data: 0.0838 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:16:06 lr: 0.000124 grad: 0.0750 (0.0853) loss: 0.8437 (0.8447) time: 0.1554 data: 0.0556 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:15:45 lr: 0.000124 grad: 0.0761 (0.0843) loss: 0.8480 (0.8451) time: 0.1728 data: 0.0785 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:15:20 lr: 0.000124 grad: 0.0716 (0.0839) loss: 0.8496 (0.8454) time: 0.1442 data: 0.0495 max mem: 9377 +Train: [10] [1000/6250] eta: 0:15:00 lr: 0.000124 grad: 0.0766 (0.0838) loss: 0.8422 (0.8453) time: 0.1049 data: 0.0194 max mem: 9377 +Train: [10] [1100/6250] eta: 0:14:48 lr: 0.000124 grad: 0.0731 (0.0835) loss: 0.8475 (0.8454) time: 0.2008 data: 0.1218 max mem: 9377 +Train: [10] [1200/6250] eta: 0:14:30 lr: 0.000124 grad: 0.0760 (0.0831) loss: 0.8409 (0.8453) time: 0.1856 data: 0.1111 max mem: 9377 +Train: [10] [1300/6250] eta: 0:14:15 lr: 0.000124 grad: 0.0768 (0.0829) loss: 0.8444 (0.8452) time: 0.2056 data: 0.1220 max mem: 9377 +Train: [10] [1400/6250] eta: 0:13:57 lr: 0.000124 grad: 0.0748 (0.0827) loss: 0.8458 (0.8451) time: 0.1892 data: 0.1117 max mem: 9377 +Train: [10] [1500/6250] eta: 0:13:38 lr: 0.000124 grad: 0.0746 (0.0824) loss: 0.8510 (0.8452) time: 0.1829 data: 0.1054 max mem: 9377 +Train: [10] [1600/6250] eta: 0:13:19 lr: 0.000124 grad: 0.0784 (0.0822) loss: 0.8475 (0.8452) time: 0.1693 data: 0.0864 max mem: 9377 +Train: [10] [1700/6250] eta: 0:13:00 lr: 0.000124 grad: 0.0736 (0.0819) loss: 0.8479 (0.8453) time: 0.1789 data: 0.0936 max mem: 9377 +Train: [10] [1800/6250] eta: 0:12:39 lr: 0.000124 grad: 0.0757 (0.0818) loss: 0.8396 (0.8453) time: 0.1504 data: 0.0707 max mem: 9377 +Train: [10] [1900/6250] eta: 0:12:18 lr: 0.000124 grad: 0.0766 (0.0818) loss: 0.8504 (0.8454) time: 0.1468 data: 0.0653 max mem: 9377 +Train: [10] [2000/6250] eta: 0:11:59 lr: 0.000124 grad: 0.0768 (0.0817) loss: 0.8451 (0.8454) time: 0.1570 data: 0.0753 max mem: 9377 +Train: [10] [2100/6250] eta: 0:11:40 lr: 0.000124 grad: 0.0753 (0.0816) loss: 0.8448 (0.8454) time: 0.1753 data: 0.0897 max mem: 9377 +Train: [10] [2200/6250] eta: 0:11:22 lr: 0.000124 grad: 0.0799 (0.0815) loss: 0.8494 (0.8454) time: 0.1650 data: 0.0851 max mem: 9377 +Train: [10] [2300/6250] eta: 0:11:04 lr: 0.000124 grad: 0.0738 (0.0813) loss: 0.8445 (0.8454) time: 0.1763 data: 0.0914 max mem: 9377 +Train: [10] [2400/6250] eta: 0:10:47 lr: 0.000124 grad: 0.0787 (0.0811) loss: 0.8512 (0.8455) time: 0.1793 data: 0.0955 max mem: 9377 +Train: [10] [2500/6250] eta: 0:10:28 lr: 0.000124 grad: 0.0699 (0.0810) loss: 0.8445 (0.8455) time: 0.1513 data: 0.0561 max mem: 9377 +Train: [10] [2600/6250] eta: 0:10:10 lr: 0.000124 grad: 0.0751 (0.0809) loss: 0.8434 (0.8455) time: 0.1412 data: 0.0502 max mem: 9377 +Train: [10] [2700/6250] eta: 0:09:52 lr: 0.000124 grad: 0.0766 (0.0808) loss: 0.8529 (0.8455) time: 0.1592 data: 0.0724 max mem: 9377 +Train: [10] [2800/6250] eta: 0:09:34 lr: 0.000124 grad: 0.0765 (0.0807) loss: 0.8453 (0.8455) time: 0.1458 data: 0.0566 max mem: 9377 +Train: [10] [2900/6250] eta: 0:09:17 lr: 0.000124 grad: 0.0712 (0.0805) loss: 0.8462 (0.8456) time: 0.1856 data: 0.0948 max mem: 9377 +Train: [10] [3000/6250] eta: 0:08:59 lr: 0.000124 grad: 0.0740 (0.0806) loss: 0.8468 (0.8456) time: 0.1528 data: 0.0730 max mem: 9377 +Train: [10] [3100/6250] eta: 0:08:41 lr: 0.000124 grad: 0.0781 (0.0805) loss: 0.8497 (0.8456) time: 0.1353 data: 0.0510 max mem: 9377 +Train: [10] [3200/6250] eta: 0:08:24 lr: 0.000124 grad: 0.0775 (0.0805) loss: 0.8406 (0.8457) time: 0.1688 data: 0.0860 max mem: 9377 +Train: [10] [3300/6250] eta: 0:08:06 lr: 0.000124 grad: 0.0807 (0.0805) loss: 0.8458 (0.8457) time: 0.1407 data: 0.0599 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:51 lr: 0.000124 grad: 0.0766 (0.0806) loss: 0.8449 (0.8457) time: 0.2460 data: 0.1629 max mem: 9377 +Train: [10] [3500/6250] eta: 0:07:33 lr: 0.000124 grad: 0.0765 (0.0806) loss: 0.8427 (0.8457) time: 0.1788 data: 0.0913 max mem: 9377 +Train: [10] [3600/6250] eta: 0:07:15 lr: 0.000124 grad: 0.0742 (0.0808) loss: 0.8409 (0.8457) time: 0.1492 data: 0.0670 max mem: 9377 +Train: [10] [3700/6250] eta: 0:06:58 lr: 0.000124 grad: 0.0714 (0.0808) loss: 0.8401 (0.8457) time: 0.1593 data: 0.0765 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:41 lr: 0.000124 grad: 0.0707 (0.0807) loss: 0.8491 (0.8457) time: 0.1476 data: 0.0626 max mem: 9377 +Train: [10] [3900/6250] eta: 0:06:24 lr: 0.000124 grad: 0.0769 (0.0807) loss: 0.8434 (0.8456) time: 0.1557 data: 0.0695 max mem: 9377 +Train: [10] [4000/6250] eta: 0:06:07 lr: 0.000124 grad: 0.0792 (0.0808) loss: 0.8430 (0.8456) time: 0.1346 data: 0.0487 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:50 lr: 0.000124 grad: 0.0763 (0.0808) loss: 0.8475 (0.8455) time: 0.1436 data: 0.0499 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:34 lr: 0.000124 grad: 0.0708 (0.0807) loss: 0.8485 (0.8455) time: 0.1592 data: 0.0698 max mem: 9377 +Train: [10] [4300/6250] eta: 0:05:17 lr: 0.000124 grad: 0.0753 (0.0807) loss: 0.8414 (0.8454) time: 0.1485 data: 0.0530 max mem: 9377 +Train: [10] [4400/6250] eta: 0:05:01 lr: 0.000124 grad: 0.0732 (0.0806) loss: 0.8452 (0.8454) time: 0.1806 data: 0.0904 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:44 lr: 0.000124 grad: 0.0763 (0.0806) loss: 0.8433 (0.8453) time: 0.1539 data: 0.0619 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:28 lr: 0.000124 grad: 0.0778 (0.0806) loss: 0.8403 (0.8452) time: 0.1380 data: 0.0434 max mem: 9377 +Train: [10] [4700/6250] eta: 0:04:11 lr: 0.000124 grad: 0.0717 (0.0806) loss: 0.8404 (0.8451) time: 0.1524 data: 0.0609 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:55 lr: 0.000124 grad: 0.0743 (0.0806) loss: 0.8365 (0.8450) time: 0.1647 data: 0.0807 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:39 lr: 0.000124 grad: 0.0743 (0.0806) loss: 0.8400 (0.8449) time: 0.1700 data: 0.0826 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:22 lr: 0.000124 grad: 0.0743 (0.0805) loss: 0.8449 (0.8449) time: 0.1413 data: 0.0588 max mem: 9377 +Train: [10] [5100/6250] eta: 0:03:06 lr: 0.000124 grad: 0.0738 (0.0805) loss: 0.8422 (0.8448) time: 0.1702 data: 0.0786 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:50 lr: 0.000124 grad: 0.0758 (0.0805) loss: 0.8419 (0.8447) time: 0.1714 data: 0.0857 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:33 lr: 0.000124 grad: 0.0801 (0.0805) loss: 0.8382 (0.8447) time: 0.1372 data: 0.0491 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:17 lr: 0.000124 grad: 0.0722 (0.0805) loss: 0.8425 (0.8446) time: 0.1153 data: 0.0281 max mem: 9377 +Train: [10] [5500/6250] eta: 0:02:01 lr: 0.000124 grad: 0.0761 (0.0804) loss: 0.8434 (0.8446) time: 0.1908 data: 0.1209 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:45 lr: 0.000124 grad: 0.0789 (0.0804) loss: 0.8428 (0.8446) time: 0.1595 data: 0.0761 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:29 lr: 0.000124 grad: 0.0779 (0.0804) loss: 0.8360 (0.8445) time: 0.1953 data: 0.1149 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:12 lr: 0.000124 grad: 0.0842 (0.0804) loss: 0.8432 (0.8444) time: 0.1430 data: 0.0635 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.0774 (0.0803) loss: 0.8377 (0.8443) time: 0.1663 data: 0.0799 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0716 (0.0802) loss: 0.8382 (0.8443) time: 0.1585 data: 0.0688 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0867 (0.0802) loss: 0.8396 (0.8442) time: 0.1835 data: 0.0922 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0775 (0.0802) loss: 0.8443 (0.8442) time: 0.1650 data: 0.0752 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0766 (0.0802) loss: 0.8411 (0.8441) time: 0.1699 data: 0.0810 max mem: 9377 +Train: [10] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000124 grad: 0.0766 (0.0802) loss: 0.8411 (0.8441) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:06:03 loss: 0.8517 (0.8517) time: 5.8703 data: 5.8390 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8542 (0.8535) time: 0.1086 data: 0.0833 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:14 (0.2353 s / it) +Averaged stats (hcp-train-subset): loss: 0.8542 (0.8535) +Eval (hcp-val): [10] [ 0/62] eta: 0:04:41 loss: 0.8473 (0.8473) time: 4.5431 data: 4.4609 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8507 (0.8514) time: 0.1098 data: 0.0842 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:14 (0.2348 s / it) +Averaged stats (hcp-val): loss: 0.8507 (0.8514) +Eval (nsd-val): [10] [ 0/62] eta: 0:06:14 loss: 0.8095 (0.8095) time: 6.0476 data: 6.0162 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8177 (0.8199) time: 0.1547 data: 0.1299 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:15 (0.2422 s / it) +Averaged stats (nsd-val): loss: 0.8177 (0.8199) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 9:01:12 lr: 0.000124 grad: 0.1033 (0.1033) loss: 0.8021 (0.8021) time: 5.1956 data: 5.0260 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:22:06 lr: 0.000124 grad: 0.0866 (0.0973) loss: 0.8505 (0.8422) time: 0.1688 data: 0.0683 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:18:37 lr: 0.000124 grad: 0.0884 (0.0910) loss: 0.8378 (0.8403) time: 0.1425 data: 0.0502 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:17:34 lr: 0.000124 grad: 0.0782 (0.0880) loss: 0.8395 (0.8406) time: 0.1643 data: 0.0703 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:16:38 lr: 0.000124 grad: 0.0708 (0.0856) loss: 0.8372 (0.8411) time: 0.1683 data: 0.0677 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:15:56 lr: 0.000124 grad: 0.0725 (0.0841) loss: 0.8444 (0.8414) time: 0.1655 data: 0.0702 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:15:20 lr: 0.000124 grad: 0.0727 (0.0829) loss: 0.8392 (0.8415) time: 0.1452 data: 0.0382 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:15:02 lr: 0.000124 grad: 0.0768 (0.0818) loss: 0.8387 (0.8415) time: 0.1755 data: 0.0849 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:14:46 lr: 0.000124 grad: 0.0754 (0.0811) loss: 0.8443 (0.8417) time: 0.1641 data: 0.0722 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:14:40 lr: 0.000124 grad: 0.0741 (0.0805) loss: 0.8404 (0.8420) time: 0.1698 data: 0.0832 max mem: 9377 +Train: [11] [1000/6250] eta: 0:14:25 lr: 0.000124 grad: 0.0700 (0.0799) loss: 0.8490 (0.8425) time: 0.1508 data: 0.0616 max mem: 9377 +Train: [11] [1100/6250] eta: 0:14:09 lr: 0.000124 grad: 0.0690 (0.0795) loss: 0.8443 (0.8427) time: 0.1721 data: 0.0890 max mem: 9377 +Train: [11] [1200/6250] eta: 0:13:49 lr: 0.000124 grad: 0.0752 (0.0792) loss: 0.8446 (0.8429) time: 0.1525 data: 0.0656 max mem: 9377 +Train: [11] [1300/6250] eta: 0:13:33 lr: 0.000124 grad: 0.0726 (0.0788) loss: 0.8439 (0.8429) time: 0.1752 data: 0.0892 max mem: 9377 +Train: [11] [1400/6250] eta: 0:13:17 lr: 0.000124 grad: 0.0750 (0.0786) loss: 0.8406 (0.8430) time: 0.2093 data: 0.1187 max mem: 9377 +Train: [11] [1500/6250] eta: 0:12:58 lr: 0.000124 grad: 0.0756 (0.0784) loss: 0.8447 (0.8430) time: 0.1380 data: 0.0500 max mem: 9377 +Train: [11] [1600/6250] eta: 0:12:42 lr: 0.000124 grad: 0.0700 (0.0782) loss: 0.8436 (0.8431) time: 0.1860 data: 0.1007 max mem: 9377 +Train: [11] [1700/6250] eta: 0:12:26 lr: 0.000124 grad: 0.0742 (0.0781) loss: 0.8400 (0.8430) time: 0.1699 data: 0.0904 max mem: 9377 +Train: [11] [1800/6250] eta: 0:12:09 lr: 0.000124 grad: 0.0739 (0.0779) loss: 0.8443 (0.8429) time: 0.1477 data: 0.0603 max mem: 9377 +Train: [11] [1900/6250] eta: 0:11:51 lr: 0.000124 grad: 0.0728 (0.0780) loss: 0.8414 (0.8429) time: 0.1750 data: 0.0874 max mem: 9377 +Train: [11] [2000/6250] eta: 0:11:32 lr: 0.000124 grad: 0.0734 (0.0780) loss: 0.8411 (0.8429) time: 0.1349 data: 0.0501 max mem: 9377 +Train: [11] [2100/6250] eta: 0:11:14 lr: 0.000124 grad: 0.0720 (0.0778) loss: 0.8415 (0.8428) time: 0.1547 data: 0.0689 max mem: 9377 +Train: [11] [2200/6250] eta: 0:10:56 lr: 0.000124 grad: 0.0681 (0.0777) loss: 0.8408 (0.8427) time: 0.1338 data: 0.0494 max mem: 9377 +Train: [11] [2300/6250] eta: 0:10:39 lr: 0.000124 grad: 0.0737 (0.0776) loss: 0.8361 (0.8426) time: 0.1602 data: 0.0773 max mem: 9377 +Train: [11] [2400/6250] eta: 0:10:21 lr: 0.000124 grad: 0.0758 (0.0777) loss: 0.8419 (0.8426) time: 0.1450 data: 0.0657 max mem: 9377 +Train: [11] [2500/6250] eta: 0:10:05 lr: 0.000124 grad: 0.0763 (0.0777) loss: 0.8418 (0.8425) time: 0.1536 data: 0.0712 max mem: 9377 +Train: [11] [2600/6250] eta: 0:09:48 lr: 0.000124 grad: 0.0713 (0.0777) loss: 0.8390 (0.8425) time: 0.1738 data: 0.0874 max mem: 9377 +Train: [11] [2700/6250] eta: 0:09:32 lr: 0.000124 grad: 0.0813 (0.0777) loss: 0.8326 (0.8423) time: 0.1381 data: 0.0603 max mem: 9377 +Train: [11] [2800/6250] eta: 0:09:15 lr: 0.000124 grad: 0.0764 (0.0777) loss: 0.8393 (0.8423) time: 0.1571 data: 0.0712 max mem: 9377 +Train: [11] [2900/6250] eta: 0:08:58 lr: 0.000124 grad: 0.0765 (0.0778) loss: 0.8362 (0.8422) time: 0.1479 data: 0.0546 max mem: 9377 +Train: [11] [3000/6250] eta: 0:08:42 lr: 0.000124 grad: 0.0708 (0.0776) loss: 0.8381 (0.8421) time: 0.1816 data: 0.0968 max mem: 9377 +Train: [11] [3100/6250] eta: 0:08:26 lr: 0.000124 grad: 0.0742 (0.0776) loss: 0.8390 (0.8420) time: 0.1482 data: 0.0534 max mem: 9377 +Train: [11] [3200/6250] eta: 0:08:10 lr: 0.000124 grad: 0.0740 (0.0776) loss: 0.8395 (0.8419) time: 0.1690 data: 0.0802 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:53 lr: 0.000124 grad: 0.0675 (0.0775) loss: 0.8376 (0.8419) time: 0.1490 data: 0.0647 max mem: 9377 +Train: [11] [3400/6250] eta: 0:07:36 lr: 0.000124 grad: 0.0781 (0.0775) loss: 0.8360 (0.8418) time: 0.1341 data: 0.0415 max mem: 9377 +Train: [11] [3500/6250] eta: 0:07:20 lr: 0.000124 grad: 0.0746 (0.0775) loss: 0.8358 (0.8417) time: 0.1327 data: 0.0433 max mem: 9377 +Train: [11] [3600/6250] eta: 0:07:04 lr: 0.000124 grad: 0.0808 (0.0777) loss: 0.8368 (0.8415) time: 0.1610 data: 0.0748 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:47 lr: 0.000124 grad: 0.0767 (0.0777) loss: 0.8408 (0.8414) time: 0.1489 data: 0.0594 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:31 lr: 0.000124 grad: 0.0812 (0.0778) loss: 0.8329 (0.8413) time: 0.1568 data: 0.0614 max mem: 9377 +Train: [11] [3900/6250] eta: 0:06:15 lr: 0.000124 grad: 0.0772 (0.0778) loss: 0.8333 (0.8412) time: 0.1661 data: 0.0849 max mem: 9377 +Train: [11] [4000/6250] eta: 0:05:58 lr: 0.000123 grad: 0.0724 (0.0778) loss: 0.8388 (0.8412) time: 0.1544 data: 0.0586 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:42 lr: 0.000123 grad: 0.0750 (0.0778) loss: 0.8405 (0.8411) time: 0.1551 data: 0.0721 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:26 lr: 0.000123 grad: 0.0760 (0.0778) loss: 0.8345 (0.8410) time: 0.1609 data: 0.0786 max mem: 9377 +Train: [11] [4300/6250] eta: 0:05:10 lr: 0.000123 grad: 0.0718 (0.0778) loss: 0.8383 (0.8409) time: 0.1788 data: 0.1012 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:54 lr: 0.000123 grad: 0.0723 (0.0778) loss: 0.8459 (0.8409) time: 0.1625 data: 0.0795 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:37 lr: 0.000123 grad: 0.0697 (0.0778) loss: 0.8408 (0.8409) time: 0.1529 data: 0.0681 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:22 lr: 0.000123 grad: 0.0784 (0.0778) loss: 0.8349 (0.8408) time: 0.1550 data: 0.0689 max mem: 9377 +Train: [11] [4700/6250] eta: 0:04:05 lr: 0.000123 grad: 0.0744 (0.0779) loss: 0.8389 (0.8408) time: 0.1408 data: 0.0521 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:49 lr: 0.000123 grad: 0.0703 (0.0778) loss: 0.8451 (0.8407) time: 0.1175 data: 0.0341 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:33 lr: 0.000123 grad: 0.0817 (0.0779) loss: 0.8410 (0.8407) time: 0.1583 data: 0.0703 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:17 lr: 0.000123 grad: 0.0759 (0.0779) loss: 0.8375 (0.8407) time: 0.1440 data: 0.0539 max mem: 9377 +Train: [11] [5100/6250] eta: 0:03:02 lr: 0.000123 grad: 0.0703 (0.0779) loss: 0.8448 (0.8407) time: 0.1609 data: 0.0730 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:46 lr: 0.000123 grad: 0.0728 (0.0778) loss: 0.8421 (0.8407) time: 0.1546 data: 0.0649 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:30 lr: 0.000123 grad: 0.0762 (0.0777) loss: 0.8442 (0.8408) time: 0.1611 data: 0.0755 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:14 lr: 0.000123 grad: 0.0751 (0.0777) loss: 0.8423 (0.8408) time: 0.1603 data: 0.0758 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:59 lr: 0.000123 grad: 0.0729 (0.0777) loss: 0.8442 (0.8408) time: 0.1612 data: 0.0753 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:43 lr: 0.000123 grad: 0.0751 (0.0776) loss: 0.8404 (0.8408) time: 0.2060 data: 0.1132 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:27 lr: 0.000123 grad: 0.0738 (0.0776) loss: 0.8387 (0.8408) time: 0.1712 data: 0.0787 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:11 lr: 0.000123 grad: 0.0718 (0.0776) loss: 0.8405 (0.8408) time: 0.1209 data: 0.0423 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.0785 (0.0776) loss: 0.8354 (0.8408) time: 0.1727 data: 0.0760 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.0741 (0.0776) loss: 0.8325 (0.8407) time: 0.1732 data: 0.0746 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.0765 (0.0776) loss: 0.8383 (0.8407) time: 0.1521 data: 0.0609 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0780 (0.0777) loss: 0.8406 (0.8407) time: 0.1777 data: 0.0895 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0726 (0.0777) loss: 0.8384 (0.8406) time: 0.1533 data: 0.0559 max mem: 9377 +Train: [11] Total time: 0:16:41 (0.1602 s / it) +Averaged stats: lr: 0.000123 grad: 0.0726 (0.0777) loss: 0.8384 (0.8406) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:06:04 loss: 0.8507 (0.8507) time: 5.8736 data: 5.8427 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8508 (0.8515) time: 0.1280 data: 0.1014 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:13 (0.2202 s / it) +Averaged stats (hcp-train-subset): loss: 0.8508 (0.8515) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:58 loss: 0.8452 (0.8452) time: 5.7776 data: 5.7458 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8481 (0.8492) time: 0.1191 data: 0.0941 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:13 (0.2200 s / it) +Averaged stats (hcp-val): loss: 0.8481 (0.8492) +Eval (nsd-val): [11] [ 0/62] eta: 0:03:19 loss: 0.8096 (0.8096) time: 3.2126 data: 3.1547 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8198 (0.8211) time: 0.1232 data: 0.0977 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8211) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 10:37:26 lr: 0.000123 grad: 0.1270 (0.1270) loss: 0.8021 (0.8021) time: 6.1194 data: 6.0153 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:20:21 lr: 0.000123 grad: 0.0833 (0.0900) loss: 0.8403 (0.8464) time: 0.1308 data: 0.0459 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:17:46 lr: 0.000123 grad: 0.0733 (0.0867) loss: 0.8367 (0.8434) time: 0.1569 data: 0.0667 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:16:19 lr: 0.000123 grad: 0.0772 (0.0852) loss: 0.8341 (0.8413) time: 0.1613 data: 0.0685 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:15:57 lr: 0.000123 grad: 0.0862 (0.0861) loss: 0.8224 (0.8382) time: 0.1432 data: 0.0454 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:15:32 lr: 0.000123 grad: 0.0830 (0.0859) loss: 0.8300 (0.8364) time: 0.1298 data: 0.0283 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:15:06 lr: 0.000123 grad: 0.0827 (0.0858) loss: 0.8323 (0.8357) time: 0.1597 data: 0.0572 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:14:45 lr: 0.000123 grad: 0.0757 (0.0850) loss: 0.8389 (0.8360) time: 0.1562 data: 0.0544 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:14:28 lr: 0.000123 grad: 0.0741 (0.0845) loss: 0.8409 (0.8361) time: 0.1517 data: 0.0541 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:14:13 lr: 0.000123 grad: 0.0766 (0.0850) loss: 0.8391 (0.8364) time: 0.1707 data: 0.0868 max mem: 9377 +Train: [12] [1000/6250] eta: 0:14:02 lr: 0.000123 grad: 0.0730 (0.0845) loss: 0.8378 (0.8366) time: 0.1598 data: 0.0792 max mem: 9377 +Train: [12] [1100/6250] eta: 0:13:46 lr: 0.000123 grad: 0.0778 (0.0849) loss: 0.8394 (0.8366) time: 0.1478 data: 0.0598 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:30 lr: 0.000123 grad: 0.0762 (0.0849) loss: 0.8386 (0.8364) time: 0.1600 data: 0.0684 max mem: 9377 +Train: [12] [1300/6250] eta: 0:13:14 lr: 0.000123 grad: 0.0776 (0.0845) loss: 0.8314 (0.8363) time: 0.1475 data: 0.0519 max mem: 9377 +Train: [12] [1400/6250] eta: 0:12:55 lr: 0.000123 grad: 0.0774 (0.0840) loss: 0.8380 (0.8363) time: 0.1559 data: 0.0754 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:36 lr: 0.000123 grad: 0.0729 (0.0839) loss: 0.8466 (0.8365) time: 0.1263 data: 0.0370 max mem: 9377 +Train: [12] [1600/6250] eta: 0:12:18 lr: 0.000123 grad: 0.0801 (0.0838) loss: 0.8355 (0.8365) time: 0.1515 data: 0.0674 max mem: 9377 +Train: [12] [1700/6250] eta: 0:12:00 lr: 0.000123 grad: 0.0702 (0.0836) loss: 0.8398 (0.8366) time: 0.1417 data: 0.0466 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:42 lr: 0.000123 grad: 0.0724 (0.0832) loss: 0.8397 (0.8366) time: 0.1473 data: 0.0648 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:26 lr: 0.000123 grad: 0.0748 (0.0830) loss: 0.8426 (0.8366) time: 0.1624 data: 0.0674 max mem: 9377 +Train: [12] [2000/6250] eta: 0:11:10 lr: 0.000123 grad: 0.0750 (0.0827) loss: 0.8376 (0.8366) time: 0.1710 data: 0.0809 max mem: 9377 +Train: [12] [2100/6250] eta: 0:10:52 lr: 0.000123 grad: 0.0749 (0.0824) loss: 0.8333 (0.8366) time: 0.1514 data: 0.0626 max mem: 9377 +Train: [12] [2200/6250] eta: 0:10:36 lr: 0.000123 grad: 0.0719 (0.0821) loss: 0.8400 (0.8366) time: 0.1549 data: 0.0662 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:19 lr: 0.000123 grad: 0.0734 (0.0820) loss: 0.8383 (0.8366) time: 0.1458 data: 0.0584 max mem: 9377 +Train: [12] [2400/6250] eta: 0:10:04 lr: 0.000123 grad: 0.0725 (0.0818) loss: 0.8387 (0.8368) time: 0.1720 data: 0.0906 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:47 lr: 0.000123 grad: 0.0748 (0.0817) loss: 0.8399 (0.8368) time: 0.1542 data: 0.0602 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:30 lr: 0.000123 grad: 0.0816 (0.0817) loss: 0.8335 (0.8367) time: 0.1605 data: 0.0785 max mem: 9377 +Train: [12] [2700/6250] eta: 0:09:14 lr: 0.000123 grad: 0.0749 (0.0815) loss: 0.8368 (0.8369) time: 0.1444 data: 0.0553 max mem: 9377 +Train: [12] [2800/6250] eta: 0:08:58 lr: 0.000123 grad: 0.0685 (0.0812) loss: 0.8410 (0.8370) time: 0.1425 data: 0.0570 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:43 lr: 0.000123 grad: 0.0729 (0.0810) loss: 0.8403 (0.8370) time: 0.1706 data: 0.0842 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:28 lr: 0.000123 grad: 0.0729 (0.0808) loss: 0.8363 (0.8371) time: 0.1671 data: 0.0845 max mem: 9377 +Train: [12] [3100/6250] eta: 0:08:13 lr: 0.000123 grad: 0.0756 (0.0806) loss: 0.8398 (0.8371) time: 0.1752 data: 0.0924 max mem: 9377 +Train: [12] [3200/6250] eta: 0:07:57 lr: 0.000123 grad: 0.0780 (0.0805) loss: 0.8384 (0.8372) time: 0.1494 data: 0.0677 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:42 lr: 0.000123 grad: 0.0697 (0.0805) loss: 0.8400 (0.8371) time: 0.1620 data: 0.0699 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:27 lr: 0.000123 grad: 0.0725 (0.0804) loss: 0.8382 (0.8372) time: 0.1803 data: 0.0975 max mem: 9377 +Train: [12] [3500/6250] eta: 0:07:11 lr: 0.000123 grad: 0.0742 (0.0802) loss: 0.8320 (0.8371) time: 0.1676 data: 0.0799 max mem: 9377 +Train: [12] [3600/6250] eta: 0:06:55 lr: 0.000123 grad: 0.0729 (0.0801) loss: 0.8410 (0.8371) time: 0.1469 data: 0.0624 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:39 lr: 0.000123 grad: 0.0733 (0.0799) loss: 0.8352 (0.8370) time: 0.1492 data: 0.0556 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:23 lr: 0.000123 grad: 0.0703 (0.0797) loss: 0.8367 (0.8370) time: 0.1585 data: 0.0735 max mem: 9377 +Train: [12] [3900/6250] eta: 0:06:08 lr: 0.000123 grad: 0.0678 (0.0796) loss: 0.8394 (0.8371) time: 0.1787 data: 0.0902 max mem: 9377 +Train: [12] [4000/6250] eta: 0:05:52 lr: 0.000123 grad: 0.0698 (0.0794) loss: 0.8405 (0.8372) time: 0.1424 data: 0.0490 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:37 lr: 0.000123 grad: 0.0736 (0.0793) loss: 0.8365 (0.8372) time: 0.1563 data: 0.0717 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:21 lr: 0.000123 grad: 0.0710 (0.0793) loss: 0.8478 (0.8372) time: 0.1543 data: 0.0629 max mem: 9377 +Train: [12] [4300/6250] eta: 0:05:05 lr: 0.000123 grad: 0.0731 (0.0792) loss: 0.8381 (0.8372) time: 0.1498 data: 0.0652 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:49 lr: 0.000123 grad: 0.0752 (0.0792) loss: 0.8389 (0.8372) time: 0.1403 data: 0.0504 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:34 lr: 0.000123 grad: 0.0694 (0.0792) loss: 0.8394 (0.8372) time: 0.1852 data: 0.0942 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:18 lr: 0.000123 grad: 0.0745 (0.0792) loss: 0.8370 (0.8371) time: 0.1358 data: 0.0447 max mem: 9377 +Train: [12] [4700/6250] eta: 0:04:03 lr: 0.000123 grad: 0.0767 (0.0794) loss: 0.8381 (0.8371) time: 0.1511 data: 0.0726 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:47 lr: 0.000123 grad: 0.0728 (0.0793) loss: 0.8377 (0.8370) time: 0.1605 data: 0.0753 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:31 lr: 0.000123 grad: 0.0762 (0.0794) loss: 0.8356 (0.8369) time: 0.1413 data: 0.0461 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:15 lr: 0.000123 grad: 0.0828 (0.0796) loss: 0.8345 (0.8369) time: 0.1544 data: 0.0637 max mem: 9377 +Train: [12] [5100/6250] eta: 0:03:00 lr: 0.000123 grad: 0.0768 (0.0796) loss: 0.8280 (0.8368) time: 0.1520 data: 0.0608 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:44 lr: 0.000123 grad: 0.0735 (0.0797) loss: 0.8323 (0.8367) time: 0.1362 data: 0.0500 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:28 lr: 0.000123 grad: 0.0802 (0.0797) loss: 0.8335 (0.8367) time: 0.1656 data: 0.0771 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:13 lr: 0.000123 grad: 0.0734 (0.0797) loss: 0.8334 (0.8366) time: 0.1469 data: 0.0605 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:57 lr: 0.000123 grad: 0.0787 (0.0797) loss: 0.8375 (0.8366) time: 0.2454 data: 0.1712 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:42 lr: 0.000123 grad: 0.0811 (0.0798) loss: 0.8301 (0.8365) time: 0.1693 data: 0.0815 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:26 lr: 0.000123 grad: 0.0797 (0.0798) loss: 0.8336 (0.8365) time: 0.1873 data: 0.1054 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:10 lr: 0.000123 grad: 0.0814 (0.0798) loss: 0.8371 (0.8364) time: 0.1486 data: 0.0705 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.0894 (0.0799) loss: 0.8272 (0.8363) time: 0.1689 data: 0.0839 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.0765 (0.0800) loss: 0.8383 (0.8363) time: 0.1678 data: 0.0744 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.0830 (0.0800) loss: 0.8357 (0.8362) time: 0.1476 data: 0.0620 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0735 (0.0800) loss: 0.8410 (0.8362) time: 0.1598 data: 0.0659 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0797 (0.0800) loss: 0.8427 (0.8362) time: 0.1608 data: 0.0749 max mem: 9377 +Train: [12] Total time: 0:16:31 (0.1586 s / it) +Averaged stats: lr: 0.000123 grad: 0.0797 (0.0800) loss: 0.8427 (0.8362) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:05:20 loss: 0.8504 (0.8504) time: 5.1737 data: 5.1402 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8521 (0.8516) time: 0.1043 data: 0.0792 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:13 (0.2170 s / it) +Averaged stats (hcp-train-subset): loss: 0.8521 (0.8516) +Eval (hcp-val): [12] [ 0/62] eta: 0:05:58 loss: 0.8456 (0.8456) time: 5.7753 data: 5.7267 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8491 (0.8490) time: 0.1338 data: 0.1083 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:13 (0.2207 s / it) +Averaged stats (hcp-val): loss: 0.8491 (0.8490) +Eval (nsd-val): [12] [ 0/62] eta: 0:04:02 loss: 0.8133 (0.8133) time: 3.9110 data: 3.8321 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8215 (0.8201) time: 0.1015 data: 0.0764 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:13 (0.2140 s / it) +Averaged stats (nsd-val): loss: 0.8215 (0.8201) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 10:53:58 lr: 0.000123 grad: 0.0665 (0.0665) loss: 0.8694 (0.8694) time: 6.2782 data: 6.1833 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:21:18 lr: 0.000123 grad: 0.0933 (0.1270) loss: 0.8438 (0.8437) time: 0.1565 data: 0.0703 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:18:39 lr: 0.000123 grad: 0.0854 (0.1101) loss: 0.8311 (0.8368) time: 0.1641 data: 0.0738 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:17:40 lr: 0.000123 grad: 0.0786 (0.1013) loss: 0.8344 (0.8358) time: 0.1585 data: 0.0468 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:17:37 lr: 0.000123 grad: 0.0744 (0.0972) loss: 0.8320 (0.8348) time: 0.1966 data: 0.0966 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:17:15 lr: 0.000123 grad: 0.0726 (0.0942) loss: 0.8295 (0.8342) time: 0.1565 data: 0.0666 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:16:49 lr: 0.000123 grad: 0.0791 (0.0929) loss: 0.8348 (0.8342) time: 0.1457 data: 0.0471 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:16:24 lr: 0.000123 grad: 0.0798 (0.0916) loss: 0.8249 (0.8340) time: 0.1771 data: 0.0780 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:15:58 lr: 0.000123 grad: 0.0741 (0.0900) loss: 0.8365 (0.8338) time: 0.1625 data: 0.0770 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:15:38 lr: 0.000123 grad: 0.0790 (0.0893) loss: 0.8344 (0.8339) time: 0.1729 data: 0.0880 max mem: 9377 +Train: [13] [1000/6250] eta: 0:15:09 lr: 0.000123 grad: 0.0755 (0.0883) loss: 0.8350 (0.8339) time: 0.1679 data: 0.0821 max mem: 9377 +Train: [13] [1100/6250] eta: 0:14:43 lr: 0.000123 grad: 0.0805 (0.0883) loss: 0.8372 (0.8339) time: 0.1518 data: 0.0579 max mem: 9377 +Train: [13] [1200/6250] eta: 0:14:21 lr: 0.000123 grad: 0.0778 (0.0877) loss: 0.8359 (0.8342) time: 0.1921 data: 0.1091 max mem: 9377 +Train: [13] [1300/6250] eta: 0:13:56 lr: 0.000123 grad: 0.0835 (0.0871) loss: 0.8380 (0.8345) time: 0.1610 data: 0.0764 max mem: 9377 +Train: [13] [1400/6250] eta: 0:13:36 lr: 0.000123 grad: 0.0827 (0.0868) loss: 0.8343 (0.8345) time: 0.1589 data: 0.0790 max mem: 9377 +Train: [13] [1500/6250] eta: 0:13:14 lr: 0.000123 grad: 0.0709 (0.0865) loss: 0.8380 (0.8345) time: 0.1682 data: 0.0804 max mem: 9377 +Train: [13] [1600/6250] eta: 0:12:51 lr: 0.000123 grad: 0.0740 (0.0859) loss: 0.8334 (0.8345) time: 0.1454 data: 0.0617 max mem: 9377 +Train: [13] [1700/6250] eta: 0:12:32 lr: 0.000123 grad: 0.0782 (0.0857) loss: 0.8393 (0.8347) time: 0.1778 data: 0.0961 max mem: 9377 +Train: [13] [1800/6250] eta: 0:12:12 lr: 0.000123 grad: 0.0739 (0.0855) loss: 0.8320 (0.8348) time: 0.1449 data: 0.0631 max mem: 9377 +Train: [13] [1900/6250] eta: 0:11:55 lr: 0.000123 grad: 0.0773 (0.0852) loss: 0.8393 (0.8349) time: 0.1537 data: 0.0666 max mem: 9377 +Train: [13] [2000/6250] eta: 0:11:37 lr: 0.000123 grad: 0.0820 (0.0851) loss: 0.8311 (0.8349) time: 0.1489 data: 0.0579 max mem: 9377 +Train: [13] [2100/6250] eta: 0:11:18 lr: 0.000123 grad: 0.0780 (0.0849) loss: 0.8416 (0.8349) time: 0.1635 data: 0.0740 max mem: 9377 +Train: [13] [2200/6250] eta: 0:11:00 lr: 0.000123 grad: 0.0779 (0.0848) loss: 0.8407 (0.8350) time: 0.1357 data: 0.0403 max mem: 9377 +Train: [13] [2300/6250] eta: 0:10:43 lr: 0.000123 grad: 0.0738 (0.0846) loss: 0.8366 (0.8350) time: 0.1662 data: 0.0773 max mem: 9377 +Train: [13] [2400/6250] eta: 0:10:26 lr: 0.000123 grad: 0.0790 (0.0845) loss: 0.8311 (0.8350) time: 0.1847 data: 0.0985 max mem: 9377 +Train: [13] [2500/6250] eta: 0:10:09 lr: 0.000123 grad: 0.0792 (0.0845) loss: 0.8305 (0.8350) time: 0.1572 data: 0.0635 max mem: 9377 +Train: [13] [2600/6250] eta: 0:09:52 lr: 0.000123 grad: 0.0797 (0.0843) loss: 0.8394 (0.8350) time: 0.1357 data: 0.0551 max mem: 9377 +Train: [13] [2700/6250] eta: 0:09:36 lr: 0.000123 grad: 0.0766 (0.0843) loss: 0.8360 (0.8349) time: 0.1586 data: 0.0739 max mem: 9377 +Train: [13] [2800/6250] eta: 0:09:20 lr: 0.000123 grad: 0.0792 (0.0842) loss: 0.8386 (0.8350) time: 0.1549 data: 0.0634 max mem: 9377 +Train: [13] [2900/6250] eta: 0:09:04 lr: 0.000123 grad: 0.0837 (0.0840) loss: 0.8307 (0.8349) time: 0.1699 data: 0.0686 max mem: 9377 +Train: [13] [3000/6250] eta: 0:08:47 lr: 0.000123 grad: 0.0786 (0.0840) loss: 0.8336 (0.8349) time: 0.1648 data: 0.0800 max mem: 9377 +Train: [13] [3100/6250] eta: 0:08:30 lr: 0.000123 grad: 0.0763 (0.0839) loss: 0.8400 (0.8349) time: 0.1607 data: 0.0738 max mem: 9377 +Train: [13] [3200/6250] eta: 0:08:14 lr: 0.000123 grad: 0.0789 (0.0838) loss: 0.8297 (0.8349) time: 0.1576 data: 0.0672 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:57 lr: 0.000123 grad: 0.0807 (0.0838) loss: 0.8335 (0.8349) time: 0.1429 data: 0.0493 max mem: 9377 +Train: [13] [3400/6250] eta: 0:07:40 lr: 0.000123 grad: 0.0764 (0.0837) loss: 0.8299 (0.8349) time: 0.1610 data: 0.0704 max mem: 9377 +Train: [13] [3500/6250] eta: 0:07:24 lr: 0.000123 grad: 0.0818 (0.0836) loss: 0.8334 (0.8349) time: 0.1736 data: 0.0860 max mem: 9377 +Train: [13] [3600/6250] eta: 0:07:08 lr: 0.000123 grad: 0.0799 (0.0835) loss: 0.8320 (0.8349) time: 0.1387 data: 0.0482 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:51 lr: 0.000122 grad: 0.0725 (0.0834) loss: 0.8374 (0.8349) time: 0.1637 data: 0.0690 max mem: 9377 +Train: [13] [3800/6250] eta: 0:06:35 lr: 0.000122 grad: 0.0765 (0.0834) loss: 0.8309 (0.8348) time: 0.1368 data: 0.0444 max mem: 9377 +Train: [13] [3900/6250] eta: 0:06:19 lr: 0.000122 grad: 0.0785 (0.0834) loss: 0.8357 (0.8347) time: 0.1471 data: 0.0610 max mem: 9377 +Train: [13] [4000/6250] eta: 0:06:03 lr: 0.000122 grad: 0.0849 (0.0833) loss: 0.8266 (0.8347) time: 0.1728 data: 0.0868 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:46 lr: 0.000122 grad: 0.0792 (0.0833) loss: 0.8331 (0.8346) time: 0.1575 data: 0.0706 max mem: 9377 +Train: [13] [4200/6250] eta: 0:05:30 lr: 0.000122 grad: 0.0760 (0.0833) loss: 0.8336 (0.8345) time: 0.1543 data: 0.0635 max mem: 9377 +Train: [13] [4300/6250] eta: 0:05:14 lr: 0.000122 grad: 0.0800 (0.0833) loss: 0.8342 (0.8344) time: 0.1693 data: 0.0863 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:57 lr: 0.000122 grad: 0.0740 (0.0835) loss: 0.8283 (0.8344) time: 0.1528 data: 0.0617 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:41 lr: 0.000122 grad: 0.0751 (0.0835) loss: 0.8397 (0.8345) time: 0.1514 data: 0.0679 max mem: 9377 +Train: [13] [4600/6250] eta: 0:04:25 lr: 0.000122 grad: 0.0830 (0.0835) loss: 0.8314 (0.8344) time: 0.1562 data: 0.0709 max mem: 9377 +Train: [13] [4700/6250] eta: 0:04:09 lr: 0.000122 grad: 0.0780 (0.0835) loss: 0.8388 (0.8345) time: 0.1656 data: 0.0825 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:53 lr: 0.000122 grad: 0.0783 (0.0835) loss: 0.8358 (0.8345) time: 0.1761 data: 0.0908 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:36 lr: 0.000122 grad: 0.0762 (0.0835) loss: 0.8399 (0.8346) time: 0.1580 data: 0.0784 max mem: 9377 +Train: [13] [5000/6250] eta: 0:03:20 lr: 0.000122 grad: 0.0782 (0.0834) loss: 0.8367 (0.8346) time: 0.1522 data: 0.0626 max mem: 9377 +Train: [13] [5100/6250] eta: 0:03:04 lr: 0.000122 grad: 0.0764 (0.0833) loss: 0.8354 (0.8346) time: 0.1647 data: 0.0760 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:48 lr: 0.000122 grad: 0.0810 (0.0833) loss: 0.8381 (0.8346) time: 0.1759 data: 0.0916 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:32 lr: 0.000122 grad: 0.0773 (0.0832) loss: 0.8400 (0.8346) time: 0.1063 data: 0.0003 max mem: 9377 +Train: [13] [5400/6250] eta: 0:02:16 lr: 0.000122 grad: 0.0772 (0.0832) loss: 0.8326 (0.8346) time: 0.1821 data: 0.1045 max mem: 9377 +Train: [13] [5500/6250] eta: 0:02:00 lr: 0.000122 grad: 0.0740 (0.0831) loss: 0.8373 (0.8347) time: 0.1448 data: 0.0626 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:44 lr: 0.000122 grad: 0.0763 (0.0830) loss: 0.8395 (0.8347) time: 0.1661 data: 0.0850 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:28 lr: 0.000122 grad: 0.0752 (0.0830) loss: 0.8396 (0.8347) time: 0.1587 data: 0.0770 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:12 lr: 0.000122 grad: 0.0824 (0.0829) loss: 0.8343 (0.8347) time: 0.1739 data: 0.0952 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:56 lr: 0.000122 grad: 0.0737 (0.0829) loss: 0.8344 (0.8347) time: 0.1552 data: 0.0651 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:40 lr: 0.000122 grad: 0.0799 (0.0828) loss: 0.8276 (0.8348) time: 0.1593 data: 0.0643 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:24 lr: 0.000122 grad: 0.0708 (0.0827) loss: 0.8354 (0.8348) time: 0.1609 data: 0.0735 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.0772 (0.0827) loss: 0.8413 (0.8348) time: 0.1796 data: 0.0823 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0722 (0.0827) loss: 0.8357 (0.8348) time: 0.1715 data: 0.0718 max mem: 9377 +Train: [13] Total time: 0:16:53 (0.1622 s / it) +Averaged stats: lr: 0.000122 grad: 0.0722 (0.0827) loss: 0.8357 (0.8348) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:06:46 loss: 0.8496 (0.8496) time: 6.5598 data: 6.5234 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8505 (0.8498) time: 0.1202 data: 0.0922 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:15 (0.2570 s / it) +Averaged stats (hcp-train-subset): loss: 0.8505 (0.8498) +Eval (hcp-val): [13] [ 0/62] eta: 0:06:41 loss: 0.8446 (0.8446) time: 6.4814 data: 6.4492 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8471 (0.8475) time: 0.1381 data: 0.1122 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:15 (0.2527 s / it) +Averaged stats (hcp-val): loss: 0.8471 (0.8475) +Eval (nsd-val): [13] [ 0/62] eta: 0:04:05 loss: 0.8090 (0.8090) time: 3.9672 data: 3.8880 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8174 (0.8174) time: 0.2305 data: 0.2055 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:16 (0.2660 s / it) +Averaged stats (nsd-val): loss: 0.8174 (0.8174) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [14] [ 0/6250] eta: 11:55:00 lr: 0.000122 grad: 0.1701 (0.1701) loss: 0.8536 (0.8536) time: 6.8641 data: 6.7505 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:24:07 lr: 0.000122 grad: 0.0714 (0.0934) loss: 0.8473 (0.8512) time: 0.1988 data: 0.0917 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:20:00 lr: 0.000122 grad: 0.0689 (0.0866) loss: 0.8473 (0.8473) time: 0.1589 data: 0.0670 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:18:52 lr: 0.000122 grad: 0.0730 (0.0842) loss: 0.8438 (0.8463) time: 0.1820 data: 0.0714 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:18:27 lr: 0.000122 grad: 0.0728 (0.0819) loss: 0.8404 (0.8453) time: 0.1922 data: 0.0962 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:17:46 lr: 0.000122 grad: 0.0683 (0.0801) loss: 0.8382 (0.8442) time: 0.1975 data: 0.0902 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:17:02 lr: 0.000122 grad: 0.0742 (0.0795) loss: 0.8334 (0.8426) time: 0.1605 data: 0.0565 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:16:30 lr: 0.000122 grad: 0.0744 (0.0788) loss: 0.8361 (0.8414) time: 0.1566 data: 0.0646 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:16:05 lr: 0.000122 grad: 0.0730 (0.0786) loss: 0.8390 (0.8407) time: 0.1488 data: 0.0590 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:15:44 lr: 0.000122 grad: 0.0741 (0.0782) loss: 0.8378 (0.8404) time: 0.1563 data: 0.0734 max mem: 9377 +Train: [14] [1000/6250] eta: 0:15:33 lr: 0.000122 grad: 0.0629 (0.0779) loss: 0.8373 (0.8401) time: 0.1609 data: 0.0497 max mem: 9377 +Train: [14] [1100/6250] eta: 0:15:10 lr: 0.000122 grad: 0.0714 (0.0777) loss: 0.8400 (0.8397) time: 0.1302 data: 0.0273 max mem: 9377 +Train: [14] [1200/6250] eta: 0:14:56 lr: 0.000122 grad: 0.0705 (0.0773) loss: 0.8390 (0.8394) time: 0.1672 data: 0.0813 max mem: 9377 +Train: [14] [1300/6250] eta: 0:14:40 lr: 0.000122 grad: 0.0763 (0.0773) loss: 0.8348 (0.8388) time: 0.1643 data: 0.0878 max mem: 9377 +Train: [14] [1400/6250] eta: 0:14:18 lr: 0.000122 grad: 0.0716 (0.0772) loss: 0.8327 (0.8384) time: 0.1753 data: 0.0886 max mem: 9377 +Train: [14] [1500/6250] eta: 0:13:51 lr: 0.000122 grad: 0.0792 (0.0774) loss: 0.8272 (0.8380) time: 0.1345 data: 0.0487 max mem: 9377 +Train: [14] [1600/6250] eta: 0:13:27 lr: 0.000122 grad: 0.0737 (0.0776) loss: 0.8326 (0.8375) time: 0.1594 data: 0.0672 max mem: 9377 +Train: [14] [1700/6250] eta: 0:13:08 lr: 0.000122 grad: 0.0769 (0.0779) loss: 0.8262 (0.8371) time: 0.1421 data: 0.0488 max mem: 9377 +Train: [14] [1800/6250] eta: 0:12:45 lr: 0.000122 grad: 0.0802 (0.0783) loss: 0.8357 (0.8368) time: 0.1575 data: 0.0657 max mem: 9377 +Train: [14] [1900/6250] eta: 0:12:24 lr: 0.000122 grad: 0.0739 (0.0782) loss: 0.8406 (0.8366) time: 0.1339 data: 0.0512 max mem: 9377 +Train: [14] [2000/6250] eta: 0:12:05 lr: 0.000122 grad: 0.0786 (0.0784) loss: 0.8395 (0.8366) time: 0.1523 data: 0.0667 max mem: 9377 +Train: [14] [2100/6250] eta: 0:11:47 lr: 0.000122 grad: 0.0751 (0.0784) loss: 0.8378 (0.8366) time: 0.1764 data: 0.0860 max mem: 9377 +Train: [14] [2200/6250] eta: 0:11:28 lr: 0.000122 grad: 0.0738 (0.0784) loss: 0.8314 (0.8365) time: 0.1670 data: 0.0779 max mem: 9377 +Train: [14] [2300/6250] eta: 0:11:07 lr: 0.000122 grad: 0.0739 (0.0784) loss: 0.8362 (0.8364) time: 0.1607 data: 0.0855 max mem: 9377 +Train: [14] [2400/6250] eta: 0:10:49 lr: 0.000122 grad: 0.0814 (0.0785) loss: 0.8312 (0.8364) time: 0.1559 data: 0.0720 max mem: 9377 +Train: [14] [2500/6250] eta: 0:10:30 lr: 0.000122 grad: 0.0826 (0.0786) loss: 0.8336 (0.8363) time: 0.1381 data: 0.0416 max mem: 9377 +Train: [14] [2600/6250] eta: 0:10:12 lr: 0.000122 grad: 0.0763 (0.0787) loss: 0.8309 (0.8361) time: 0.1608 data: 0.0650 max mem: 9377 +Train: [14] [2700/6250] eta: 0:09:53 lr: 0.000122 grad: 0.0775 (0.0787) loss: 0.8277 (0.8361) time: 0.1645 data: 0.0748 max mem: 9377 +Train: [14] [2800/6250] eta: 0:09:35 lr: 0.000122 grad: 0.0768 (0.0788) loss: 0.8309 (0.8359) time: 0.1247 data: 0.0329 max mem: 9377 +Train: [14] [2900/6250] eta: 0:09:16 lr: 0.000122 grad: 0.0728 (0.0788) loss: 0.8321 (0.8358) time: 0.1265 data: 0.0396 max mem: 9377 +Train: [14] [3000/6250] eta: 0:08:59 lr: 0.000122 grad: 0.0752 (0.0789) loss: 0.8329 (0.8357) time: 0.1678 data: 0.0831 max mem: 9377 +Train: [14] [3100/6250] eta: 0:08:41 lr: 0.000122 grad: 0.0752 (0.0790) loss: 0.8323 (0.8355) time: 0.1677 data: 0.0787 max mem: 9377 +Train: [14] [3200/6250] eta: 0:08:24 lr: 0.000122 grad: 0.0769 (0.0791) loss: 0.8270 (0.8354) time: 0.1613 data: 0.0767 max mem: 9377 +Train: [14] [3300/6250] eta: 0:08:06 lr: 0.000122 grad: 0.0859 (0.0792) loss: 0.8334 (0.8352) time: 0.1428 data: 0.0500 max mem: 9377 +Train: [14] [3400/6250] eta: 0:07:48 lr: 0.000122 grad: 0.0810 (0.0793) loss: 0.8271 (0.8351) time: 0.1490 data: 0.0532 max mem: 9377 +Train: [14] [3500/6250] eta: 0:07:31 lr: 0.000122 grad: 0.0771 (0.0794) loss: 0.8283 (0.8349) time: 0.1463 data: 0.0544 max mem: 9377 +Train: [14] [3600/6250] eta: 0:07:15 lr: 0.000122 grad: 0.0781 (0.0794) loss: 0.8237 (0.8348) time: 0.1534 data: 0.0665 max mem: 9377 +Train: [14] [3700/6250] eta: 0:06:57 lr: 0.000122 grad: 0.0752 (0.0794) loss: 0.8263 (0.8347) time: 0.1540 data: 0.0691 max mem: 9377 +Train: [14] [3800/6250] eta: 0:06:41 lr: 0.000122 grad: 0.0794 (0.0794) loss: 0.8342 (0.8346) time: 0.1622 data: 0.0844 max mem: 9377 +Train: [14] [3900/6250] eta: 0:06:24 lr: 0.000122 grad: 0.0770 (0.0794) loss: 0.8283 (0.8345) time: 0.1451 data: 0.0572 max mem: 9377 +Train: [14] [4000/6250] eta: 0:06:07 lr: 0.000122 grad: 0.0862 (0.0795) loss: 0.8280 (0.8344) time: 0.1338 data: 0.0503 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:50 lr: 0.000122 grad: 0.0774 (0.0794) loss: 0.8334 (0.8344) time: 0.1548 data: 0.0666 max mem: 9377 +Train: [14] [4200/6250] eta: 0:05:33 lr: 0.000122 grad: 0.0760 (0.0794) loss: 0.8319 (0.8344) time: 0.1479 data: 0.0624 max mem: 9377 +Train: [14] [4300/6250] eta: 0:05:16 lr: 0.000122 grad: 0.0741 (0.0795) loss: 0.8365 (0.8344) time: 0.1482 data: 0.0497 max mem: 9377 +Train: [14] [4400/6250] eta: 0:04:59 lr: 0.000122 grad: 0.0759 (0.0795) loss: 0.8268 (0.8343) time: 0.1499 data: 0.0614 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:43 lr: 0.000122 grad: 0.0766 (0.0794) loss: 0.8312 (0.8343) time: 0.1728 data: 0.0835 max mem: 9377 +Train: [14] [4600/6250] eta: 0:04:26 lr: 0.000122 grad: 0.0712 (0.0794) loss: 0.8405 (0.8343) time: 0.1579 data: 0.0649 max mem: 9377 +Train: [14] [4700/6250] eta: 0:04:10 lr: 0.000122 grad: 0.0770 (0.0794) loss: 0.8306 (0.8343) time: 0.1572 data: 0.0821 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:54 lr: 0.000122 grad: 0.0776 (0.0794) loss: 0.8372 (0.8343) time: 0.1493 data: 0.0610 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:38 lr: 0.000122 grad: 0.0736 (0.0794) loss: 0.8351 (0.8343) time: 0.1620 data: 0.0746 max mem: 9377 +Train: [14] [5000/6250] eta: 0:03:21 lr: 0.000122 grad: 0.0798 (0.0794) loss: 0.8306 (0.8344) time: 0.1523 data: 0.0655 max mem: 9377 +Train: [14] [5100/6250] eta: 0:03:05 lr: 0.000122 grad: 0.0750 (0.0793) loss: 0.8422 (0.8344) time: 0.1502 data: 0.0622 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:48 lr: 0.000122 grad: 0.0732 (0.0793) loss: 0.8401 (0.8344) time: 0.1441 data: 0.0479 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:32 lr: 0.000122 grad: 0.0760 (0.0792) loss: 0.8310 (0.8344) time: 0.1303 data: 0.0389 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:16 lr: 0.000122 grad: 0.0791 (0.0792) loss: 0.8403 (0.8345) time: 0.1836 data: 0.1058 max mem: 9377 +Train: [14] [5500/6250] eta: 0:02:00 lr: 0.000122 grad: 0.0769 (0.0792) loss: 0.8350 (0.8345) time: 0.1790 data: 0.0985 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:44 lr: 0.000122 grad: 0.0746 (0.0792) loss: 0.8433 (0.8345) time: 0.1859 data: 0.1037 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:28 lr: 0.000122 grad: 0.0750 (0.0792) loss: 0.8332 (0.8345) time: 0.1650 data: 0.0870 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:12 lr: 0.000122 grad: 0.0765 (0.0792) loss: 0.8371 (0.8345) time: 0.2086 data: 0.1240 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:56 lr: 0.000122 grad: 0.0753 (0.0792) loss: 0.8373 (0.8345) time: 0.1967 data: 0.1076 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:40 lr: 0.000122 grad: 0.0767 (0.0792) loss: 0.8341 (0.8345) time: 0.1886 data: 0.0891 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:24 lr: 0.000122 grad: 0.0855 (0.0793) loss: 0.8342 (0.8345) time: 0.1929 data: 0.0990 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.0776 (0.0793) loss: 0.8317 (0.8345) time: 0.1835 data: 0.0859 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0761 (0.0793) loss: 0.8340 (0.8344) time: 0.1790 data: 0.0863 max mem: 9377 +Train: [14] Total time: 0:17:06 (0.1642 s / it) +Averaged stats: lr: 0.000122 grad: 0.0761 (0.0793) loss: 0.8340 (0.8344) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:04:57 loss: 0.8477 (0.8477) time: 4.8022 data: 4.7548 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8495 (0.8499) time: 0.1366 data: 0.1108 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-train-subset): loss: 0.8495 (0.8499) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [14] [ 0/62] eta: 0:04:28 loss: 0.8431 (0.8431) time: 4.3285 data: 4.2357 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8476 (0.8480) time: 0.1200 data: 0.0942 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (hcp-val): loss: 0.8476 (0.8480) +Making plots (hcp-val): example=24 +Eval (nsd-val): [14] [ 0/62] eta: 0:04:02 loss: 0.8095 (0.8095) time: 3.9094 data: 3.8118 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8190 (0.8205) time: 0.1526 data: 0.1266 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (nsd-val): loss: 0.8190 (0.8205) +Making plots (nsd-val): example=6 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 6:57:49 lr: 0.000122 grad: 0.0479 (0.0479) loss: 0.8835 (0.8835) time: 4.0111 data: 3.7614 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:21:27 lr: 0.000122 grad: 0.0817 (0.0810) loss: 0.8452 (0.8538) time: 0.1719 data: 0.0724 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:18:38 lr: 0.000122 grad: 0.0762 (0.0809) loss: 0.8321 (0.8458) time: 0.1284 data: 0.0370 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:17:04 lr: 0.000122 grad: 0.0729 (0.0804) loss: 0.8342 (0.8418) time: 0.1468 data: 0.0601 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:16:34 lr: 0.000122 grad: 0.0702 (0.0815) loss: 0.8340 (0.8386) time: 0.1764 data: 0.0728 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:15:54 lr: 0.000122 grad: 0.0692 (0.0818) loss: 0.8401 (0.8370) time: 0.1356 data: 0.0319 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:15:25 lr: 0.000122 grad: 0.0742 (0.0816) loss: 0.8336 (0.8364) time: 0.1360 data: 0.0282 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:14:59 lr: 0.000122 grad: 0.0682 (0.0806) loss: 0.8404 (0.8365) time: 0.1489 data: 0.0460 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:14:41 lr: 0.000122 grad: 0.0772 (0.0806) loss: 0.8347 (0.8361) time: 0.1699 data: 0.0853 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:14:25 lr: 0.000122 grad: 0.0778 (0.0808) loss: 0.8372 (0.8357) time: 0.1465 data: 0.0580 max mem: 9377 +Train: [15] [1000/6250] eta: 0:14:11 lr: 0.000122 grad: 0.0797 (0.0812) loss: 0.8384 (0.8358) time: 0.1544 data: 0.0653 max mem: 9377 +Train: [15] [1100/6250] eta: 0:13:51 lr: 0.000121 grad: 0.0757 (0.0812) loss: 0.8288 (0.8354) time: 0.1515 data: 0.0551 max mem: 9377 +Train: [15] [1200/6250] eta: 0:13:34 lr: 0.000121 grad: 0.0786 (0.0812) loss: 0.8294 (0.8351) time: 0.1732 data: 0.0930 max mem: 9377 +Train: [15] [1300/6250] eta: 0:13:19 lr: 0.000121 grad: 0.0834 (0.0814) loss: 0.8284 (0.8346) time: 0.1652 data: 0.0754 max mem: 9377 +Train: [15] [1400/6250] eta: 0:13:02 lr: 0.000121 grad: 0.0958 (0.0819) loss: 0.8258 (0.8341) time: 0.1711 data: 0.0903 max mem: 9377 +Train: [15] [1500/6250] eta: 0:12:45 lr: 0.000121 grad: 0.0763 (0.0820) loss: 0.8301 (0.8337) time: 0.1638 data: 0.0748 max mem: 9377 +Train: [15] [1600/6250] eta: 0:12:32 lr: 0.000121 grad: 0.0809 (0.0822) loss: 0.8290 (0.8334) time: 0.1740 data: 0.0802 max mem: 9377 +Train: [15] [1700/6250] eta: 0:12:15 lr: 0.000121 grad: 0.0818 (0.0822) loss: 0.8314 (0.8331) time: 0.1692 data: 0.0891 max mem: 9377 +Train: [15] [1800/6250] eta: 0:11:58 lr: 0.000121 grad: 0.0738 (0.0822) loss: 0.8311 (0.8328) time: 0.1437 data: 0.0542 max mem: 9377 +Train: [15] [1900/6250] eta: 0:11:42 lr: 0.000121 grad: 0.0809 (0.0822) loss: 0.8323 (0.8325) time: 0.1510 data: 0.0649 max mem: 9377 +Train: [15] [2000/6250] eta: 0:11:28 lr: 0.000121 grad: 0.0793 (0.0823) loss: 0.8254 (0.8324) time: 0.1940 data: 0.1106 max mem: 9377 +Train: [15] [2100/6250] eta: 0:11:09 lr: 0.000121 grad: 0.0793 (0.0823) loss: 0.8258 (0.8322) time: 0.1466 data: 0.0554 max mem: 9377 +Train: [15] [2200/6250] eta: 0:10:53 lr: 0.000121 grad: 0.0776 (0.0823) loss: 0.8285 (0.8321) time: 0.1456 data: 0.0542 max mem: 9377 +Train: [15] [2300/6250] eta: 0:10:36 lr: 0.000121 grad: 0.0773 (0.0822) loss: 0.8322 (0.8320) time: 0.1443 data: 0.0636 max mem: 9377 +Train: [15] [2400/6250] eta: 0:10:20 lr: 0.000121 grad: 0.0773 (0.0822) loss: 0.8307 (0.8319) time: 0.1591 data: 0.0713 max mem: 9377 +Train: [15] [2500/6250] eta: 0:10:04 lr: 0.000121 grad: 0.0840 (0.0822) loss: 0.8315 (0.8319) time: 0.1728 data: 0.0934 max mem: 9377 +Train: [15] [2600/6250] eta: 0:09:46 lr: 0.000121 grad: 0.0868 (0.0824) loss: 0.8316 (0.8318) time: 0.1532 data: 0.0689 max mem: 9377 +Train: [15] [2700/6250] eta: 0:09:30 lr: 0.000121 grad: 0.0877 (0.0825) loss: 0.8309 (0.8318) time: 0.1610 data: 0.0745 max mem: 9377 +Train: [15] [2800/6250] eta: 0:09:13 lr: 0.000121 grad: 0.0791 (0.0826) loss: 0.8305 (0.8318) time: 0.1288 data: 0.0392 max mem: 9377 +Train: [15] [2900/6250] eta: 0:08:57 lr: 0.000121 grad: 0.0828 (0.0828) loss: 0.8291 (0.8317) time: 0.1418 data: 0.0576 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:41 lr: 0.000121 grad: 0.0819 (0.0829) loss: 0.8286 (0.8316) time: 0.1346 data: 0.0491 max mem: 9377 +Train: [15] [3100/6250] eta: 0:08:24 lr: 0.000121 grad: 0.0859 (0.0831) loss: 0.8279 (0.8314) time: 0.1343 data: 0.0462 max mem: 9377 +Train: [15] [3200/6250] eta: 0:08:08 lr: 0.000121 grad: 0.0794 (0.0831) loss: 0.8322 (0.8314) time: 0.1421 data: 0.0532 max mem: 9377 +Train: [15] [3300/6250] eta: 0:07:52 lr: 0.000121 grad: 0.0812 (0.0832) loss: 0.8274 (0.8314) time: 0.1462 data: 0.0663 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:36 lr: 0.000121 grad: 0.0890 (0.0834) loss: 0.8330 (0.8314) time: 0.1120 data: 0.0264 max mem: 9377 +Train: [15] [3500/6250] eta: 0:07:19 lr: 0.000121 grad: 0.0796 (0.0835) loss: 0.8362 (0.8314) time: 0.1500 data: 0.0630 max mem: 9377 +Train: [15] [3600/6250] eta: 0:07:03 lr: 0.000121 grad: 0.0847 (0.0835) loss: 0.8314 (0.8313) time: 0.1439 data: 0.0571 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:47 lr: 0.000121 grad: 0.0838 (0.0836) loss: 0.8299 (0.8312) time: 0.1589 data: 0.0708 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:31 lr: 0.000121 grad: 0.0823 (0.0837) loss: 0.8306 (0.8312) time: 0.1717 data: 0.0804 max mem: 9377 +Train: [15] [3900/6250] eta: 0:06:14 lr: 0.000121 grad: 0.0845 (0.0838) loss: 0.8264 (0.8311) time: 0.1421 data: 0.0533 max mem: 9377 +Train: [15] [4000/6250] eta: 0:05:58 lr: 0.000121 grad: 0.0847 (0.0840) loss: 0.8259 (0.8310) time: 0.1440 data: 0.0524 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:42 lr: 0.000121 grad: 0.0825 (0.0841) loss: 0.8307 (0.8309) time: 0.1719 data: 0.0893 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:26 lr: 0.000121 grad: 0.0856 (0.0841) loss: 0.8295 (0.8308) time: 0.1381 data: 0.0496 max mem: 9377 +Train: [15] [4300/6250] eta: 0:05:10 lr: 0.000121 grad: 0.0885 (0.0842) loss: 0.8277 (0.8307) time: 0.1501 data: 0.0624 max mem: 9377 +Train: [15] [4400/6250] eta: 0:04:54 lr: 0.000121 grad: 0.0873 (0.0843) loss: 0.8256 (0.8307) time: 0.1410 data: 0.0518 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:38 lr: 0.000121 grad: 0.0823 (0.0845) loss: 0.8343 (0.8306) time: 0.1481 data: 0.0550 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:22 lr: 0.000121 grad: 0.0848 (0.0845) loss: 0.8260 (0.8305) time: 0.1396 data: 0.0501 max mem: 9377 +Train: [15] [4700/6250] eta: 0:04:05 lr: 0.000121 grad: 0.0816 (0.0847) loss: 0.8298 (0.8305) time: 0.1457 data: 0.0478 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:50 lr: 0.000121 grad: 0.0816 (0.0848) loss: 0.8267 (0.8305) time: 0.1533 data: 0.0649 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:34 lr: 0.000121 grad: 0.0809 (0.0848) loss: 0.8306 (0.8304) time: 0.1448 data: 0.0638 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:18 lr: 0.000121 grad: 0.0860 (0.0848) loss: 0.8241 (0.8304) time: 0.1828 data: 0.0986 max mem: 9377 +Train: [15] [5100/6250] eta: 0:03:02 lr: 0.000121 grad: 0.0827 (0.0848) loss: 0.8325 (0.8304) time: 0.1458 data: 0.0605 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:46 lr: 0.000121 grad: 0.0838 (0.0848) loss: 0.8290 (0.8303) time: 0.1640 data: 0.0828 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:31 lr: 0.000121 grad: 0.0790 (0.0847) loss: 0.8369 (0.8303) time: 0.2039 data: 0.1258 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:15 lr: 0.000121 grad: 0.0809 (0.0847) loss: 0.8287 (0.8303) time: 0.1678 data: 0.0825 max mem: 9377 +Train: [15] [5500/6250] eta: 0:02:00 lr: 0.000121 grad: 0.0809 (0.0847) loss: 0.8333 (0.8304) time: 0.1862 data: 0.0991 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:44 lr: 0.000121 grad: 0.0741 (0.0847) loss: 0.8340 (0.8304) time: 0.2119 data: 0.1233 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:28 lr: 0.000121 grad: 0.0801 (0.0847) loss: 0.8308 (0.8304) time: 0.1588 data: 0.0713 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:12 lr: 0.000121 grad: 0.0838 (0.0847) loss: 0.8206 (0.8303) time: 0.1702 data: 0.0870 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:56 lr: 0.000121 grad: 0.0802 (0.0846) loss: 0.8299 (0.8303) time: 0.1798 data: 0.0867 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:40 lr: 0.000121 grad: 0.0823 (0.0846) loss: 0.8307 (0.8303) time: 0.1698 data: 0.0746 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:24 lr: 0.000121 grad: 0.0769 (0.0847) loss: 0.8281 (0.8302) time: 0.1897 data: 0.0901 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:08 lr: 0.000121 grad: 0.0811 (0.0847) loss: 0.8232 (0.8302) time: 0.1742 data: 0.0877 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.0795 (0.0847) loss: 0.8280 (0.8301) time: 0.1850 data: 0.1004 max mem: 9377 +Train: [15] Total time: 0:16:57 (0.1628 s / it) +Averaged stats: lr: 0.000121 grad: 0.0795 (0.0847) loss: 0.8280 (0.8301) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:06:29 loss: 0.8476 (0.8476) time: 6.2745 data: 6.2411 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8509 (0.8496) time: 0.1388 data: 0.1132 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:15 (0.2471 s / it) +Averaged stats (hcp-train-subset): loss: 0.8509 (0.8496) +Eval (hcp-val): [15] [ 0/62] eta: 0:05:36 loss: 0.8433 (0.8433) time: 5.4294 data: 5.3990 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8470 (0.8476) time: 0.1415 data: 0.1143 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (hcp-val): loss: 0.8470 (0.8476) +Eval (nsd-val): [15] [ 0/62] eta: 0:04:26 loss: 0.8081 (0.8081) time: 4.3013 data: 4.2097 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8181 (0.8183) time: 0.1334 data: 0.1075 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:13 (0.2213 s / it) +Averaged stats (nsd-val): loss: 0.8181 (0.8183) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [16] [ 0/6250] eta: 11:05:11 lr: 0.000121 grad: 0.0725 (0.0725) loss: 0.8283 (0.8283) time: 6.3858 data: 6.2818 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:22:40 lr: 0.000121 grad: 0.0707 (0.0816) loss: 0.8508 (0.8502) time: 0.1721 data: 0.0794 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:19:08 lr: 0.000121 grad: 0.0824 (0.0866) loss: 0.8244 (0.8411) time: 0.1462 data: 0.0540 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:17:35 lr: 0.000121 grad: 0.0834 (0.0882) loss: 0.8244 (0.8362) time: 0.1404 data: 0.0532 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:16:39 lr: 0.000121 grad: 0.0768 (0.0866) loss: 0.8329 (0.8345) time: 0.1523 data: 0.0660 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:16:06 lr: 0.000121 grad: 0.0861 (0.0863) loss: 0.8294 (0.8336) time: 0.1533 data: 0.0599 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:15:36 lr: 0.000121 grad: 0.0819 (0.0862) loss: 0.8343 (0.8327) time: 0.1526 data: 0.0531 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:15:13 lr: 0.000121 grad: 0.0787 (0.0857) loss: 0.8309 (0.8323) time: 0.1373 data: 0.0551 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:14:52 lr: 0.000121 grad: 0.0781 (0.0852) loss: 0.8351 (0.8321) time: 0.1686 data: 0.0727 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:14:35 lr: 0.000121 grad: 0.0847 (0.0850) loss: 0.8207 (0.8318) time: 0.1678 data: 0.0731 max mem: 9377 +Train: [16] [1000/6250] eta: 0:14:14 lr: 0.000121 grad: 0.0826 (0.0847) loss: 0.8263 (0.8316) time: 0.1423 data: 0.0516 max mem: 9377 +Train: [16] [1100/6250] eta: 0:13:58 lr: 0.000121 grad: 0.0851 (0.0853) loss: 0.8240 (0.8314) time: 0.1782 data: 0.0935 max mem: 9377 +Train: [16] [1200/6250] eta: 0:13:39 lr: 0.000121 grad: 0.0799 (0.0856) loss: 0.8210 (0.8310) time: 0.1508 data: 0.0635 max mem: 9377 +Train: [16] [1300/6250] eta: 0:13:18 lr: 0.000121 grad: 0.0866 (0.0856) loss: 0.8273 (0.8305) time: 0.1450 data: 0.0550 max mem: 9377 +Train: [16] [1400/6250] eta: 0:13:02 lr: 0.000121 grad: 0.0858 (0.0856) loss: 0.8240 (0.8301) time: 0.1496 data: 0.0573 max mem: 9377 +Train: [16] [1500/6250] eta: 0:12:44 lr: 0.000121 grad: 0.0835 (0.0855) loss: 0.8267 (0.8299) time: 0.1724 data: 0.0799 max mem: 9377 +Train: [16] [1600/6250] eta: 0:12:26 lr: 0.000121 grad: 0.0794 (0.0853) loss: 0.8319 (0.8297) time: 0.1534 data: 0.0683 max mem: 9377 +Train: [16] [1700/6250] eta: 0:12:06 lr: 0.000121 grad: 0.0788 (0.0852) loss: 0.8287 (0.8296) time: 0.1364 data: 0.0483 max mem: 9377 +Train: [16] [1800/6250] eta: 0:11:49 lr: 0.000121 grad: 0.0811 (0.0850) loss: 0.8250 (0.8295) time: 0.1493 data: 0.0599 max mem: 9377 +Train: [16] [1900/6250] eta: 0:11:33 lr: 0.000121 grad: 0.0776 (0.0847) loss: 0.8369 (0.8295) time: 0.1602 data: 0.0741 max mem: 9377 +Train: [16] [2000/6250] eta: 0:11:17 lr: 0.000121 grad: 0.0875 (0.0846) loss: 0.8310 (0.8293) time: 0.1585 data: 0.0774 max mem: 9377 +Train: [16] [2100/6250] eta: 0:11:00 lr: 0.000121 grad: 0.0776 (0.0843) loss: 0.8344 (0.8294) time: 0.1611 data: 0.0675 max mem: 9377 +Train: [16] [2200/6250] eta: 0:10:43 lr: 0.000121 grad: 0.0852 (0.0841) loss: 0.8268 (0.8294) time: 0.1626 data: 0.0880 max mem: 9377 +Train: [16] [2300/6250] eta: 0:10:27 lr: 0.000121 grad: 0.0773 (0.0840) loss: 0.8342 (0.8294) time: 0.1567 data: 0.0698 max mem: 9377 +Train: [16] [2400/6250] eta: 0:10:10 lr: 0.000121 grad: 0.0734 (0.0838) loss: 0.8319 (0.8294) time: 0.1695 data: 0.0833 max mem: 9377 +Train: [16] [2500/6250] eta: 0:09:54 lr: 0.000121 grad: 0.0785 (0.0837) loss: 0.8360 (0.8295) time: 0.1506 data: 0.0661 max mem: 9377 +Train: [16] [2600/6250] eta: 0:09:37 lr: 0.000121 grad: 0.0849 (0.0837) loss: 0.8305 (0.8294) time: 0.1409 data: 0.0499 max mem: 9377 +Train: [16] [2700/6250] eta: 0:09:21 lr: 0.000121 grad: 0.0739 (0.0836) loss: 0.8294 (0.8295) time: 0.1396 data: 0.0444 max mem: 9377 +Train: [16] [2800/6250] eta: 0:09:05 lr: 0.000121 grad: 0.0816 (0.0835) loss: 0.8278 (0.8294) time: 0.1611 data: 0.0740 max mem: 9377 +Train: [16] [2900/6250] eta: 0:08:48 lr: 0.000121 grad: 0.0804 (0.0836) loss: 0.8246 (0.8295) time: 0.1615 data: 0.0818 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:32 lr: 0.000121 grad: 0.0856 (0.0836) loss: 0.8272 (0.8295) time: 0.1505 data: 0.0565 max mem: 9377 +Train: [16] [3100/6250] eta: 0:08:16 lr: 0.000121 grad: 0.0848 (0.0836) loss: 0.8254 (0.8295) time: 0.1317 data: 0.0424 max mem: 9377 +Train: [16] [3200/6250] eta: 0:07:59 lr: 0.000121 grad: 0.0795 (0.0835) loss: 0.8238 (0.8295) time: 0.1626 data: 0.0759 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:44 lr: 0.000121 grad: 0.0911 (0.0838) loss: 0.8170 (0.8295) time: 0.1680 data: 0.0810 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:28 lr: 0.000121 grad: 0.0797 (0.0837) loss: 0.8322 (0.8295) time: 0.1419 data: 0.0541 max mem: 9377 +Train: [16] [3500/6250] eta: 0:07:12 lr: 0.000120 grad: 0.0838 (0.0838) loss: 0.8261 (0.8294) time: 0.1775 data: 0.0982 max mem: 9377 +Train: [16] [3600/6250] eta: 0:06:56 lr: 0.000120 grad: 0.0791 (0.0838) loss: 0.8266 (0.8293) time: 0.1639 data: 0.0833 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:40 lr: 0.000120 grad: 0.0772 (0.0838) loss: 0.8333 (0.8294) time: 0.1875 data: 0.1063 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:25 lr: 0.000120 grad: 0.0860 (0.0837) loss: 0.8332 (0.8294) time: 0.1883 data: 0.1076 max mem: 9377 +Train: [16] [3900/6250] eta: 0:06:09 lr: 0.000120 grad: 0.0817 (0.0836) loss: 0.8326 (0.8295) time: 0.1604 data: 0.0715 max mem: 9377 +Train: [16] [4000/6250] eta: 0:05:54 lr: 0.000120 grad: 0.0793 (0.0835) loss: 0.8330 (0.8295) time: 0.2010 data: 0.1142 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:37 lr: 0.000120 grad: 0.0716 (0.0835) loss: 0.8322 (0.8296) time: 0.1496 data: 0.0597 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:21 lr: 0.000120 grad: 0.0784 (0.0834) loss: 0.8325 (0.8296) time: 0.1465 data: 0.0533 max mem: 9377 +Train: [16] [4300/6250] eta: 0:05:06 lr: 0.000120 grad: 0.0743 (0.0833) loss: 0.8304 (0.8296) time: 0.1767 data: 0.0857 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:50 lr: 0.000120 grad: 0.0816 (0.0834) loss: 0.8388 (0.8297) time: 0.1501 data: 0.0641 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:35 lr: 0.000120 grad: 0.0776 (0.0834) loss: 0.8305 (0.8297) time: 0.1762 data: 0.0885 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:19 lr: 0.000120 grad: 0.0855 (0.0834) loss: 0.8384 (0.8297) time: 0.1813 data: 0.0899 max mem: 9377 +Train: [16] [4700/6250] eta: 0:04:03 lr: 0.000120 grad: 0.0837 (0.0834) loss: 0.8260 (0.8298) time: 0.1805 data: 0.0969 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:48 lr: 0.000120 grad: 0.0826 (0.0834) loss: 0.8307 (0.8298) time: 0.1696 data: 0.0748 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:32 lr: 0.000120 grad: 0.0746 (0.0833) loss: 0.8335 (0.8299) time: 0.1553 data: 0.0689 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:16 lr: 0.000120 grad: 0.0855 (0.0833) loss: 0.8262 (0.8299) time: 0.1749 data: 0.0883 max mem: 9377 +Train: [16] [5100/6250] eta: 0:03:00 lr: 0.000120 grad: 0.0813 (0.0833) loss: 0.8274 (0.8299) time: 0.1492 data: 0.0606 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:45 lr: 0.000120 grad: 0.0804 (0.0834) loss: 0.8293 (0.8299) time: 0.1553 data: 0.0597 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:29 lr: 0.000120 grad: 0.0835 (0.0835) loss: 0.8311 (0.8299) time: 0.1592 data: 0.0768 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:14 lr: 0.000120 grad: 0.0811 (0.0835) loss: 0.8300 (0.8299) time: 0.1682 data: 0.0901 max mem: 9377 +Train: [16] [5500/6250] eta: 0:01:58 lr: 0.000120 grad: 0.0780 (0.0836) loss: 0.8308 (0.8299) time: 0.1846 data: 0.1035 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:43 lr: 0.000120 grad: 0.0825 (0.0836) loss: 0.8308 (0.8299) time: 0.2096 data: 0.1228 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:27 lr: 0.000120 grad: 0.0827 (0.0836) loss: 0.8286 (0.8299) time: 0.1796 data: 0.0967 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:11 lr: 0.000120 grad: 0.0763 (0.0835) loss: 0.8304 (0.8299) time: 0.1659 data: 0.0808 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:55 lr: 0.000120 grad: 0.0819 (0.0835) loss: 0.8297 (0.8300) time: 0.1647 data: 0.0738 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:39 lr: 0.000120 grad: 0.0818 (0.0835) loss: 0.8228 (0.8300) time: 0.1392 data: 0.0548 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:23 lr: 0.000120 grad: 0.0796 (0.0835) loss: 0.8321 (0.8300) time: 0.1559 data: 0.0718 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:07 lr: 0.000120 grad: 0.0769 (0.0834) loss: 0.8365 (0.8301) time: 0.1480 data: 0.0587 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.0819 (0.0835) loss: 0.8327 (0.8301) time: 0.1603 data: 0.0594 max mem: 9377 +Train: [16] Total time: 0:16:38 (0.1598 s / it) +Averaged stats: lr: 0.000120 grad: 0.0819 (0.0835) loss: 0.8327 (0.8301) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:05:24 loss: 0.8477 (0.8477) time: 5.2377 data: 5.2072 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8487 (0.8499) time: 0.1380 data: 0.1107 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (hcp-train-subset): loss: 0.8487 (0.8499) +Eval (hcp-val): [16] [ 0/62] eta: 0:05:33 loss: 0.8430 (0.8430) time: 5.3808 data: 5.3477 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8464 (0.8476) time: 0.1206 data: 0.0947 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:14 (0.2261 s / it) +Averaged stats (hcp-val): loss: 0.8464 (0.8476) +Eval (nsd-val): [16] [ 0/62] eta: 0:05:02 loss: 0.8081 (0.8081) time: 4.8737 data: 4.8084 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8203 (0.8207) time: 0.1344 data: 0.1091 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (nsd-val): loss: 0.8203 (0.8207) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [17] [ 0/6250] eta: 8:30:56 lr: 0.000120 grad: 0.1137 (0.1137) loss: 0.8325 (0.8325) time: 4.9050 data: 4.6374 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:21:24 lr: 0.000120 grad: 0.0877 (0.0941) loss: 0.8349 (0.8451) time: 0.1729 data: 0.0802 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:19:40 lr: 0.000120 grad: 0.0881 (0.0939) loss: 0.8163 (0.8341) time: 0.1506 data: 0.0657 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:18:55 lr: 0.000120 grad: 0.0764 (0.0904) loss: 0.8172 (0.8299) time: 0.1608 data: 0.0670 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:18:17 lr: 0.000120 grad: 0.0835 (0.0887) loss: 0.8298 (0.8279) time: 0.1617 data: 0.0702 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:17:46 lr: 0.000120 grad: 0.0769 (0.0874) loss: 0.8147 (0.8269) time: 0.1767 data: 0.0805 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:17:32 lr: 0.000120 grad: 0.0817 (0.0869) loss: 0.8318 (0.8270) time: 0.1807 data: 0.0741 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:17:05 lr: 0.000120 grad: 0.0756 (0.0861) loss: 0.8279 (0.8271) time: 0.1814 data: 0.1004 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:16:35 lr: 0.000120 grad: 0.0822 (0.0859) loss: 0.8314 (0.8274) time: 0.1749 data: 0.0794 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:16:07 lr: 0.000120 grad: 0.0800 (0.0856) loss: 0.8279 (0.8275) time: 0.1609 data: 0.0675 max mem: 9377 +Train: [17] [1000/6250] eta: 0:15:43 lr: 0.000120 grad: 0.0859 (0.0855) loss: 0.8271 (0.8275) time: 0.1717 data: 0.0730 max mem: 9377 +Train: [17] [1100/6250] eta: 0:15:18 lr: 0.000120 grad: 0.0808 (0.0852) loss: 0.8294 (0.8276) time: 0.1830 data: 0.0906 max mem: 9377 +Train: [17] [1200/6250] eta: 0:14:54 lr: 0.000120 grad: 0.0815 (0.0851) loss: 0.8291 (0.8273) time: 0.1583 data: 0.0693 max mem: 9377 +Train: [17] [1300/6250] eta: 0:14:33 lr: 0.000120 grad: 0.0864 (0.0853) loss: 0.8202 (0.8271) time: 0.1707 data: 0.0823 max mem: 9377 +Train: [17] [1400/6250] eta: 0:14:11 lr: 0.000120 grad: 0.0773 (0.0855) loss: 0.8322 (0.8270) time: 0.1533 data: 0.0617 max mem: 9377 +Train: [17] [1500/6250] eta: 0:13:46 lr: 0.000120 grad: 0.0814 (0.0854) loss: 0.8305 (0.8269) time: 0.1566 data: 0.0662 max mem: 9377 +Train: [17] [1600/6250] eta: 0:13:24 lr: 0.000120 grad: 0.0876 (0.0853) loss: 0.8192 (0.8270) time: 0.1659 data: 0.0775 max mem: 9377 +Train: [17] [1700/6250] eta: 0:13:02 lr: 0.000120 grad: 0.0848 (0.0852) loss: 0.8304 (0.8269) time: 0.1556 data: 0.0626 max mem: 9377 +Train: [17] [1800/6250] eta: 0:12:42 lr: 0.000120 grad: 0.0774 (0.0852) loss: 0.8318 (0.8272) time: 0.1561 data: 0.0671 max mem: 9377 +Train: [17] [1900/6250] eta: 0:12:22 lr: 0.000120 grad: 0.0873 (0.0851) loss: 0.8280 (0.8273) time: 0.1819 data: 0.0897 max mem: 9377 +Train: [17] [2000/6250] eta: 0:12:01 lr: 0.000120 grad: 0.0769 (0.0850) loss: 0.8278 (0.8274) time: 0.1706 data: 0.0827 max mem: 9377 +Train: [17] [2100/6250] eta: 0:11:40 lr: 0.000120 grad: 0.0849 (0.0850) loss: 0.8226 (0.8274) time: 0.1432 data: 0.0522 max mem: 9377 +Train: [17] [2200/6250] eta: 0:11:21 lr: 0.000120 grad: 0.0809 (0.0850) loss: 0.8255 (0.8274) time: 0.1586 data: 0.0647 max mem: 9377 +Train: [17] [2300/6250] eta: 0:11:02 lr: 0.000120 grad: 0.0809 (0.0849) loss: 0.8247 (0.8273) time: 0.1625 data: 0.0760 max mem: 9377 +Train: [17] [2400/6250] eta: 0:10:44 lr: 0.000120 grad: 0.0799 (0.0850) loss: 0.8238 (0.8273) time: 0.1513 data: 0.0602 max mem: 9377 +Train: [17] [2500/6250] eta: 0:10:28 lr: 0.000120 grad: 0.0894 (0.0851) loss: 0.8223 (0.8273) time: 0.1687 data: 0.0822 max mem: 9377 +Train: [17] [2600/6250] eta: 0:10:09 lr: 0.000120 grad: 0.0779 (0.0852) loss: 0.8297 (0.8273) time: 0.1705 data: 0.0844 max mem: 9377 +Train: [17] [2700/6250] eta: 0:09:52 lr: 0.000120 grad: 0.0794 (0.0853) loss: 0.8311 (0.8274) time: 0.2026 data: 0.1232 max mem: 9377 +Train: [17] [2800/6250] eta: 0:09:34 lr: 0.000120 grad: 0.0847 (0.0854) loss: 0.8288 (0.8274) time: 0.1454 data: 0.0590 max mem: 9377 +Train: [17] [2900/6250] eta: 0:09:16 lr: 0.000120 grad: 0.0831 (0.0854) loss: 0.8284 (0.8275) time: 0.1684 data: 0.0797 max mem: 9377 +Train: [17] [3000/6250] eta: 0:08:58 lr: 0.000120 grad: 0.0801 (0.0854) loss: 0.8326 (0.8275) time: 0.1714 data: 0.0816 max mem: 9377 +Train: [17] [3100/6250] eta: 0:08:40 lr: 0.000120 grad: 0.0885 (0.0854) loss: 0.8293 (0.8275) time: 0.1155 data: 0.0093 max mem: 9377 +Train: [17] [3200/6250] eta: 0:08:22 lr: 0.000120 grad: 0.0846 (0.0855) loss: 0.8357 (0.8275) time: 0.1405 data: 0.0486 max mem: 9377 +Train: [17] [3300/6250] eta: 0:08:04 lr: 0.000120 grad: 0.0854 (0.0857) loss: 0.8280 (0.8276) time: 0.1921 data: 0.1106 max mem: 9377 +Train: [17] [3400/6250] eta: 0:07:47 lr: 0.000120 grad: 0.0804 (0.0857) loss: 0.8285 (0.8276) time: 0.1528 data: 0.0739 max mem: 9377 +Train: [17] [3500/6250] eta: 0:07:30 lr: 0.000120 grad: 0.0833 (0.0857) loss: 0.8290 (0.8276) time: 0.1825 data: 0.0988 max mem: 9377 +Train: [17] [3600/6250] eta: 0:07:14 lr: 0.000120 grad: 0.0811 (0.0857) loss: 0.8266 (0.8277) time: 0.1942 data: 0.1095 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:57 lr: 0.000120 grad: 0.0785 (0.0857) loss: 0.8265 (0.8277) time: 0.1570 data: 0.0694 max mem: 9377 +Train: [17] [3800/6250] eta: 0:06:41 lr: 0.000120 grad: 0.0765 (0.0857) loss: 0.8290 (0.8277) time: 0.1512 data: 0.0587 max mem: 9377 +Train: [17] [3900/6250] eta: 0:06:25 lr: 0.000120 grad: 0.0779 (0.0858) loss: 0.8257 (0.8277) time: 0.2195 data: 0.1296 max mem: 9377 +Train: [17] [4000/6250] eta: 0:06:08 lr: 0.000120 grad: 0.0878 (0.0859) loss: 0.8204 (0.8276) time: 0.1568 data: 0.0610 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:51 lr: 0.000120 grad: 0.0791 (0.0859) loss: 0.8316 (0.8275) time: 0.1437 data: 0.0534 max mem: 9377 +Train: [17] [4200/6250] eta: 0:05:35 lr: 0.000120 grad: 0.0849 (0.0860) loss: 0.8251 (0.8275) time: 0.1505 data: 0.0635 max mem: 9377 +Train: [17] [4300/6250] eta: 0:05:18 lr: 0.000120 grad: 0.0871 (0.0861) loss: 0.8184 (0.8275) time: 0.1526 data: 0.0594 max mem: 9377 +Train: [17] [4400/6250] eta: 0:05:02 lr: 0.000120 grad: 0.0839 (0.0861) loss: 0.8277 (0.8275) time: 0.1717 data: 0.0874 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:45 lr: 0.000120 grad: 0.0871 (0.0861) loss: 0.8188 (0.8274) time: 0.1776 data: 0.0934 max mem: 9377 +Train: [17] [4600/6250] eta: 0:04:29 lr: 0.000120 grad: 0.0849 (0.0861) loss: 0.8247 (0.8274) time: 0.1726 data: 0.0892 max mem: 9377 +Train: [17] [4700/6250] eta: 0:04:12 lr: 0.000120 grad: 0.0807 (0.0861) loss: 0.8247 (0.8274) time: 0.1560 data: 0.0568 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:56 lr: 0.000120 grad: 0.0800 (0.0861) loss: 0.8267 (0.8273) time: 0.1721 data: 0.0880 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:39 lr: 0.000119 grad: 0.0871 (0.0862) loss: 0.8155 (0.8273) time: 0.1526 data: 0.0639 max mem: 9377 +Train: [17] [5000/6250] eta: 0:03:23 lr: 0.000119 grad: 0.0822 (0.0863) loss: 0.8240 (0.8271) time: 0.1704 data: 0.0774 max mem: 9377 +Train: [17] [5100/6250] eta: 0:03:06 lr: 0.000119 grad: 0.0836 (0.0864) loss: 0.8202 (0.8270) time: 0.1587 data: 0.0695 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:51 lr: 0.000119 grad: 0.0848 (0.0865) loss: 0.8239 (0.8269) time: 0.1955 data: 0.1131 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:34 lr: 0.000119 grad: 0.0848 (0.0865) loss: 0.8262 (0.8268) time: 0.1920 data: 0.0976 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:18 lr: 0.000119 grad: 0.0805 (0.0865) loss: 0.8217 (0.8268) time: 0.1663 data: 0.0762 max mem: 9377 +Train: [17] [5500/6250] eta: 0:02:02 lr: 0.000119 grad: 0.0750 (0.0865) loss: 0.8303 (0.8268) time: 0.1920 data: 0.1039 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:46 lr: 0.000119 grad: 0.0849 (0.0866) loss: 0.8260 (0.8267) time: 0.1664 data: 0.0783 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:29 lr: 0.000119 grad: 0.0797 (0.0866) loss: 0.8252 (0.8267) time: 0.1665 data: 0.0728 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:13 lr: 0.000119 grad: 0.0845 (0.0866) loss: 0.8207 (0.8267) time: 0.1524 data: 0.0456 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:57 lr: 0.000119 grad: 0.0810 (0.0866) loss: 0.8214 (0.8267) time: 0.1635 data: 0.0697 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:40 lr: 0.000119 grad: 0.0868 (0.0866) loss: 0.8308 (0.8266) time: 0.1500 data: 0.0604 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:24 lr: 0.000119 grad: 0.0871 (0.0866) loss: 0.8212 (0.8266) time: 0.1529 data: 0.0492 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:08 lr: 0.000119 grad: 0.0815 (0.0866) loss: 0.8189 (0.8266) time: 0.1512 data: 0.0431 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.0772 (0.0866) loss: 0.8284 (0.8266) time: 0.1818 data: 0.0843 max mem: 9377 +Train: [17] Total time: 0:17:05 (0.1640 s / it) +Averaged stats: lr: 0.000119 grad: 0.0772 (0.0866) loss: 0.8284 (0.8266) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:06:20 loss: 0.8495 (0.8495) time: 6.1414 data: 6.1110 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8474 (0.8492) time: 0.1510 data: 0.1229 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:15 (0.2542 s / it) +Averaged stats (hcp-train-subset): loss: 0.8474 (0.8492) +Eval (hcp-val): [17] [ 0/62] eta: 0:05:33 loss: 0.8459 (0.8459) time: 5.3837 data: 5.3345 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8445 (0.8468) time: 0.1523 data: 0.1255 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:15 (0.2567 s / it) +Averaged stats (hcp-val): loss: 0.8445 (0.8468) +Eval (nsd-val): [17] [ 0/62] eta: 0:06:12 loss: 0.8098 (0.8098) time: 6.0074 data: 5.9604 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8174 (0.8198) time: 0.1377 data: 0.1120 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:14 (0.2305 s / it) +Averaged stats (nsd-val): loss: 0.8174 (0.8198) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [18] [ 0/6250] eta: 11:55:11 lr: 0.000119 grad: 0.0903 (0.0903) loss: 0.8148 (0.8148) time: 6.8659 data: 6.7309 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:22:49 lr: 0.000119 grad: 0.0979 (0.1039) loss: 0.8193 (0.8294) time: 0.1514 data: 0.0300 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:20:23 lr: 0.000119 grad: 0.0976 (0.1009) loss: 0.8111 (0.8258) time: 0.1946 data: 0.0937 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:18:51 lr: 0.000119 grad: 0.0828 (0.0975) loss: 0.8196 (0.8260) time: 0.1571 data: 0.0583 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:18:04 lr: 0.000119 grad: 0.0881 (0.0956) loss: 0.8305 (0.8266) time: 0.1590 data: 0.0683 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:17:25 lr: 0.000119 grad: 0.0833 (0.0950) loss: 0.8180 (0.8265) time: 0.1800 data: 0.0820 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:16:55 lr: 0.000119 grad: 0.0833 (0.0938) loss: 0.8239 (0.8259) time: 0.1746 data: 0.0704 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:16:23 lr: 0.000119 grad: 0.0813 (0.0929) loss: 0.8306 (0.8259) time: 0.1645 data: 0.0694 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:15:54 lr: 0.000119 grad: 0.0804 (0.0924) loss: 0.8318 (0.8260) time: 0.1616 data: 0.0765 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:15:29 lr: 0.000119 grad: 0.0813 (0.0918) loss: 0.8277 (0.8260) time: 0.1479 data: 0.0659 max mem: 9377 +Train: [18] [1000/6250] eta: 0:15:05 lr: 0.000119 grad: 0.0865 (0.0913) loss: 0.8269 (0.8260) time: 0.1771 data: 0.0885 max mem: 9377 +Train: [18] [1100/6250] eta: 0:14:42 lr: 0.000119 grad: 0.0829 (0.0907) loss: 0.8274 (0.8261) time: 0.1802 data: 0.0973 max mem: 9377 +Train: [18] [1200/6250] eta: 0:14:22 lr: 0.000119 grad: 0.0850 (0.0903) loss: 0.8268 (0.8263) time: 0.1973 data: 0.1116 max mem: 9377 +Train: [18] [1300/6250] eta: 0:14:03 lr: 0.000119 grad: 0.0784 (0.0899) loss: 0.8340 (0.8264) time: 0.1841 data: 0.1038 max mem: 9377 +Train: [18] [1400/6250] eta: 0:13:42 lr: 0.000119 grad: 0.0859 (0.0898) loss: 0.8225 (0.8263) time: 0.1604 data: 0.0734 max mem: 9377 +Train: [18] [1500/6250] eta: 0:13:23 lr: 0.000119 grad: 0.0803 (0.0897) loss: 0.8301 (0.8264) time: 0.1425 data: 0.0519 max mem: 9377 +Train: [18] [1600/6250] eta: 0:13:03 lr: 0.000119 grad: 0.0909 (0.0895) loss: 0.8166 (0.8264) time: 0.1718 data: 0.0903 max mem: 9377 +Train: [18] [1700/6250] eta: 0:12:44 lr: 0.000119 grad: 0.0809 (0.0894) loss: 0.8231 (0.8263) time: 0.1460 data: 0.0563 max mem: 9377 +Train: [18] [1800/6250] eta: 0:12:24 lr: 0.000119 grad: 0.0852 (0.0893) loss: 0.8311 (0.8264) time: 0.1155 data: 0.0318 max mem: 9377 +Train: [18] [1900/6250] eta: 0:12:04 lr: 0.000119 grad: 0.0887 (0.0892) loss: 0.8221 (0.8263) time: 0.1514 data: 0.0536 max mem: 9377 +Train: [18] [2000/6250] eta: 0:11:44 lr: 0.000119 grad: 0.0838 (0.0891) loss: 0.8310 (0.8264) time: 0.1595 data: 0.0726 max mem: 9377 +Train: [18] [2100/6250] eta: 0:11:24 lr: 0.000119 grad: 0.0825 (0.0889) loss: 0.8288 (0.8264) time: 0.1432 data: 0.0598 max mem: 9377 +Train: [18] [2200/6250] eta: 0:11:05 lr: 0.000119 grad: 0.0935 (0.0889) loss: 0.8197 (0.8262) time: 0.1589 data: 0.0720 max mem: 9377 +Train: [18] [2300/6250] eta: 0:10:48 lr: 0.000119 grad: 0.0860 (0.0890) loss: 0.8188 (0.8260) time: 0.1694 data: 0.0894 max mem: 9377 +Train: [18] [2400/6250] eta: 0:10:29 lr: 0.000119 grad: 0.0859 (0.0890) loss: 0.8163 (0.8259) time: 0.1562 data: 0.0753 max mem: 9377 +Train: [18] [2500/6250] eta: 0:10:12 lr: 0.000119 grad: 0.0811 (0.0891) loss: 0.8225 (0.8257) time: 0.1650 data: 0.0830 max mem: 9377 +Train: [18] [2600/6250] eta: 0:09:55 lr: 0.000119 grad: 0.0922 (0.0892) loss: 0.8171 (0.8256) time: 0.1838 data: 0.0964 max mem: 9377 +Train: [18] [2700/6250] eta: 0:09:36 lr: 0.000119 grad: 0.0936 (0.0892) loss: 0.8191 (0.8254) time: 0.1337 data: 0.0497 max mem: 9377 +Train: [18] [2800/6250] eta: 0:09:19 lr: 0.000119 grad: 0.0946 (0.0893) loss: 0.8163 (0.8251) time: 0.1525 data: 0.0624 max mem: 9377 +Train: [18] [2900/6250] eta: 0:09:01 lr: 0.000119 grad: 0.0888 (0.0895) loss: 0.8169 (0.8249) time: 0.1614 data: 0.0778 max mem: 9377 +Train: [18] [3000/6250] eta: 0:08:45 lr: 0.000119 grad: 0.0882 (0.0897) loss: 0.8231 (0.8246) time: 0.1655 data: 0.0774 max mem: 9377 +Train: [18] [3100/6250] eta: 0:08:28 lr: 0.000119 grad: 0.0855 (0.0898) loss: 0.8209 (0.8244) time: 0.1647 data: 0.0786 max mem: 9377 +Train: [18] [3200/6250] eta: 0:08:13 lr: 0.000119 grad: 0.0828 (0.0899) loss: 0.8224 (0.8242) time: 0.1873 data: 0.1026 max mem: 9377 +Train: [18] [3300/6250] eta: 0:07:56 lr: 0.000119 grad: 0.0953 (0.0901) loss: 0.8194 (0.8241) time: 0.1622 data: 0.0679 max mem: 9377 +Train: [18] [3400/6250] eta: 0:07:40 lr: 0.000119 grad: 0.0913 (0.0902) loss: 0.8213 (0.8240) time: 0.1727 data: 0.0801 max mem: 9377 +Train: [18] [3500/6250] eta: 0:07:24 lr: 0.000119 grad: 0.0930 (0.0903) loss: 0.8215 (0.8240) time: 0.1647 data: 0.0799 max mem: 9377 +Train: [18] [3600/6250] eta: 0:07:07 lr: 0.000119 grad: 0.0978 (0.0904) loss: 0.8129 (0.8238) time: 0.1459 data: 0.0587 max mem: 9377 +Train: [18] [3700/6250] eta: 0:06:51 lr: 0.000119 grad: 0.0910 (0.0905) loss: 0.8251 (0.8238) time: 0.1544 data: 0.0553 max mem: 9377 +Train: [18] [3800/6250] eta: 0:06:35 lr: 0.000119 grad: 0.0863 (0.0905) loss: 0.8230 (0.8238) time: 0.1721 data: 0.0843 max mem: 9377 +Train: [18] [3900/6250] eta: 0:06:18 lr: 0.000119 grad: 0.0880 (0.0906) loss: 0.8255 (0.8237) time: 0.1532 data: 0.0691 max mem: 9377 +Train: [18] [4000/6250] eta: 0:06:02 lr: 0.000119 grad: 0.0924 (0.0907) loss: 0.8158 (0.8237) time: 0.1668 data: 0.0747 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:45 lr: 0.000119 grad: 0.0913 (0.0907) loss: 0.8197 (0.8236) time: 0.1431 data: 0.0428 max mem: 9377 +Train: [18] [4200/6250] eta: 0:05:29 lr: 0.000119 grad: 0.0882 (0.0907) loss: 0.8216 (0.8236) time: 0.1555 data: 0.0722 max mem: 9377 +Train: [18] [4300/6250] eta: 0:05:13 lr: 0.000119 grad: 0.0838 (0.0907) loss: 0.8255 (0.8237) time: 0.1533 data: 0.0640 max mem: 9377 +Train: [18] [4400/6250] eta: 0:04:56 lr: 0.000119 grad: 0.0928 (0.0907) loss: 0.8204 (0.8237) time: 0.1489 data: 0.0660 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:40 lr: 0.000119 grad: 0.0868 (0.0908) loss: 0.8287 (0.8237) time: 0.1363 data: 0.0489 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:24 lr: 0.000119 grad: 0.0916 (0.0908) loss: 0.8187 (0.8237) time: 0.1546 data: 0.0703 max mem: 9377 +Train: [18] [4700/6250] eta: 0:04:08 lr: 0.000119 grad: 0.0851 (0.0909) loss: 0.8238 (0.8237) time: 0.1513 data: 0.0645 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:52 lr: 0.000119 grad: 0.0838 (0.0910) loss: 0.8250 (0.8237) time: 0.1658 data: 0.0713 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:36 lr: 0.000119 grad: 0.0850 (0.0910) loss: 0.8208 (0.8236) time: 0.1634 data: 0.0810 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:19 lr: 0.000119 grad: 0.0859 (0.0910) loss: 0.8227 (0.8236) time: 0.1569 data: 0.0713 max mem: 9377 +Train: [18] [5100/6250] eta: 0:03:03 lr: 0.000119 grad: 0.0855 (0.0909) loss: 0.8279 (0.8236) time: 0.1560 data: 0.0646 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:48 lr: 0.000119 grad: 0.0933 (0.0909) loss: 0.8195 (0.8236) time: 0.1815 data: 0.0952 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:32 lr: 0.000119 grad: 0.0861 (0.0909) loss: 0.8228 (0.8235) time: 0.1451 data: 0.0630 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:16 lr: 0.000119 grad: 0.0938 (0.0909) loss: 0.8217 (0.8235) time: 0.2120 data: 0.1379 max mem: 9377 +Train: [18] [5500/6250] eta: 0:02:00 lr: 0.000119 grad: 0.0922 (0.0910) loss: 0.8158 (0.8235) time: 0.1815 data: 0.0986 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:44 lr: 0.000119 grad: 0.0893 (0.0910) loss: 0.8214 (0.8234) time: 0.1873 data: 0.0954 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:28 lr: 0.000119 grad: 0.0889 (0.0910) loss: 0.8233 (0.8234) time: 0.1366 data: 0.0531 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:12 lr: 0.000118 grad: 0.0909 (0.0910) loss: 0.8290 (0.8234) time: 0.1523 data: 0.0522 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:56 lr: 0.000118 grad: 0.0878 (0.0910) loss: 0.8245 (0.8234) time: 0.1307 data: 0.0423 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:40 lr: 0.000118 grad: 0.0953 (0.0910) loss: 0.8250 (0.8233) time: 0.1634 data: 0.0680 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:24 lr: 0.000118 grad: 0.0910 (0.0910) loss: 0.8241 (0.8233) time: 0.1632 data: 0.0638 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:08 lr: 0.000118 grad: 0.0903 (0.0911) loss: 0.8189 (0.8233) time: 0.1829 data: 0.0744 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0863 (0.0911) loss: 0.8236 (0.8233) time: 0.1702 data: 0.0727 max mem: 9377 +Train: [18] Total time: 0:16:55 (0.1625 s / it) +Averaged stats: lr: 0.000118 grad: 0.0863 (0.0911) loss: 0.8236 (0.8233) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:03:53 loss: 0.8466 (0.8466) time: 3.7652 data: 3.6968 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8479 (0.8481) time: 0.1413 data: 0.1155 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:14 (0.2382 s / it) +Averaged stats (hcp-train-subset): loss: 0.8479 (0.8481) +Eval (hcp-val): [18] [ 0/62] eta: 0:03:25 loss: 0.8446 (0.8446) time: 3.3074 data: 3.2080 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8465 (0.8468) time: 0.1474 data: 0.1186 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (hcp-val): loss: 0.8465 (0.8468) +Eval (nsd-val): [18] [ 0/62] eta: 0:05:20 loss: 0.8087 (0.8087) time: 5.1665 data: 5.1118 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8192 (0.8198) time: 0.1533 data: 0.1223 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:15 (0.2436 s / it) +Averaged stats (nsd-val): loss: 0.8192 (0.8198) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [19] [ 0/6250] eta: 9:51:38 lr: 0.000118 grad: 0.0713 (0.0713) loss: 0.9089 (0.9089) time: 5.6798 data: 5.5299 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:23:46 lr: 0.000118 grad: 0.0826 (0.1386) loss: 0.8377 (0.8343) time: 0.2191 data: 0.1181 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:20:49 lr: 0.000118 grad: 0.0847 (0.1226) loss: 0.8254 (0.8290) time: 0.1952 data: 0.0962 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:19:20 lr: 0.000118 grad: 0.0879 (0.1131) loss: 0.8222 (0.8256) time: 0.1884 data: 0.0981 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:18:32 lr: 0.000118 grad: 0.0852 (0.1108) loss: 0.8083 (0.8233) time: 0.1485 data: 0.0650 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:17:33 lr: 0.000118 grad: 0.0925 (0.1085) loss: 0.8136 (0.8215) time: 0.1584 data: 0.0763 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:16:58 lr: 0.000118 grad: 0.0932 (0.1059) loss: 0.8140 (0.8202) time: 0.1596 data: 0.0756 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:16:22 lr: 0.000118 grad: 0.0813 (0.1035) loss: 0.8263 (0.8205) time: 0.1824 data: 0.0830 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:15:47 lr: 0.000118 grad: 0.0850 (0.1012) loss: 0.8297 (0.8212) time: 0.1488 data: 0.0497 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:15:19 lr: 0.000118 grad: 0.0840 (0.0993) loss: 0.8262 (0.8218) time: 0.1578 data: 0.0538 max mem: 9377 +Train: [19] [1000/6250] eta: 0:14:54 lr: 0.000118 grad: 0.0833 (0.0979) loss: 0.8247 (0.8224) time: 0.1433 data: 0.0467 max mem: 9377 +Train: [19] [1100/6250] eta: 0:14:31 lr: 0.000118 grad: 0.0857 (0.0968) loss: 0.8260 (0.8228) time: 0.1910 data: 0.1012 max mem: 9377 +Train: [19] [1200/6250] eta: 0:14:07 lr: 0.000118 grad: 0.0895 (0.0962) loss: 0.8108 (0.8227) time: 0.1603 data: 0.0718 max mem: 9377 +Train: [19] [1300/6250] eta: 0:13:46 lr: 0.000118 grad: 0.0804 (0.0954) loss: 0.8272 (0.8228) time: 0.1561 data: 0.0692 max mem: 9377 +Train: [19] [1400/6250] eta: 0:13:31 lr: 0.000118 grad: 0.0812 (0.0949) loss: 0.8258 (0.8228) time: 0.2403 data: 0.1563 max mem: 9377 +Train: [19] [1500/6250] eta: 0:13:07 lr: 0.000118 grad: 0.0866 (0.0945) loss: 0.8215 (0.8230) time: 0.1693 data: 0.0845 max mem: 9377 +Train: [19] [1600/6250] eta: 0:12:49 lr: 0.000118 grad: 0.0913 (0.0943) loss: 0.8141 (0.8230) time: 0.1618 data: 0.0785 max mem: 9377 +Train: [19] [1700/6250] eta: 0:12:28 lr: 0.000118 grad: 0.0831 (0.0940) loss: 0.8288 (0.8231) time: 0.1577 data: 0.0680 max mem: 9377 +Train: [19] [1800/6250] eta: 0:12:11 lr: 0.000118 grad: 0.0830 (0.0937) loss: 0.8229 (0.8233) time: 0.1385 data: 0.0541 max mem: 9377 +Train: [19] [1900/6250] eta: 0:11:51 lr: 0.000118 grad: 0.0848 (0.0933) loss: 0.8308 (0.8235) time: 0.1465 data: 0.0568 max mem: 9377 +Train: [19] [2000/6250] eta: 0:11:34 lr: 0.000118 grad: 0.0846 (0.0929) loss: 0.8232 (0.8236) time: 0.1663 data: 0.0779 max mem: 9377 +Train: [19] [2100/6250] eta: 0:11:17 lr: 0.000118 grad: 0.0825 (0.0927) loss: 0.8288 (0.8237) time: 0.1749 data: 0.0881 max mem: 9377 +Train: [19] [2200/6250] eta: 0:11:01 lr: 0.000118 grad: 0.0823 (0.0923) loss: 0.8282 (0.8238) time: 0.1661 data: 0.0666 max mem: 9377 +Train: [19] [2300/6250] eta: 0:10:45 lr: 0.000118 grad: 0.0833 (0.0921) loss: 0.8245 (0.8238) time: 0.1852 data: 0.1051 max mem: 9377 +Train: [19] [2400/6250] eta: 0:10:28 lr: 0.000118 grad: 0.0894 (0.0918) loss: 0.8175 (0.8239) time: 0.1765 data: 0.0862 max mem: 9377 +Train: [19] [2500/6250] eta: 0:10:13 lr: 0.000118 grad: 0.0879 (0.0920) loss: 0.8166 (0.8239) time: 0.1817 data: 0.0912 max mem: 9377 +Train: [19] [2600/6250] eta: 0:09:59 lr: 0.000118 grad: 0.0849 (0.0919) loss: 0.8183 (0.8238) time: 0.1842 data: 0.0935 max mem: 9377 +Train: [19] [2700/6250] eta: 0:09:42 lr: 0.000118 grad: 0.0880 (0.0920) loss: 0.8167 (0.8237) time: 0.1526 data: 0.0580 max mem: 9377 +Train: [19] [2800/6250] eta: 0:09:25 lr: 0.000118 grad: 0.0933 (0.0920) loss: 0.8177 (0.8235) time: 0.1739 data: 0.0820 max mem: 9377 +Train: [19] [2900/6250] eta: 0:09:08 lr: 0.000118 grad: 0.0935 (0.0920) loss: 0.8218 (0.8235) time: 0.1601 data: 0.0622 max mem: 9377 +Train: [19] [3000/6250] eta: 0:08:51 lr: 0.000118 grad: 0.0909 (0.0921) loss: 0.8182 (0.8233) time: 0.1636 data: 0.0821 max mem: 9377 +Train: [19] [3100/6250] eta: 0:08:35 lr: 0.000118 grad: 0.0865 (0.0919) loss: 0.8221 (0.8232) time: 0.1663 data: 0.0739 max mem: 9377 +Train: [19] [3200/6250] eta: 0:08:18 lr: 0.000118 grad: 0.0888 (0.0919) loss: 0.8149 (0.8230) time: 0.1540 data: 0.0662 max mem: 9377 +Train: [19] [3300/6250] eta: 0:08:01 lr: 0.000118 grad: 0.0832 (0.0919) loss: 0.8182 (0.8229) time: 0.1593 data: 0.0621 max mem: 9377 +Train: [19] [3400/6250] eta: 0:07:45 lr: 0.000118 grad: 0.0871 (0.0919) loss: 0.8195 (0.8229) time: 0.1874 data: 0.0978 max mem: 9377 +Train: [19] [3500/6250] eta: 0:07:27 lr: 0.000118 grad: 0.0876 (0.0918) loss: 0.8236 (0.8229) time: 0.1614 data: 0.0718 max mem: 9377 +Train: [19] [3600/6250] eta: 0:07:11 lr: 0.000118 grad: 0.0961 (0.0919) loss: 0.8179 (0.8228) time: 0.1513 data: 0.0643 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:54 lr: 0.000118 grad: 0.0906 (0.0920) loss: 0.8210 (0.8228) time: 0.1544 data: 0.0621 max mem: 9377 +Train: [19] [3800/6250] eta: 0:06:38 lr: 0.000118 grad: 0.0896 (0.0920) loss: 0.8189 (0.8227) time: 0.1419 data: 0.0492 max mem: 9377 +Train: [19] [3900/6250] eta: 0:06:22 lr: 0.000118 grad: 0.0940 (0.0920) loss: 0.8152 (0.8227) time: 0.1784 data: 0.0915 max mem: 9377 +Train: [19] [4000/6250] eta: 0:06:05 lr: 0.000118 grad: 0.1006 (0.0920) loss: 0.8231 (0.8226) time: 0.1477 data: 0.0578 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:48 lr: 0.000118 grad: 0.0893 (0.0920) loss: 0.8248 (0.8227) time: 0.1518 data: 0.0629 max mem: 9377 +Train: [19] [4200/6250] eta: 0:05:32 lr: 0.000118 grad: 0.0864 (0.0921) loss: 0.8242 (0.8227) time: 0.1597 data: 0.0649 max mem: 9377 +Train: [19] [4300/6250] eta: 0:05:16 lr: 0.000118 grad: 0.0884 (0.0921) loss: 0.8262 (0.8227) time: 0.1557 data: 0.0668 max mem: 9377 +Train: [19] [4400/6250] eta: 0:04:59 lr: 0.000118 grad: 0.0927 (0.0921) loss: 0.8201 (0.8227) time: 0.1543 data: 0.0686 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:43 lr: 0.000118 grad: 0.0868 (0.0922) loss: 0.8246 (0.8227) time: 0.1581 data: 0.0674 max mem: 9377 +Train: [19] [4600/6250] eta: 0:04:27 lr: 0.000118 grad: 0.0878 (0.0922) loss: 0.8196 (0.8226) time: 0.1871 data: 0.0864 max mem: 9377 +Train: [19] [4700/6250] eta: 0:04:10 lr: 0.000118 grad: 0.0922 (0.0922) loss: 0.8236 (0.8226) time: 0.1488 data: 0.0617 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:54 lr: 0.000118 grad: 0.0894 (0.0923) loss: 0.8142 (0.8225) time: 0.1426 data: 0.0583 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:38 lr: 0.000118 grad: 0.0843 (0.0923) loss: 0.8181 (0.8224) time: 0.1562 data: 0.0709 max mem: 9377 +Train: [19] [5000/6250] eta: 0:03:21 lr: 0.000118 grad: 0.0890 (0.0923) loss: 0.8243 (0.8224) time: 0.1597 data: 0.0756 max mem: 9377 +Train: [19] [5100/6250] eta: 0:03:05 lr: 0.000118 grad: 0.0916 (0.0923) loss: 0.8208 (0.8223) time: 0.2165 data: 0.1348 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:49 lr: 0.000118 grad: 0.0880 (0.0923) loss: 0.8231 (0.8223) time: 0.1471 data: 0.0598 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:33 lr: 0.000118 grad: 0.0979 (0.0923) loss: 0.8161 (0.8222) time: 0.1561 data: 0.0720 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:18 lr: 0.000118 grad: 0.0917 (0.0924) loss: 0.8265 (0.8222) time: 0.1565 data: 0.0765 max mem: 9377 +Train: [19] [5500/6250] eta: 0:02:01 lr: 0.000118 grad: 0.0890 (0.0924) loss: 0.8146 (0.8222) time: 0.1953 data: 0.1015 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:45 lr: 0.000118 grad: 0.0907 (0.0924) loss: 0.8181 (0.8221) time: 0.1984 data: 0.1119 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:29 lr: 0.000118 grad: 0.0911 (0.0924) loss: 0.8204 (0.8221) time: 0.1621 data: 0.0739 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:13 lr: 0.000118 grad: 0.0891 (0.0925) loss: 0.8189 (0.8221) time: 0.1773 data: 0.0830 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:57 lr: 0.000118 grad: 0.0926 (0.0925) loss: 0.8243 (0.8221) time: 0.1729 data: 0.0699 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:40 lr: 0.000118 grad: 0.0904 (0.0925) loss: 0.8151 (0.8220) time: 0.1786 data: 0.0752 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:24 lr: 0.000117 grad: 0.0910 (0.0926) loss: 0.8199 (0.8220) time: 0.1827 data: 0.0852 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:08 lr: 0.000117 grad: 0.0952 (0.0927) loss: 0.8125 (0.8219) time: 0.1834 data: 0.0796 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.0941 (0.0927) loss: 0.8230 (0.8219) time: 0.1843 data: 0.0842 max mem: 9377 +Train: [19] Total time: 0:17:14 (0.1655 s / it) +Averaged stats: lr: 0.000117 grad: 0.0941 (0.0927) loss: 0.8230 (0.8219) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:05:45 loss: 0.8498 (0.8498) time: 5.5688 data: 5.5383 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8511 (0.8498) time: 0.1440 data: 0.1169 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:14 (0.2280 s / it) +Averaged stats (hcp-train-subset): loss: 0.8511 (0.8498) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [19] [ 0/62] eta: 0:04:12 loss: 0.8482 (0.8482) time: 4.0766 data: 3.9939 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8457 (0.8484) time: 0.1207 data: 0.0955 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (hcp-val): loss: 0.8457 (0.8484) +Making plots (hcp-val): example=6 +Eval (nsd-val): [19] [ 0/62] eta: 0:03:24 loss: 0.8105 (0.8105) time: 3.2957 data: 3.2155 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8201 (0.8218) time: 0.1177 data: 0.0923 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (nsd-val): loss: 0.8201 (0.8218) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 11:33:11 lr: 0.000117 grad: 0.1812 (0.1812) loss: 0.8283 (0.8283) time: 6.6546 data: 6.5269 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:22:10 lr: 0.000117 grad: 0.0895 (0.1090) loss: 0.8329 (0.8375) time: 0.1785 data: 0.0864 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:19:23 lr: 0.000117 grad: 0.0898 (0.1072) loss: 0.8231 (0.8309) time: 0.1586 data: 0.0721 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:18:17 lr: 0.000117 grad: 0.0853 (0.1033) loss: 0.8278 (0.8278) time: 0.1607 data: 0.0707 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:17:14 lr: 0.000117 grad: 0.0881 (0.1011) loss: 0.8269 (0.8261) time: 0.1635 data: 0.0793 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:16:24 lr: 0.000117 grad: 0.0861 (0.0995) loss: 0.8301 (0.8253) time: 0.1426 data: 0.0533 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:15:52 lr: 0.000117 grad: 0.0799 (0.0976) loss: 0.8257 (0.8245) time: 0.1713 data: 0.0763 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:15:26 lr: 0.000117 grad: 0.0800 (0.0964) loss: 0.8279 (0.8238) time: 0.1628 data: 0.0734 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:15:02 lr: 0.000117 grad: 0.0897 (0.0962) loss: 0.8243 (0.8233) time: 0.1617 data: 0.0701 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:14:40 lr: 0.000117 grad: 0.0919 (0.0955) loss: 0.8192 (0.8231) time: 0.1580 data: 0.0637 max mem: 9377 +Train: [20] [1000/6250] eta: 0:14:19 lr: 0.000117 grad: 0.0969 (0.0952) loss: 0.8179 (0.8228) time: 0.1726 data: 0.0850 max mem: 9377 +Train: [20] [1100/6250] eta: 0:13:54 lr: 0.000117 grad: 0.0898 (0.0948) loss: 0.8191 (0.8224) time: 0.1637 data: 0.0691 max mem: 9377 +Train: [20] [1200/6250] eta: 0:13:30 lr: 0.000117 grad: 0.0905 (0.0945) loss: 0.8152 (0.8221) time: 0.1384 data: 0.0479 max mem: 9377 +Train: [20] [1300/6250] eta: 0:13:16 lr: 0.000117 grad: 0.0851 (0.0942) loss: 0.8248 (0.8218) time: 0.1467 data: 0.0572 max mem: 9377 +Train: [20] [1400/6250] eta: 0:13:01 lr: 0.000117 grad: 0.0954 (0.0940) loss: 0.8152 (0.8216) time: 0.1837 data: 0.0948 max mem: 9377 +Train: [20] [1500/6250] eta: 0:12:42 lr: 0.000117 grad: 0.0861 (0.0938) loss: 0.8218 (0.8215) time: 0.1274 data: 0.0374 max mem: 9377 +Train: [20] [1600/6250] eta: 0:12:27 lr: 0.000117 grad: 0.0977 (0.0936) loss: 0.8196 (0.8216) time: 0.1681 data: 0.0757 max mem: 9377 +Train: [20] [1700/6250] eta: 0:12:09 lr: 0.000117 grad: 0.0929 (0.0935) loss: 0.8131 (0.8213) time: 0.1462 data: 0.0615 max mem: 9377 +Train: [20] [1800/6250] eta: 0:11:53 lr: 0.000117 grad: 0.0916 (0.0935) loss: 0.8199 (0.8212) time: 0.1458 data: 0.0593 max mem: 9377 +Train: [20] [1900/6250] eta: 0:11:38 lr: 0.000117 grad: 0.0839 (0.0933) loss: 0.8208 (0.8212) time: 0.1203 data: 0.0312 max mem: 9377 +Train: [20] [2000/6250] eta: 0:11:22 lr: 0.000117 grad: 0.0946 (0.0933) loss: 0.8175 (0.8210) time: 0.1626 data: 0.0755 max mem: 9377 +Train: [20] [2100/6250] eta: 0:11:04 lr: 0.000117 grad: 0.0917 (0.0932) loss: 0.8206 (0.8209) time: 0.1114 data: 0.0238 max mem: 9377 +Train: [20] [2200/6250] eta: 0:10:47 lr: 0.000117 grad: 0.0900 (0.0932) loss: 0.8174 (0.8208) time: 0.1543 data: 0.0657 max mem: 9377 +Train: [20] [2300/6250] eta: 0:10:32 lr: 0.000117 grad: 0.0922 (0.0931) loss: 0.8251 (0.8209) time: 0.1722 data: 0.0958 max mem: 9377 +Train: [20] [2400/6250] eta: 0:10:15 lr: 0.000117 grad: 0.0903 (0.0930) loss: 0.8184 (0.8210) time: 0.1569 data: 0.0708 max mem: 9377 +Train: [20] [2500/6250] eta: 0:09:58 lr: 0.000117 grad: 0.0807 (0.0929) loss: 0.8213 (0.8210) time: 0.1363 data: 0.0449 max mem: 9377 +Train: [20] [2600/6250] eta: 0:09:42 lr: 0.000117 grad: 0.0842 (0.0929) loss: 0.8178 (0.8210) time: 0.1659 data: 0.0763 max mem: 9377 +Train: [20] [2700/6250] eta: 0:09:26 lr: 0.000117 grad: 0.0903 (0.0928) loss: 0.8126 (0.8209) time: 0.1773 data: 0.0907 max mem: 9377 +Train: [20] [2800/6250] eta: 0:09:09 lr: 0.000117 grad: 0.0836 (0.0927) loss: 0.8179 (0.8209) time: 0.1620 data: 0.0723 max mem: 9377 +Train: [20] [2900/6250] eta: 0:08:53 lr: 0.000117 grad: 0.0873 (0.0926) loss: 0.8263 (0.8209) time: 0.1550 data: 0.0646 max mem: 9377 +Train: [20] [3000/6250] eta: 0:08:37 lr: 0.000117 grad: 0.0853 (0.0925) loss: 0.8190 (0.8210) time: 0.1658 data: 0.0712 max mem: 9377 +Train: [20] [3100/6250] eta: 0:08:21 lr: 0.000117 grad: 0.0878 (0.0923) loss: 0.8256 (0.8211) time: 0.1650 data: 0.0827 max mem: 9377 +Train: [20] [3200/6250] eta: 0:08:05 lr: 0.000117 grad: 0.0821 (0.0921) loss: 0.8249 (0.8213) time: 0.1581 data: 0.0731 max mem: 9377 +Train: [20] [3300/6250] eta: 0:07:50 lr: 0.000117 grad: 0.0830 (0.0919) loss: 0.8278 (0.8214) time: 0.1591 data: 0.0696 max mem: 9377 +Train: [20] [3400/6250] eta: 0:07:33 lr: 0.000117 grad: 0.0946 (0.0922) loss: 0.8280 (0.8216) time: 0.1516 data: 0.0539 max mem: 9377 +Train: [20] [3500/6250] eta: 0:07:17 lr: 0.000117 grad: 0.0869 (0.0921) loss: 0.8226 (0.8217) time: 0.1619 data: 0.0735 max mem: 9377 +Train: [20] [3600/6250] eta: 0:07:00 lr: 0.000117 grad: 0.0889 (0.0920) loss: 0.8173 (0.8217) time: 0.1662 data: 0.0803 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:45 lr: 0.000117 grad: 0.0827 (0.0920) loss: 0.8277 (0.8217) time: 0.1657 data: 0.0869 max mem: 9377 +Train: [20] [3800/6250] eta: 0:06:29 lr: 0.000117 grad: 0.0910 (0.0920) loss: 0.8206 (0.8217) time: 0.1563 data: 0.0625 max mem: 9377 +Train: [20] [3900/6250] eta: 0:06:13 lr: 0.000117 grad: 0.0855 (0.0919) loss: 0.8188 (0.8218) time: 0.1848 data: 0.1025 max mem: 9377 +Train: [20] [4000/6250] eta: 0:05:57 lr: 0.000117 grad: 0.0971 (0.0919) loss: 0.8204 (0.8218) time: 0.1518 data: 0.0681 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:41 lr: 0.000117 grad: 0.0868 (0.0918) loss: 0.8208 (0.8218) time: 0.1515 data: 0.0697 max mem: 9377 +Train: [20] [4200/6250] eta: 0:05:24 lr: 0.000117 grad: 0.0867 (0.0919) loss: 0.8247 (0.8218) time: 0.1660 data: 0.0879 max mem: 9377 +Train: [20] [4300/6250] eta: 0:05:08 lr: 0.000117 grad: 0.0875 (0.0918) loss: 0.8236 (0.8218) time: 0.1280 data: 0.0369 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:52 lr: 0.000117 grad: 0.0839 (0.0918) loss: 0.8286 (0.8219) time: 0.1676 data: 0.0849 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:37 lr: 0.000117 grad: 0.0853 (0.0919) loss: 0.8277 (0.8219) time: 0.1574 data: 0.0748 max mem: 9377 +Train: [20] [4600/6250] eta: 0:04:21 lr: 0.000117 grad: 0.0877 (0.0919) loss: 0.8242 (0.8220) time: 0.1617 data: 0.0702 max mem: 9377 +Train: [20] [4700/6250] eta: 0:04:05 lr: 0.000117 grad: 0.0891 (0.0918) loss: 0.8206 (0.8220) time: 0.1627 data: 0.0655 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:49 lr: 0.000117 grad: 0.0896 (0.0918) loss: 0.8147 (0.8220) time: 0.1630 data: 0.0689 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:33 lr: 0.000117 grad: 0.0836 (0.0918) loss: 0.8235 (0.8220) time: 0.1982 data: 0.1108 max mem: 9377 +Train: [20] [5000/6250] eta: 0:03:18 lr: 0.000117 grad: 0.0860 (0.0918) loss: 0.8245 (0.8219) time: 0.1625 data: 0.0649 max mem: 9377 +Train: [20] [5100/6250] eta: 0:03:02 lr: 0.000117 grad: 0.0847 (0.0917) loss: 0.8259 (0.8218) time: 0.1925 data: 0.1063 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:47 lr: 0.000117 grad: 0.0849 (0.0917) loss: 0.8244 (0.8218) time: 0.1622 data: 0.0722 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:31 lr: 0.000117 grad: 0.0883 (0.0916) loss: 0.8263 (0.8219) time: 0.1906 data: 0.1062 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:16 lr: 0.000117 grad: 0.0936 (0.0916) loss: 0.8273 (0.8219) time: 0.1883 data: 0.1103 max mem: 9377 +Train: [20] [5500/6250] eta: 0:02:00 lr: 0.000117 grad: 0.0844 (0.0916) loss: 0.8303 (0.8219) time: 0.1646 data: 0.0812 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:44 lr: 0.000117 grad: 0.0964 (0.0917) loss: 0.8228 (0.8219) time: 0.1542 data: 0.0684 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:28 lr: 0.000117 grad: 0.0892 (0.0916) loss: 0.8197 (0.8219) time: 0.1747 data: 0.0759 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:12 lr: 0.000117 grad: 0.0891 (0.0917) loss: 0.8262 (0.8219) time: 0.1672 data: 0.0754 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:56 lr: 0.000117 grad: 0.0848 (0.0917) loss: 0.8201 (0.8219) time: 0.1557 data: 0.0677 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:40 lr: 0.000116 grad: 0.0881 (0.0917) loss: 0.8175 (0.8218) time: 0.1409 data: 0.0445 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:23 lr: 0.000116 grad: 0.0898 (0.0917) loss: 0.8251 (0.8219) time: 0.1383 data: 0.0496 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:07 lr: 0.000116 grad: 0.0910 (0.0917) loss: 0.8270 (0.8219) time: 0.1363 data: 0.0377 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.0881 (0.0917) loss: 0.8264 (0.8219) time: 0.1270 data: 0.0311 max mem: 9377 +Train: [20] Total time: 0:16:43 (0.1605 s / it) +Averaged stats: lr: 0.000116 grad: 0.0881 (0.0917) loss: 0.8264 (0.8219) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:05:16 loss: 0.8514 (0.8514) time: 5.0996 data: 5.0679 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8493 (0.8507) time: 0.1341 data: 0.1084 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:14 (0.2340 s / it) +Averaged stats (hcp-train-subset): loss: 0.8493 (0.8507) +Eval (hcp-val): [20] [ 0/62] eta: 0:05:59 loss: 0.8462 (0.8462) time: 5.8001 data: 5.7665 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8474 (0.8487) time: 0.1085 data: 0.0816 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (hcp-val): loss: 0.8474 (0.8487) +Eval (nsd-val): [20] [ 0/62] eta: 0:04:03 loss: 0.8116 (0.8116) time: 3.9230 data: 3.8510 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8211 (0.8216) time: 0.1373 data: 0.1120 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (nsd-val): loss: 0.8211 (0.8216) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [21] [ 0/6250] eta: 10:53:14 lr: 0.000116 grad: 0.1028 (0.1028) loss: 0.8463 (0.8463) time: 6.2711 data: 6.1706 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:21:40 lr: 0.000116 grad: 0.0954 (0.1274) loss: 0.8234 (0.8276) time: 0.1722 data: 0.0624 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:18:15 lr: 0.000116 grad: 0.1011 (0.1170) loss: 0.8102 (0.8210) time: 0.1319 data: 0.0236 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:17:12 lr: 0.000116 grad: 0.0893 (0.1122) loss: 0.8157 (0.8191) time: 0.1838 data: 0.0855 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:16:59 lr: 0.000116 grad: 0.0937 (0.1075) loss: 0.8276 (0.8191) time: 0.1612 data: 0.0750 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:16:23 lr: 0.000116 grad: 0.0836 (0.1034) loss: 0.8288 (0.8197) time: 0.1485 data: 0.0463 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:16:00 lr: 0.000116 grad: 0.0993 (0.1013) loss: 0.8099 (0.8194) time: 0.1904 data: 0.0797 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:15:54 lr: 0.000116 grad: 0.0869 (0.1000) loss: 0.8139 (0.8187) time: 0.1747 data: 0.0700 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:15:34 lr: 0.000116 grad: 0.0962 (0.0989) loss: 0.8075 (0.8185) time: 0.1510 data: 0.0561 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:15:16 lr: 0.000116 grad: 0.0886 (0.0983) loss: 0.8151 (0.8184) time: 0.1667 data: 0.0816 max mem: 9377 +Train: [21] [1000/6250] eta: 0:14:55 lr: 0.000116 grad: 0.0869 (0.0979) loss: 0.8225 (0.8182) time: 0.1773 data: 0.0893 max mem: 9377 +Train: [21] [1100/6250] eta: 0:14:30 lr: 0.000116 grad: 0.0963 (0.0978) loss: 0.8169 (0.8180) time: 0.1557 data: 0.0683 max mem: 9377 +Train: [21] [1200/6250] eta: 0:14:12 lr: 0.000116 grad: 0.0844 (0.0976) loss: 0.8231 (0.8178) time: 0.1867 data: 0.1055 max mem: 9377 +Train: [21] [1300/6250] eta: 0:13:51 lr: 0.000116 grad: 0.0884 (0.0973) loss: 0.8207 (0.8179) time: 0.1900 data: 0.1062 max mem: 9377 +Train: [21] [1400/6250] eta: 0:13:30 lr: 0.000116 grad: 0.0962 (0.0973) loss: 0.8171 (0.8176) time: 0.1758 data: 0.0848 max mem: 9377 +Train: [21] [1500/6250] eta: 0:13:12 lr: 0.000116 grad: 0.0970 (0.0971) loss: 0.8158 (0.8176) time: 0.1570 data: 0.0708 max mem: 9377 +Train: [21] [1600/6250] eta: 0:12:53 lr: 0.000116 grad: 0.0922 (0.0969) loss: 0.8147 (0.8176) time: 0.1749 data: 0.0950 max mem: 9377 +Train: [21] [1700/6250] eta: 0:12:35 lr: 0.000116 grad: 0.0876 (0.0968) loss: 0.8168 (0.8176) time: 0.1522 data: 0.0679 max mem: 9377 +Train: [21] [1800/6250] eta: 0:12:16 lr: 0.000116 grad: 0.0871 (0.0965) loss: 0.8158 (0.8177) time: 0.1646 data: 0.0767 max mem: 9377 +Train: [21] [1900/6250] eta: 0:11:55 lr: 0.000116 grad: 0.0830 (0.0963) loss: 0.8126 (0.8179) time: 0.1293 data: 0.0418 max mem: 9377 +Train: [21] [2000/6250] eta: 0:11:38 lr: 0.000116 grad: 0.0968 (0.0961) loss: 0.8233 (0.8181) time: 0.1779 data: 0.0983 max mem: 9377 +Train: [21] [2100/6250] eta: 0:11:20 lr: 0.000116 grad: 0.0935 (0.0960) loss: 0.8181 (0.8182) time: 0.1675 data: 0.0773 max mem: 9377 +Train: [21] [2200/6250] eta: 0:11:02 lr: 0.000116 grad: 0.0947 (0.0959) loss: 0.8186 (0.8182) time: 0.1694 data: 0.0820 max mem: 9377 +Train: [21] [2300/6250] eta: 0:10:43 lr: 0.000116 grad: 0.0854 (0.0958) loss: 0.8201 (0.8183) time: 0.1294 data: 0.0394 max mem: 9377 +Train: [21] [2400/6250] eta: 0:10:27 lr: 0.000116 grad: 0.1013 (0.0959) loss: 0.8165 (0.8182) time: 0.1671 data: 0.0829 max mem: 9377 +Train: [21] [2500/6250] eta: 0:10:08 lr: 0.000116 grad: 0.0957 (0.0960) loss: 0.8183 (0.8181) time: 0.1367 data: 0.0457 max mem: 9377 +Train: [21] [2600/6250] eta: 0:09:52 lr: 0.000116 grad: 0.0924 (0.0960) loss: 0.8248 (0.8182) time: 0.1647 data: 0.0799 max mem: 9377 +Train: [21] [2700/6250] eta: 0:09:35 lr: 0.000116 grad: 0.0961 (0.0960) loss: 0.8143 (0.8183) time: 0.1558 data: 0.0648 max mem: 9377 +Train: [21] [2800/6250] eta: 0:09:18 lr: 0.000116 grad: 0.0884 (0.0959) loss: 0.8226 (0.8184) time: 0.1712 data: 0.0898 max mem: 9377 +Train: [21] [2900/6250] eta: 0:09:01 lr: 0.000116 grad: 0.0923 (0.0958) loss: 0.8275 (0.8184) time: 0.1645 data: 0.0807 max mem: 9377 +Train: [21] [3000/6250] eta: 0:08:44 lr: 0.000116 grad: 0.0899 (0.0958) loss: 0.8242 (0.8184) time: 0.1618 data: 0.0758 max mem: 9377 +Train: [21] [3100/6250] eta: 0:08:27 lr: 0.000116 grad: 0.0838 (0.0959) loss: 0.8133 (0.8184) time: 0.1582 data: 0.0644 max mem: 9377 +Train: [21] [3200/6250] eta: 0:08:10 lr: 0.000116 grad: 0.0921 (0.0959) loss: 0.8159 (0.8184) time: 0.1698 data: 0.0797 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:53 lr: 0.000116 grad: 0.0960 (0.0960) loss: 0.8109 (0.8184) time: 0.1401 data: 0.0457 max mem: 9377 +Train: [21] [3400/6250] eta: 0:07:36 lr: 0.000116 grad: 0.0890 (0.0959) loss: 0.8144 (0.8184) time: 0.1481 data: 0.0625 max mem: 9377 +Train: [21] [3500/6250] eta: 0:07:20 lr: 0.000116 grad: 0.0952 (0.0960) loss: 0.8109 (0.8183) time: 0.1533 data: 0.0619 max mem: 9377 +Train: [21] [3600/6250] eta: 0:07:04 lr: 0.000116 grad: 0.0913 (0.0960) loss: 0.8150 (0.8181) time: 0.2023 data: 0.1230 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:46 lr: 0.000116 grad: 0.0985 (0.0962) loss: 0.8092 (0.8180) time: 0.1389 data: 0.0558 max mem: 9377 +Train: [21] [3800/6250] eta: 0:06:30 lr: 0.000116 grad: 0.0936 (0.0964) loss: 0.8159 (0.8178) time: 0.1477 data: 0.0618 max mem: 9377 +Train: [21] [3900/6250] eta: 0:06:14 lr: 0.000116 grad: 0.0916 (0.0964) loss: 0.8166 (0.8177) time: 0.1601 data: 0.0700 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:58 lr: 0.000116 grad: 0.0930 (0.0964) loss: 0.8191 (0.8176) time: 0.1602 data: 0.0742 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:41 lr: 0.000116 grad: 0.0969 (0.0964) loss: 0.8181 (0.8176) time: 0.1441 data: 0.0548 max mem: 9377 +Train: [21] [4200/6250] eta: 0:05:25 lr: 0.000116 grad: 0.1003 (0.0965) loss: 0.8129 (0.8175) time: 0.1616 data: 0.0828 max mem: 9377 +Train: [21] [4300/6250] eta: 0:05:09 lr: 0.000116 grad: 0.0867 (0.0964) loss: 0.8263 (0.8175) time: 0.1191 data: 0.0315 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:54 lr: 0.000116 grad: 0.0888 (0.0964) loss: 0.8210 (0.8175) time: 0.1864 data: 0.1013 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:38 lr: 0.000116 grad: 0.0867 (0.0964) loss: 0.8268 (0.8176) time: 0.1683 data: 0.0885 max mem: 9377 +Train: [21] [4600/6250] eta: 0:04:22 lr: 0.000116 grad: 0.0923 (0.0964) loss: 0.8189 (0.8175) time: 0.1717 data: 0.0783 max mem: 9377 +Train: [21] [4700/6250] eta: 0:04:06 lr: 0.000116 grad: 0.0927 (0.0964) loss: 0.8179 (0.8175) time: 0.1498 data: 0.0622 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:50 lr: 0.000116 grad: 0.0870 (0.0964) loss: 0.8165 (0.8175) time: 0.1645 data: 0.0681 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:34 lr: 0.000116 grad: 0.0964 (0.0965) loss: 0.8178 (0.8175) time: 0.1494 data: 0.0666 max mem: 9377 +Train: [21] [5000/6250] eta: 0:03:19 lr: 0.000116 grad: 0.0905 (0.0965) loss: 0.8173 (0.8174) time: 0.1724 data: 0.0898 max mem: 9377 +Train: [21] [5100/6250] eta: 0:03:03 lr: 0.000116 grad: 0.0917 (0.0964) loss: 0.8047 (0.8175) time: 0.1622 data: 0.0811 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:47 lr: 0.000116 grad: 0.0918 (0.0964) loss: 0.8146 (0.8174) time: 0.1882 data: 0.0991 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:31 lr: 0.000116 grad: 0.0884 (0.0964) loss: 0.8132 (0.8174) time: 0.1648 data: 0.0831 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:16 lr: 0.000116 grad: 0.0872 (0.0963) loss: 0.8162 (0.8174) time: 0.1679 data: 0.0797 max mem: 9377 +Train: [21] [5500/6250] eta: 0:02:00 lr: 0.000116 grad: 0.0939 (0.0964) loss: 0.8130 (0.8173) time: 0.1556 data: 0.0735 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:44 lr: 0.000115 grad: 0.0958 (0.0964) loss: 0.8152 (0.8173) time: 0.1534 data: 0.0654 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:28 lr: 0.000115 grad: 0.0939 (0.0963) loss: 0.8160 (0.8173) time: 0.1579 data: 0.0535 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:12 lr: 0.000115 grad: 0.0948 (0.0963) loss: 0.8202 (0.8173) time: 0.1417 data: 0.0473 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:56 lr: 0.000115 grad: 0.0942 (0.0962) loss: 0.8102 (0.8173) time: 0.1455 data: 0.0603 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:40 lr: 0.000115 grad: 0.0926 (0.0962) loss: 0.8174 (0.8173) time: 0.1648 data: 0.0779 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:24 lr: 0.000115 grad: 0.0940 (0.0961) loss: 0.8107 (0.8173) time: 0.1405 data: 0.0441 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:07 lr: 0.000115 grad: 0.0943 (0.0961) loss: 0.8100 (0.8172) time: 0.1574 data: 0.0659 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.0910 (0.0962) loss: 0.8104 (0.8172) time: 0.1402 data: 0.0420 max mem: 9377 +Train: [21] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000115 grad: 0.0910 (0.0962) loss: 0.8104 (0.8172) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:04:42 loss: 0.8507 (0.8507) time: 4.5537 data: 4.5139 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8498 (0.8496) time: 0.1174 data: 0.0903 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (hcp-train-subset): loss: 0.8498 (0.8496) +Eval (hcp-val): [21] [ 0/62] eta: 0:03:46 loss: 0.8412 (0.8412) time: 3.6518 data: 3.5789 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8463 (0.8468) time: 0.1311 data: 0.1058 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (hcp-val): loss: 0.8463 (0.8468) +Eval (nsd-val): [21] [ 0/62] eta: 0:05:43 loss: 0.8106 (0.8106) time: 5.5403 data: 5.5085 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8199 (0.8195) time: 0.1387 data: 0.1124 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:14 (0.2268 s / it) +Averaged stats (nsd-val): loss: 0.8199 (0.8195) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [22] [ 0/6250] eta: 8:31:15 lr: 0.000115 grad: 0.1593 (0.1593) loss: 0.8433 (0.8433) time: 4.9080 data: 4.6973 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:23:30 lr: 0.000115 grad: 0.0957 (0.1333) loss: 0.8278 (0.8314) time: 0.1692 data: 0.0600 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:19:45 lr: 0.000115 grad: 0.0952 (0.1181) loss: 0.8193 (0.8271) time: 0.1208 data: 0.0183 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:18:40 lr: 0.000115 grad: 0.0861 (0.1095) loss: 0.8283 (0.8258) time: 0.1885 data: 0.0896 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:17:46 lr: 0.000115 grad: 0.0926 (0.1056) loss: 0.8145 (0.8237) time: 0.1522 data: 0.0597 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:17:15 lr: 0.000115 grad: 0.0939 (0.1037) loss: 0.8065 (0.8214) time: 0.1789 data: 0.0832 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:16:39 lr: 0.000115 grad: 0.1055 (0.1022) loss: 0.8120 (0.8196) time: 0.1584 data: 0.0654 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:16:18 lr: 0.000115 grad: 0.0884 (0.1019) loss: 0.8154 (0.8186) time: 0.1739 data: 0.0821 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:15:52 lr: 0.000115 grad: 0.0918 (0.1010) loss: 0.8069 (0.8175) time: 0.1673 data: 0.0692 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:15:29 lr: 0.000115 grad: 0.0941 (0.1005) loss: 0.8061 (0.8167) time: 0.1713 data: 0.0827 max mem: 9377 +Train: [22] [1000/6250] eta: 0:15:01 lr: 0.000115 grad: 0.0895 (0.0997) loss: 0.8171 (0.8166) time: 0.1491 data: 0.0598 max mem: 9377 +Train: [22] [1100/6250] eta: 0:14:34 lr: 0.000115 grad: 0.0872 (0.0992) loss: 0.8106 (0.8162) time: 0.1521 data: 0.0621 max mem: 9377 +Train: [22] [1200/6250] eta: 0:14:13 lr: 0.000115 grad: 0.0964 (0.0990) loss: 0.8175 (0.8160) time: 0.1481 data: 0.0629 max mem: 9377 +Train: [22] [1300/6250] eta: 0:13:54 lr: 0.000115 grad: 0.0972 (0.0990) loss: 0.8125 (0.8155) time: 0.1657 data: 0.0781 max mem: 9377 +Train: [22] [1400/6250] eta: 0:13:30 lr: 0.000115 grad: 0.0988 (0.0990) loss: 0.8125 (0.8152) time: 0.1416 data: 0.0573 max mem: 9377 +Train: [22] [1500/6250] eta: 0:13:12 lr: 0.000115 grad: 0.0960 (0.0988) loss: 0.8112 (0.8149) time: 0.1506 data: 0.0556 max mem: 9377 +Train: [22] [1600/6250] eta: 0:12:54 lr: 0.000115 grad: 0.0973 (0.0990) loss: 0.8043 (0.8145) time: 0.1682 data: 0.0830 max mem: 9377 +Train: [22] [1700/6250] eta: 0:12:33 lr: 0.000115 grad: 0.0946 (0.0989) loss: 0.8056 (0.8142) time: 0.1625 data: 0.0753 max mem: 9377 +Train: [22] [1800/6250] eta: 0:12:13 lr: 0.000115 grad: 0.0912 (0.0990) loss: 0.8152 (0.8139) time: 0.1497 data: 0.0690 max mem: 9377 +Train: [22] [1900/6250] eta: 0:11:53 lr: 0.000115 grad: 0.0914 (0.0991) loss: 0.8103 (0.8136) time: 0.1714 data: 0.0870 max mem: 9377 +Train: [22] [2000/6250] eta: 0:11:34 lr: 0.000115 grad: 0.0914 (0.0991) loss: 0.8119 (0.8134) time: 0.1473 data: 0.0639 max mem: 9377 +Train: [22] [2100/6250] eta: 0:11:16 lr: 0.000115 grad: 0.0957 (0.0991) loss: 0.8024 (0.8132) time: 0.1503 data: 0.0581 max mem: 9377 +Train: [22] [2200/6250] eta: 0:10:59 lr: 0.000115 grad: 0.0947 (0.0989) loss: 0.8156 (0.8132) time: 0.1916 data: 0.0990 max mem: 9377 +Train: [22] [2300/6250] eta: 0:10:41 lr: 0.000115 grad: 0.0859 (0.0988) loss: 0.8160 (0.8133) time: 0.1678 data: 0.0746 max mem: 9377 +Train: [22] [2400/6250] eta: 0:10:23 lr: 0.000115 grad: 0.0938 (0.0987) loss: 0.8187 (0.8134) time: 0.1509 data: 0.0637 max mem: 9377 +Train: [22] [2500/6250] eta: 0:10:06 lr: 0.000115 grad: 0.0887 (0.0986) loss: 0.8150 (0.8136) time: 0.1734 data: 0.0831 max mem: 9377 +Train: [22] [2600/6250] eta: 0:09:48 lr: 0.000115 grad: 0.0878 (0.0983) loss: 0.8183 (0.8138) time: 0.1487 data: 0.0577 max mem: 9377 +Train: [22] [2700/6250] eta: 0:09:32 lr: 0.000115 grad: 0.0881 (0.0981) loss: 0.8168 (0.8140) time: 0.1841 data: 0.0921 max mem: 9377 +Train: [22] [2800/6250] eta: 0:09:14 lr: 0.000115 grad: 0.0839 (0.0980) loss: 0.8181 (0.8142) time: 0.1502 data: 0.0649 max mem: 9377 +Train: [22] [2900/6250] eta: 0:08:57 lr: 0.000115 grad: 0.0909 (0.0978) loss: 0.8190 (0.8145) time: 0.1346 data: 0.0467 max mem: 9377 +Train: [22] [3000/6250] eta: 0:08:41 lr: 0.000115 grad: 0.0865 (0.0975) loss: 0.8243 (0.8147) time: 0.1972 data: 0.1189 max mem: 9377 +Train: [22] [3100/6250] eta: 0:08:24 lr: 0.000115 grad: 0.0877 (0.0974) loss: 0.8219 (0.8150) time: 0.1629 data: 0.0703 max mem: 9377 +Train: [22] [3200/6250] eta: 0:08:07 lr: 0.000115 grad: 0.0900 (0.0972) loss: 0.8222 (0.8153) time: 0.1525 data: 0.0562 max mem: 9377 +Train: [22] [3300/6250] eta: 0:07:51 lr: 0.000115 grad: 0.0907 (0.0970) loss: 0.8307 (0.8156) time: 0.1713 data: 0.0902 max mem: 9377 +Train: [22] [3400/6250] eta: 0:07:34 lr: 0.000115 grad: 0.0903 (0.0969) loss: 0.8187 (0.8156) time: 0.1340 data: 0.0400 max mem: 9377 +Train: [22] [3500/6250] eta: 0:07:18 lr: 0.000115 grad: 0.0912 (0.0968) loss: 0.8241 (0.8158) time: 0.1707 data: 0.0840 max mem: 9377 +Train: [22] [3600/6250] eta: 0:07:02 lr: 0.000115 grad: 0.0872 (0.0966) loss: 0.8157 (0.8159) time: 0.1644 data: 0.0804 max mem: 9377 +Train: [22] [3700/6250] eta: 0:06:46 lr: 0.000115 grad: 0.0949 (0.0966) loss: 0.8218 (0.8159) time: 0.1624 data: 0.0743 max mem: 9377 +Train: [22] [3800/6250] eta: 0:06:31 lr: 0.000115 grad: 0.0932 (0.0967) loss: 0.8177 (0.8159) time: 0.1875 data: 0.1006 max mem: 9377 +Train: [22] [3900/6250] eta: 0:06:15 lr: 0.000115 grad: 0.0908 (0.0966) loss: 0.8148 (0.8158) time: 0.1585 data: 0.0741 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:59 lr: 0.000115 grad: 0.0903 (0.0966) loss: 0.8131 (0.8157) time: 0.1502 data: 0.0639 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:43 lr: 0.000115 grad: 0.0931 (0.0966) loss: 0.8177 (0.8157) time: 0.1248 data: 0.0249 max mem: 9377 +Train: [22] [4200/6250] eta: 0:05:27 lr: 0.000115 grad: 0.0913 (0.0966) loss: 0.8197 (0.8157) time: 0.1592 data: 0.0666 max mem: 9377 +Train: [22] [4300/6250] eta: 0:05:11 lr: 0.000115 grad: 0.0969 (0.0969) loss: 0.8130 (0.8155) time: 0.1716 data: 0.0788 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:55 lr: 0.000115 grad: 0.0923 (0.0969) loss: 0.8119 (0.8154) time: 0.1925 data: 0.1098 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:39 lr: 0.000115 grad: 0.0962 (0.0971) loss: 0.8065 (0.8153) time: 0.1786 data: 0.0958 max mem: 9377 +Train: [22] [4600/6250] eta: 0:04:23 lr: 0.000115 grad: 0.1031 (0.0971) loss: 0.8130 (0.8152) time: 0.1365 data: 0.0499 max mem: 9377 +Train: [22] [4700/6250] eta: 0:04:06 lr: 0.000115 grad: 0.0969 (0.0973) loss: 0.8104 (0.8151) time: 0.1591 data: 0.0722 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:50 lr: 0.000115 grad: 0.0943 (0.0973) loss: 0.8113 (0.8150) time: 0.1585 data: 0.0737 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:34 lr: 0.000114 grad: 0.0985 (0.0974) loss: 0.8103 (0.8149) time: 0.1551 data: 0.0604 max mem: 9377 +Train: [22] [5000/6250] eta: 0:03:19 lr: 0.000114 grad: 0.0964 (0.0975) loss: 0.8098 (0.8148) time: 0.1647 data: 0.0825 max mem: 9377 +Train: [22] [5100/6250] eta: 0:03:03 lr: 0.000114 grad: 0.1039 (0.0975) loss: 0.8096 (0.8147) time: 0.1749 data: 0.0876 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:47 lr: 0.000114 grad: 0.1004 (0.0976) loss: 0.8068 (0.8146) time: 0.1647 data: 0.0795 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:31 lr: 0.000114 grad: 0.1037 (0.0977) loss: 0.8058 (0.8146) time: 0.2315 data: 0.1551 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:15 lr: 0.000114 grad: 0.1063 (0.0978) loss: 0.8090 (0.8145) time: 0.1478 data: 0.0546 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:59 lr: 0.000114 grad: 0.0967 (0.0979) loss: 0.8237 (0.8145) time: 0.1655 data: 0.0840 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:43 lr: 0.000114 grad: 0.0966 (0.0980) loss: 0.8138 (0.8145) time: 0.1539 data: 0.0686 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:27 lr: 0.000114 grad: 0.0983 (0.0981) loss: 0.8119 (0.8144) time: 0.1700 data: 0.0766 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:11 lr: 0.000114 grad: 0.0999 (0.0981) loss: 0.8140 (0.8143) time: 0.1401 data: 0.0466 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:55 lr: 0.000114 grad: 0.0968 (0.0981) loss: 0.8085 (0.8142) time: 0.1436 data: 0.0471 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:39 lr: 0.000114 grad: 0.0914 (0.0982) loss: 0.8100 (0.8141) time: 0.1480 data: 0.0519 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:23 lr: 0.000114 grad: 0.0966 (0.0982) loss: 0.8160 (0.8140) time: 0.1728 data: 0.0764 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.1038 (0.0982) loss: 0.8061 (0.8139) time: 0.1800 data: 0.0840 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.1018 (0.0983) loss: 0.8053 (0.8139) time: 0.1706 data: 0.0807 max mem: 9377 +Train: [22] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000114 grad: 0.1018 (0.0983) loss: 0.8053 (0.8139) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:03:52 loss: 0.8464 (0.8464) time: 3.7521 data: 3.6851 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8461 (0.8493) time: 0.1324 data: 0.1053 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:14 (0.2340 s / it) +Averaged stats (hcp-train-subset): loss: 0.8461 (0.8493) +Eval (hcp-val): [22] [ 0/62] eta: 0:05:53 loss: 0.8484 (0.8484) time: 5.7045 data: 5.6737 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8441 (0.8473) time: 0.1414 data: 0.1156 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:15 (0.2424 s / it) +Averaged stats (hcp-val): loss: 0.8441 (0.8473) +Eval (nsd-val): [22] [ 0/62] eta: 0:06:05 loss: 0.8068 (0.8068) time: 5.9013 data: 5.8708 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8179 (0.8194) time: 0.1320 data: 0.1065 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (nsd-val): loss: 0.8179 (0.8194) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [23] [ 0/6250] eta: 7:24:34 lr: 0.000114 grad: 0.0469 (0.0469) loss: 0.8871 (0.8871) time: 4.2679 data: 4.0210 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:22:16 lr: 0.000114 grad: 0.0961 (0.1290) loss: 0.8305 (0.8310) time: 0.1468 data: 0.0402 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:19:21 lr: 0.000114 grad: 0.1136 (0.1207) loss: 0.8071 (0.8224) time: 0.1666 data: 0.0649 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:18:39 lr: 0.000114 grad: 0.0975 (0.1180) loss: 0.8127 (0.8177) time: 0.2088 data: 0.1153 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:17:49 lr: 0.000114 grad: 0.1007 (0.1135) loss: 0.8060 (0.8159) time: 0.1537 data: 0.0631 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:17:16 lr: 0.000114 grad: 0.0948 (0.1107) loss: 0.8097 (0.8152) time: 0.1682 data: 0.0821 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:16:53 lr: 0.000114 grad: 0.0972 (0.1084) loss: 0.8092 (0.8148) time: 0.1680 data: 0.0783 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:16:30 lr: 0.000114 grad: 0.0958 (0.1075) loss: 0.8154 (0.8145) time: 0.1531 data: 0.0536 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:16:04 lr: 0.000114 grad: 0.0964 (0.1063) loss: 0.8082 (0.8142) time: 0.1716 data: 0.0874 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:15:41 lr: 0.000114 grad: 0.0943 (0.1054) loss: 0.8200 (0.8142) time: 0.1646 data: 0.0599 max mem: 9377 +Train: [23] [1000/6250] eta: 0:15:14 lr: 0.000114 grad: 0.0906 (0.1045) loss: 0.8205 (0.8144) time: 0.1532 data: 0.0638 max mem: 9377 +Train: [23] [1100/6250] eta: 0:14:46 lr: 0.000114 grad: 0.0942 (0.1038) loss: 0.8225 (0.8143) time: 0.1576 data: 0.0735 max mem: 9377 +Train: [23] [1200/6250] eta: 0:14:21 lr: 0.000114 grad: 0.0922 (0.1031) loss: 0.8228 (0.8145) time: 0.1217 data: 0.0349 max mem: 9377 +Train: [23] [1300/6250] eta: 0:14:00 lr: 0.000114 grad: 0.0913 (0.1029) loss: 0.8185 (0.8147) time: 0.1624 data: 0.0709 max mem: 9377 +Train: [23] [1400/6250] eta: 0:13:36 lr: 0.000114 grad: 0.0983 (0.1026) loss: 0.8147 (0.8147) time: 0.1580 data: 0.0667 max mem: 9377 +Train: [23] [1500/6250] eta: 0:13:16 lr: 0.000114 grad: 0.0958 (0.1022) loss: 0.8190 (0.8147) time: 0.1564 data: 0.0664 max mem: 9377 +Train: [23] [1600/6250] eta: 0:12:56 lr: 0.000114 grad: 0.0926 (0.1020) loss: 0.8080 (0.8147) time: 0.1335 data: 0.0402 max mem: 9377 +Train: [23] [1700/6250] eta: 0:12:36 lr: 0.000114 grad: 0.0924 (0.1016) loss: 0.8171 (0.8147) time: 0.1488 data: 0.0535 max mem: 9377 +Train: [23] [1800/6250] eta: 0:12:16 lr: 0.000114 grad: 0.0914 (0.1014) loss: 0.8147 (0.8147) time: 0.1380 data: 0.0530 max mem: 9377 +Train: [23] [1900/6250] eta: 0:11:57 lr: 0.000114 grad: 0.0963 (0.1013) loss: 0.8141 (0.8148) time: 0.1073 data: 0.0157 max mem: 9377 +Train: [23] [2000/6250] eta: 0:11:38 lr: 0.000114 grad: 0.0946 (0.1010) loss: 0.8202 (0.8148) time: 0.1436 data: 0.0501 max mem: 9377 +Train: [23] [2100/6250] eta: 0:11:19 lr: 0.000114 grad: 0.0938 (0.1008) loss: 0.8158 (0.8148) time: 0.1405 data: 0.0461 max mem: 9377 +Train: [23] [2200/6250] eta: 0:11:02 lr: 0.000114 grad: 0.0916 (0.1006) loss: 0.8130 (0.8149) time: 0.1653 data: 0.0852 max mem: 9377 +Train: [23] [2300/6250] eta: 0:10:44 lr: 0.000114 grad: 0.0943 (0.1005) loss: 0.8137 (0.8148) time: 0.1583 data: 0.0680 max mem: 9377 +Train: [23] [2400/6250] eta: 0:10:26 lr: 0.000114 grad: 0.0948 (0.1004) loss: 0.8116 (0.8147) time: 0.1528 data: 0.0584 max mem: 9377 +Train: [23] [2500/6250] eta: 0:10:08 lr: 0.000114 grad: 0.0965 (0.1005) loss: 0.8103 (0.8145) time: 0.1324 data: 0.0440 max mem: 9377 +Train: [23] [2600/6250] eta: 0:09:51 lr: 0.000114 grad: 0.1000 (0.1005) loss: 0.8130 (0.8143) time: 0.1574 data: 0.0677 max mem: 9377 +Train: [23] [2700/6250] eta: 0:09:35 lr: 0.000114 grad: 0.0968 (0.1004) loss: 0.8131 (0.8143) time: 0.1722 data: 0.0815 max mem: 9377 +Train: [23] [2800/6250] eta: 0:09:18 lr: 0.000114 grad: 0.0930 (0.1005) loss: 0.8121 (0.8140) time: 0.1657 data: 0.0818 max mem: 9377 +Train: [23] [2900/6250] eta: 0:09:00 lr: 0.000114 grad: 0.0936 (0.1005) loss: 0.8149 (0.8139) time: 0.1161 data: 0.0188 max mem: 9377 +Train: [23] [3000/6250] eta: 0:08:44 lr: 0.000114 grad: 0.1002 (0.1005) loss: 0.8154 (0.8138) time: 0.1246 data: 0.0327 max mem: 9377 +Train: [23] [3100/6250] eta: 0:08:26 lr: 0.000114 grad: 0.0970 (0.1005) loss: 0.8078 (0.8136) time: 0.1289 data: 0.0424 max mem: 9377 +Train: [23] [3200/6250] eta: 0:08:10 lr: 0.000114 grad: 0.0977 (0.1007) loss: 0.8167 (0.8134) time: 0.1491 data: 0.0567 max mem: 9377 +Train: [23] [3300/6250] eta: 0:07:53 lr: 0.000114 grad: 0.1049 (0.1007) loss: 0.8143 (0.8133) time: 0.1249 data: 0.0368 max mem: 9377 +Train: [23] [3400/6250] eta: 0:07:36 lr: 0.000114 grad: 0.1008 (0.1008) loss: 0.8150 (0.8133) time: 0.1795 data: 0.0993 max mem: 9377 +Train: [23] [3500/6250] eta: 0:07:19 lr: 0.000114 grad: 0.0941 (0.1007) loss: 0.8192 (0.8132) time: 0.1412 data: 0.0491 max mem: 9377 +Train: [23] [3600/6250] eta: 0:07:02 lr: 0.000114 grad: 0.0948 (0.1006) loss: 0.8215 (0.8132) time: 0.1287 data: 0.0373 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:46 lr: 0.000114 grad: 0.0970 (0.1008) loss: 0.8102 (0.8132) time: 0.1437 data: 0.0636 max mem: 9377 +Train: [23] [3800/6250] eta: 0:06:30 lr: 0.000114 grad: 0.1006 (0.1008) loss: 0.8035 (0.8132) time: 0.1750 data: 0.0904 max mem: 9377 +Train: [23] [3900/6250] eta: 0:06:14 lr: 0.000114 grad: 0.1012 (0.1009) loss: 0.8095 (0.8131) time: 0.1242 data: 0.0416 max mem: 9377 +Train: [23] [4000/6250] eta: 0:05:58 lr: 0.000113 grad: 0.1026 (0.1010) loss: 0.8035 (0.8130) time: 0.1599 data: 0.0712 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:42 lr: 0.000113 grad: 0.1063 (0.1011) loss: 0.8047 (0.8129) time: 0.1615 data: 0.0776 max mem: 9377 +Train: [23] [4200/6250] eta: 0:05:26 lr: 0.000113 grad: 0.0997 (0.1011) loss: 0.8120 (0.8129) time: 0.1263 data: 0.0302 max mem: 9377 +Train: [23] [4300/6250] eta: 0:05:10 lr: 0.000113 grad: 0.1047 (0.1011) loss: 0.8117 (0.8128) time: 0.1599 data: 0.0684 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:54 lr: 0.000113 grad: 0.0986 (0.1011) loss: 0.8056 (0.8127) time: 0.1377 data: 0.0465 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:38 lr: 0.000113 grad: 0.1018 (0.1011) loss: 0.8180 (0.8126) time: 0.1570 data: 0.0578 max mem: 9377 +Train: [23] [4600/6250] eta: 0:04:22 lr: 0.000113 grad: 0.0973 (0.1011) loss: 0.8118 (0.8126) time: 0.1765 data: 0.0976 max mem: 9377 +Train: [23] [4700/6250] eta: 0:04:06 lr: 0.000113 grad: 0.0978 (0.1011) loss: 0.8112 (0.8127) time: 0.1698 data: 0.0767 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:50 lr: 0.000113 grad: 0.0999 (0.1011) loss: 0.8090 (0.8127) time: 0.1616 data: 0.0764 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:34 lr: 0.000113 grad: 0.0943 (0.1011) loss: 0.8145 (0.8127) time: 0.1959 data: 0.1206 max mem: 9377 +Train: [23] [5000/6250] eta: 0:03:18 lr: 0.000113 grad: 0.0981 (0.1011) loss: 0.8163 (0.8127) time: 0.1576 data: 0.0791 max mem: 9377 +Train: [23] [5100/6250] eta: 0:03:02 lr: 0.000113 grad: 0.0994 (0.1011) loss: 0.8154 (0.8127) time: 0.1605 data: 0.0668 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:47 lr: 0.000113 grad: 0.0964 (0.1010) loss: 0.8203 (0.8128) time: 0.1529 data: 0.0684 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:31 lr: 0.000113 grad: 0.1008 (0.1010) loss: 0.8168 (0.8129) time: 0.1645 data: 0.0727 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:16 lr: 0.000113 grad: 0.0987 (0.1010) loss: 0.8147 (0.8130) time: 0.1732 data: 0.0754 max mem: 9377 +Train: [23] [5500/6250] eta: 0:02:00 lr: 0.000113 grad: 0.1019 (0.1011) loss: 0.8139 (0.8131) time: 0.1657 data: 0.0721 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:44 lr: 0.000113 grad: 0.0953 (0.1010) loss: 0.8188 (0.8132) time: 0.1724 data: 0.0940 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:28 lr: 0.000113 grad: 0.1082 (0.1011) loss: 0.8008 (0.8131) time: 0.1952 data: 0.0949 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:12 lr: 0.000113 grad: 0.1048 (0.1011) loss: 0.8103 (0.8131) time: 0.1655 data: 0.0789 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:56 lr: 0.000113 grad: 0.0996 (0.1012) loss: 0.8145 (0.8131) time: 0.1690 data: 0.0793 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:40 lr: 0.000113 grad: 0.1087 (0.1012) loss: 0.8110 (0.8131) time: 0.2198 data: 0.1227 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:24 lr: 0.000113 grad: 0.1061 (0.1013) loss: 0.8026 (0.8130) time: 0.1568 data: 0.0647 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:08 lr: 0.000113 grad: 0.0953 (0.1013) loss: 0.8173 (0.8130) time: 0.1522 data: 0.0502 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.1025 (0.1013) loss: 0.8039 (0.8129) time: 0.1896 data: 0.1043 max mem: 9377 +Train: [23] Total time: 0:16:56 (0.1627 s / it) +Averaged stats: lr: 0.000113 grad: 0.1025 (0.1013) loss: 0.8039 (0.8129) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:04:45 loss: 0.8483 (0.8483) time: 4.6041 data: 4.5221 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8499 (0.8493) time: 0.1379 data: 0.1124 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-train-subset): loss: 0.8499 (0.8493) +Eval (hcp-val): [23] [ 0/62] eta: 0:05:14 loss: 0.8468 (0.8468) time: 5.0653 data: 5.0353 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8464 (0.8468) time: 0.1155 data: 0.0901 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:14 (0.2345 s / it) +Averaged stats (hcp-val): loss: 0.8464 (0.8468) +Eval (nsd-val): [23] [ 0/62] eta: 0:05:58 loss: 0.8087 (0.8087) time: 5.7782 data: 5.7473 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8165 (0.8185) time: 0.1337 data: 0.1080 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:14 (0.2346 s / it) +Averaged stats (nsd-val): loss: 0.8165 (0.8185) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 12:23:36 lr: 0.000113 grad: 0.1117 (0.1117) loss: 0.8304 (0.8304) time: 7.1387 data: 7.0337 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:23:11 lr: 0.000113 grad: 0.0963 (0.1080) loss: 0.8270 (0.8272) time: 0.1690 data: 0.0427 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:19:46 lr: 0.000113 grad: 0.1062 (0.1055) loss: 0.8056 (0.8208) time: 0.1745 data: 0.0723 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:18:21 lr: 0.000113 grad: 0.0898 (0.1049) loss: 0.8178 (0.8189) time: 0.1673 data: 0.0602 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:18:06 lr: 0.000113 grad: 0.0922 (0.1038) loss: 0.8198 (0.8183) time: 0.1671 data: 0.0761 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:17:40 lr: 0.000113 grad: 0.0905 (0.1021) loss: 0.8168 (0.8179) time: 0.1802 data: 0.0963 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:17:06 lr: 0.000113 grad: 0.0918 (0.1015) loss: 0.8184 (0.8171) time: 0.1540 data: 0.0630 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:16:47 lr: 0.000113 grad: 0.0932 (0.1009) loss: 0.8124 (0.8167) time: 0.1495 data: 0.0581 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:16:22 lr: 0.000113 grad: 0.0930 (0.1007) loss: 0.8144 (0.8161) time: 0.1768 data: 0.0864 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:15:59 lr: 0.000113 grad: 0.0921 (0.1005) loss: 0.8139 (0.8157) time: 0.1875 data: 0.0907 max mem: 9377 +Train: [24] [1000/6250] eta: 0:15:28 lr: 0.000113 grad: 0.0902 (0.0998) loss: 0.8176 (0.8155) time: 0.1415 data: 0.0556 max mem: 9377 +Train: [24] [1100/6250] eta: 0:15:03 lr: 0.000113 grad: 0.0839 (0.0995) loss: 0.8183 (0.8154) time: 0.1554 data: 0.0593 max mem: 9377 +Train: [24] [1200/6250] eta: 0:14:35 lr: 0.000113 grad: 0.0852 (0.0992) loss: 0.8182 (0.8153) time: 0.1504 data: 0.0601 max mem: 9377 +Train: [24] [1300/6250] eta: 0:14:12 lr: 0.000113 grad: 0.0939 (0.0991) loss: 0.8040 (0.8151) time: 0.1383 data: 0.0506 max mem: 9377 +Train: [24] [1400/6250] eta: 0:13:51 lr: 0.000113 grad: 0.0942 (0.0989) loss: 0.8122 (0.8149) time: 0.1678 data: 0.0808 max mem: 9377 +Train: [24] [1500/6250] eta: 0:13:28 lr: 0.000113 grad: 0.0923 (0.0987) loss: 0.8125 (0.8147) time: 0.1478 data: 0.0627 max mem: 9377 +Train: [24] [1600/6250] eta: 0:13:08 lr: 0.000113 grad: 0.0850 (0.0987) loss: 0.8175 (0.8145) time: 0.1704 data: 0.0824 max mem: 9377 +Train: [24] [1700/6250] eta: 0:12:49 lr: 0.000113 grad: 0.1015 (0.0989) loss: 0.8094 (0.8143) time: 0.1166 data: 0.0063 max mem: 9377 +Train: [24] [1800/6250] eta: 0:12:26 lr: 0.000113 grad: 0.0923 (0.0989) loss: 0.8130 (0.8141) time: 0.1373 data: 0.0456 max mem: 9377 +Train: [24] [1900/6250] eta: 0:12:08 lr: 0.000113 grad: 0.1075 (0.0990) loss: 0.8071 (0.8139) time: 0.1512 data: 0.0643 max mem: 9377 +Train: [24] [2000/6250] eta: 0:11:47 lr: 0.000113 grad: 0.0951 (0.0998) loss: 0.8099 (0.8137) time: 0.1494 data: 0.0601 max mem: 9377 +Train: [24] [2100/6250] eta: 0:11:28 lr: 0.000113 grad: 0.0877 (0.0996) loss: 0.8092 (0.8136) time: 0.1585 data: 0.0696 max mem: 9377 +Train: [24] [2200/6250] eta: 0:11:09 lr: 0.000113 grad: 0.0999 (0.0999) loss: 0.8159 (0.8136) time: 0.1024 data: 0.0002 max mem: 9377 +Train: [24] [2300/6250] eta: 0:10:52 lr: 0.000113 grad: 0.0922 (0.0997) loss: 0.8154 (0.8136) time: 0.1326 data: 0.0379 max mem: 9377 +Train: [24] [2400/6250] eta: 0:10:33 lr: 0.000113 grad: 0.0968 (0.0996) loss: 0.8122 (0.8135) time: 0.1210 data: 0.0327 max mem: 9377 +Train: [24] [2500/6250] eta: 0:10:14 lr: 0.000113 grad: 0.1077 (0.0997) loss: 0.8062 (0.8133) time: 0.1207 data: 0.0308 max mem: 9377 +Train: [24] [2600/6250] eta: 0:09:57 lr: 0.000113 grad: 0.0996 (0.0999) loss: 0.8153 (0.8132) time: 0.1679 data: 0.0719 max mem: 9377 +Train: [24] [2700/6250] eta: 0:09:40 lr: 0.000113 grad: 0.0920 (0.0999) loss: 0.8187 (0.8133) time: 0.1562 data: 0.0654 max mem: 9377 +Train: [24] [2800/6250] eta: 0:09:22 lr: 0.000113 grad: 0.0948 (0.0998) loss: 0.8129 (0.8131) time: 0.1446 data: 0.0563 max mem: 9377 +Train: [24] [2900/6250] eta: 0:09:05 lr: 0.000112 grad: 0.1019 (0.0997) loss: 0.8102 (0.8130) time: 0.1370 data: 0.0497 max mem: 9377 +Train: [24] [3000/6250] eta: 0:08:49 lr: 0.000112 grad: 0.0953 (0.0996) loss: 0.8114 (0.8130) time: 0.1968 data: 0.1065 max mem: 9377 +Train: [24] [3100/6250] eta: 0:08:31 lr: 0.000112 grad: 0.1005 (0.0998) loss: 0.8112 (0.8129) time: 0.1564 data: 0.0583 max mem: 9377 +Train: [24] [3200/6250] eta: 0:08:14 lr: 0.000112 grad: 0.0999 (0.0998) loss: 0.8102 (0.8128) time: 0.1419 data: 0.0439 max mem: 9377 +Train: [24] [3300/6250] eta: 0:07:57 lr: 0.000112 grad: 0.1000 (0.0999) loss: 0.7949 (0.8126) time: 0.1347 data: 0.0417 max mem: 9377 +Train: [24] [3400/6250] eta: 0:07:40 lr: 0.000112 grad: 0.0931 (0.1000) loss: 0.8101 (0.8126) time: 0.1394 data: 0.0458 max mem: 9377 +Train: [24] [3500/6250] eta: 0:07:23 lr: 0.000112 grad: 0.0948 (0.0999) loss: 0.8204 (0.8127) time: 0.1482 data: 0.0518 max mem: 9377 +Train: [24] [3600/6250] eta: 0:07:08 lr: 0.000112 grad: 0.0922 (0.0999) loss: 0.8139 (0.8126) time: 0.1339 data: 0.0401 max mem: 9377 +Train: [24] [3700/6250] eta: 0:06:52 lr: 0.000112 grad: 0.0937 (0.1000) loss: 0.8088 (0.8125) time: 0.1602 data: 0.0726 max mem: 9377 +Train: [24] [3800/6250] eta: 0:06:36 lr: 0.000112 grad: 0.0943 (0.0999) loss: 0.8133 (0.8125) time: 0.1533 data: 0.0578 max mem: 9377 +Train: [24] [3900/6250] eta: 0:06:19 lr: 0.000112 grad: 0.0984 (0.0998) loss: 0.8115 (0.8125) time: 0.1670 data: 0.0852 max mem: 9377 +Train: [24] [4000/6250] eta: 0:06:03 lr: 0.000112 grad: 0.0980 (0.0998) loss: 0.8142 (0.8125) time: 0.1619 data: 0.0804 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:47 lr: 0.000112 grad: 0.0930 (0.0998) loss: 0.8180 (0.8125) time: 0.1587 data: 0.0725 max mem: 9377 +Train: [24] [4200/6250] eta: 0:05:31 lr: 0.000112 grad: 0.0949 (0.0998) loss: 0.8150 (0.8125) time: 0.1593 data: 0.0752 max mem: 9377 +Train: [24] [4300/6250] eta: 0:05:14 lr: 0.000112 grad: 0.0961 (0.0999) loss: 0.8149 (0.8124) time: 0.1618 data: 0.0701 max mem: 9377 +Train: [24] [4400/6250] eta: 0:04:58 lr: 0.000112 grad: 0.0967 (0.0999) loss: 0.8100 (0.8124) time: 0.1557 data: 0.0599 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:42 lr: 0.000112 grad: 0.0966 (0.0999) loss: 0.8138 (0.8124) time: 0.1628 data: 0.0811 max mem: 9377 +Train: [24] [4600/6250] eta: 0:04:25 lr: 0.000112 grad: 0.1040 (0.0999) loss: 0.8085 (0.8123) time: 0.1479 data: 0.0572 max mem: 9377 +Train: [24] [4700/6250] eta: 0:04:09 lr: 0.000112 grad: 0.1054 (0.1000) loss: 0.8143 (0.8123) time: 0.1573 data: 0.0667 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:53 lr: 0.000112 grad: 0.0966 (0.1001) loss: 0.8107 (0.8123) time: 0.1729 data: 0.0813 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:38 lr: 0.000112 grad: 0.0964 (0.1001) loss: 0.8101 (0.8122) time: 0.1482 data: 0.0677 max mem: 9377 +Train: [24] [5000/6250] eta: 0:03:22 lr: 0.000112 grad: 0.0957 (0.1001) loss: 0.8042 (0.8122) time: 0.1641 data: 0.0807 max mem: 9377 +Train: [24] [5100/6250] eta: 0:03:06 lr: 0.000112 grad: 0.0939 (0.1001) loss: 0.8125 (0.8122) time: 0.1503 data: 0.0611 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:50 lr: 0.000112 grad: 0.0937 (0.1000) loss: 0.8119 (0.8122) time: 0.1598 data: 0.0766 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:34 lr: 0.000112 grad: 0.0979 (0.1000) loss: 0.8086 (0.8122) time: 0.2678 data: 0.1821 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:18 lr: 0.000112 grad: 0.1040 (0.1001) loss: 0.8100 (0.8122) time: 0.1781 data: 0.0992 max mem: 9377 +Train: [24] [5500/6250] eta: 0:02:02 lr: 0.000112 grad: 0.0972 (0.1001) loss: 0.8164 (0.8123) time: 0.2213 data: 0.1378 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:46 lr: 0.000112 grad: 0.0951 (0.1001) loss: 0.8081 (0.8123) time: 0.1464 data: 0.0547 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:29 lr: 0.000112 grad: 0.0988 (0.1000) loss: 0.8128 (0.8123) time: 0.1656 data: 0.0798 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:13 lr: 0.000112 grad: 0.0998 (0.1001) loss: 0.8108 (0.8123) time: 0.1703 data: 0.0759 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:57 lr: 0.000112 grad: 0.0934 (0.1001) loss: 0.8119 (0.8124) time: 0.1921 data: 0.1062 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:40 lr: 0.000112 grad: 0.0974 (0.1001) loss: 0.8175 (0.8124) time: 0.1628 data: 0.0664 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:24 lr: 0.000112 grad: 0.1023 (0.1001) loss: 0.8146 (0.8124) time: 0.1660 data: 0.0544 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:08 lr: 0.000112 grad: 0.1005 (0.1001) loss: 0.8038 (0.8124) time: 0.1741 data: 0.0760 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.0947 (0.1001) loss: 0.8221 (0.8124) time: 0.1280 data: 0.0244 max mem: 9377 +Train: [24] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000112 grad: 0.0947 (0.1001) loss: 0.8221 (0.8124) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:06:12 loss: 0.8441 (0.8441) time: 6.0019 data: 5.9716 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8528 (0.8495) time: 0.1347 data: 0.1075 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:15 (0.2552 s / it) +Averaged stats (hcp-train-subset): loss: 0.8528 (0.8495) +Making plots (hcp-train-subset): example=55 +Eval (hcp-val): [24] [ 0/62] eta: 0:03:50 loss: 0.8477 (0.8477) time: 3.7249 data: 3.6486 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8463 (0.8475) time: 0.1305 data: 0.1050 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:15 (0.2578 s / it) +Averaged stats (hcp-val): loss: 0.8463 (0.8475) +Making plots (hcp-val): example=39 +Eval (nsd-val): [24] [ 0/62] eta: 0:03:57 loss: 0.8101 (0.8101) time: 3.8323 data: 3.7364 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8196 (0.8211) time: 0.1529 data: 0.1270 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:15 (0.2501 s / it) +Averaged stats (nsd-val): loss: 0.8196 (0.8211) +Making plots (nsd-val): example=23 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 11:17:54 lr: 0.000112 grad: 0.0679 (0.0679) loss: 0.8565 (0.8565) time: 6.5079 data: 6.3734 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:23:57 lr: 0.000112 grad: 0.1172 (0.1408) loss: 0.8228 (0.8291) time: 0.1628 data: 0.0593 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:20:50 lr: 0.000112 grad: 0.0935 (0.1250) loss: 0.8227 (0.8229) time: 0.1789 data: 0.0819 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:19:44 lr: 0.000112 grad: 0.0866 (0.1156) loss: 0.8332 (0.8214) time: 0.1988 data: 0.1010 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:19:00 lr: 0.000112 grad: 0.0814 (0.1102) loss: 0.8361 (0.8221) time: 0.2071 data: 0.1074 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:18:24 lr: 0.000112 grad: 0.0943 (0.1061) loss: 0.8212 (0.8232) time: 0.1755 data: 0.0861 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:17:46 lr: 0.000112 grad: 0.0869 (0.1041) loss: 0.8271 (0.8227) time: 0.1606 data: 0.0659 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:17:10 lr: 0.000112 grad: 0.0877 (0.1029) loss: 0.8298 (0.8228) time: 0.1834 data: 0.0759 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:16:47 lr: 0.000112 grad: 0.0956 (0.1018) loss: 0.8196 (0.8222) time: 0.1927 data: 0.0890 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:16:28 lr: 0.000112 grad: 0.0929 (0.1013) loss: 0.8128 (0.8216) time: 0.2155 data: 0.1200 max mem: 9377 +Train: [25] [1000/6250] eta: 0:15:51 lr: 0.000112 grad: 0.1079 (0.1012) loss: 0.8091 (0.8208) time: 0.1355 data: 0.0460 max mem: 9377 +Train: [25] [1100/6250] eta: 0:15:23 lr: 0.000112 grad: 0.1016 (0.1012) loss: 0.8020 (0.8195) time: 0.1637 data: 0.0762 max mem: 9377 +Train: [25] [1200/6250] eta: 0:14:58 lr: 0.000112 grad: 0.1038 (0.1014) loss: 0.8048 (0.8184) time: 0.1755 data: 0.0896 max mem: 9377 +Train: [25] [1300/6250] eta: 0:14:35 lr: 0.000112 grad: 0.0981 (0.1016) loss: 0.8005 (0.8173) time: 0.1611 data: 0.0772 max mem: 9377 +Train: [25] [1400/6250] eta: 0:14:14 lr: 0.000112 grad: 0.0951 (0.1020) loss: 0.8000 (0.8163) time: 0.1642 data: 0.0862 max mem: 9377 +Train: [25] [1500/6250] eta: 0:13:53 lr: 0.000112 grad: 0.1052 (0.1026) loss: 0.8004 (0.8153) time: 0.1956 data: 0.1117 max mem: 9377 +Train: [25] [1600/6250] eta: 0:13:34 lr: 0.000111 grad: 0.0957 (0.1026) loss: 0.8048 (0.8144) time: 0.1852 data: 0.1064 max mem: 9377 +Train: [25] [1700/6250] eta: 0:13:15 lr: 0.000111 grad: 0.1027 (0.1027) loss: 0.7958 (0.8136) time: 0.1751 data: 0.0909 max mem: 9377 +Train: [25] [1800/6250] eta: 0:12:53 lr: 0.000111 grad: 0.1025 (0.1030) loss: 0.8003 (0.8127) time: 0.1516 data: 0.0724 max mem: 9377 +Train: [25] [1900/6250] eta: 0:12:30 lr: 0.000111 grad: 0.1030 (0.1032) loss: 0.8005 (0.8120) time: 0.1129 data: 0.0281 max mem: 9377 +Train: [25] [2000/6250] eta: 0:12:10 lr: 0.000111 grad: 0.0982 (0.1031) loss: 0.8030 (0.8116) time: 0.1525 data: 0.0660 max mem: 9377 +Train: [25] [2100/6250] eta: 0:11:52 lr: 0.000111 grad: 0.1105 (0.1034) loss: 0.7931 (0.8110) time: 0.1874 data: 0.1081 max mem: 9377 +Train: [25] [2200/6250] eta: 0:11:34 lr: 0.000111 grad: 0.1019 (0.1035) loss: 0.7976 (0.8106) time: 0.1871 data: 0.1005 max mem: 9377 +Train: [25] [2300/6250] eta: 0:11:13 lr: 0.000111 grad: 0.1062 (0.1034) loss: 0.8056 (0.8104) time: 0.1552 data: 0.0741 max mem: 9377 +Train: [25] [2400/6250] eta: 0:10:54 lr: 0.000111 grad: 0.0993 (0.1034) loss: 0.8035 (0.8102) time: 0.1642 data: 0.0822 max mem: 9377 +Train: [25] [2500/6250] eta: 0:10:34 lr: 0.000111 grad: 0.0997 (0.1033) loss: 0.7994 (0.8100) time: 0.1751 data: 0.0899 max mem: 9377 +Train: [25] [2600/6250] eta: 0:10:15 lr: 0.000111 grad: 0.0980 (0.1033) loss: 0.8013 (0.8098) time: 0.1561 data: 0.0678 max mem: 9377 +Train: [25] [2700/6250] eta: 0:09:57 lr: 0.000111 grad: 0.0971 (0.1033) loss: 0.8003 (0.8097) time: 0.1585 data: 0.0758 max mem: 9377 +Train: [25] [2800/6250] eta: 0:09:40 lr: 0.000111 grad: 0.0994 (0.1034) loss: 0.8081 (0.8096) time: 0.1770 data: 0.0963 max mem: 9377 +Train: [25] [2900/6250] eta: 0:09:24 lr: 0.000111 grad: 0.0947 (0.1033) loss: 0.8123 (0.8096) time: 0.1866 data: 0.1105 max mem: 9377 +Train: [25] [3000/6250] eta: 0:09:06 lr: 0.000111 grad: 0.1016 (0.1032) loss: 0.8129 (0.8096) time: 0.1524 data: 0.0685 max mem: 9377 +Train: [25] [3100/6250] eta: 0:08:48 lr: 0.000111 grad: 0.0996 (0.1031) loss: 0.8055 (0.8097) time: 0.1467 data: 0.0533 max mem: 9377 +Train: [25] [3200/6250] eta: 0:08:31 lr: 0.000111 grad: 0.1019 (0.1031) loss: 0.8119 (0.8097) time: 0.1683 data: 0.0868 max mem: 9377 +Train: [25] [3300/6250] eta: 0:08:13 lr: 0.000111 grad: 0.0969 (0.1030) loss: 0.8136 (0.8098) time: 0.1618 data: 0.0683 max mem: 9377 +Train: [25] [3400/6250] eta: 0:07:55 lr: 0.000111 grad: 0.1023 (0.1030) loss: 0.8094 (0.8099) time: 0.1506 data: 0.0633 max mem: 9377 +Train: [25] [3500/6250] eta: 0:07:37 lr: 0.000111 grad: 0.1006 (0.1029) loss: 0.8112 (0.8100) time: 0.1541 data: 0.0687 max mem: 9377 +Train: [25] [3600/6250] eta: 0:07:20 lr: 0.000111 grad: 0.1078 (0.1029) loss: 0.8058 (0.8100) time: 0.1673 data: 0.0858 max mem: 9377 +Train: [25] [3700/6250] eta: 0:07:03 lr: 0.000111 grad: 0.0904 (0.1028) loss: 0.8116 (0.8100) time: 0.1522 data: 0.0652 max mem: 9377 +Train: [25] [3800/6250] eta: 0:06:46 lr: 0.000111 grad: 0.0914 (0.1027) loss: 0.8192 (0.8101) time: 0.1656 data: 0.0853 max mem: 9377 +Train: [25] [3900/6250] eta: 0:06:29 lr: 0.000111 grad: 0.1041 (0.1027) loss: 0.8073 (0.8102) time: 0.1510 data: 0.0653 max mem: 9377 +Train: [25] [4000/6250] eta: 0:06:12 lr: 0.000111 grad: 0.1033 (0.1028) loss: 0.8075 (0.8102) time: 0.1767 data: 0.0860 max mem: 9377 +Train: [25] [4100/6250] eta: 0:05:55 lr: 0.000111 grad: 0.0981 (0.1028) loss: 0.8155 (0.8103) time: 0.1693 data: 0.0788 max mem: 9377 +Train: [25] [4200/6250] eta: 0:05:39 lr: 0.000111 grad: 0.1003 (0.1027) loss: 0.8222 (0.8103) time: 0.1597 data: 0.0732 max mem: 9377 +Train: [25] [4300/6250] eta: 0:05:22 lr: 0.000111 grad: 0.0990 (0.1026) loss: 0.8144 (0.8104) time: 0.1491 data: 0.0562 max mem: 9377 +Train: [25] [4400/6250] eta: 0:05:05 lr: 0.000111 grad: 0.0937 (0.1025) loss: 0.8146 (0.8105) time: 0.1699 data: 0.0783 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:49 lr: 0.000111 grad: 0.1040 (0.1025) loss: 0.8117 (0.8105) time: 0.1394 data: 0.0547 max mem: 9377 +Train: [25] [4600/6250] eta: 0:04:32 lr: 0.000111 grad: 0.0987 (0.1025) loss: 0.8147 (0.8106) time: 0.1605 data: 0.0818 max mem: 9377 +Train: [25] [4700/6250] eta: 0:04:15 lr: 0.000111 grad: 0.0985 (0.1024) loss: 0.8145 (0.8107) time: 0.1541 data: 0.0727 max mem: 9377 +Train: [25] [4800/6250] eta: 0:03:59 lr: 0.000111 grad: 0.1009 (0.1024) loss: 0.8113 (0.8107) time: 0.1540 data: 0.0583 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:43 lr: 0.000111 grad: 0.1056 (0.1024) loss: 0.8104 (0.8106) time: 0.1769 data: 0.0910 max mem: 9377 +Train: [25] [5000/6250] eta: 0:03:26 lr: 0.000111 grad: 0.0917 (0.1023) loss: 0.8239 (0.8107) time: 0.1527 data: 0.0610 max mem: 9377 +Train: [25] [5100/6250] eta: 0:03:10 lr: 0.000111 grad: 0.1003 (0.1022) loss: 0.8091 (0.8106) time: 0.1763 data: 0.0852 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:53 lr: 0.000111 grad: 0.0986 (0.1021) loss: 0.8072 (0.8107) time: 0.2056 data: 0.1252 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:37 lr: 0.000111 grad: 0.0991 (0.1020) loss: 0.8114 (0.8107) time: 0.1981 data: 0.1170 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:20 lr: 0.000111 grad: 0.0886 (0.1019) loss: 0.8096 (0.8108) time: 0.1683 data: 0.0849 max mem: 9377 +Train: [25] [5500/6250] eta: 0:02:04 lr: 0.000111 grad: 0.0944 (0.1019) loss: 0.8167 (0.8108) time: 0.1622 data: 0.0685 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:47 lr: 0.000111 grad: 0.0936 (0.1018) loss: 0.8101 (0.8109) time: 0.1361 data: 0.0498 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:31 lr: 0.000111 grad: 0.0907 (0.1018) loss: 0.8140 (0.8109) time: 0.1647 data: 0.0690 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:14 lr: 0.000111 grad: 0.0911 (0.1017) loss: 0.8111 (0.8109) time: 0.1498 data: 0.0513 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:58 lr: 0.000111 grad: 0.1004 (0.1017) loss: 0.8068 (0.8108) time: 0.1592 data: 0.0660 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:41 lr: 0.000111 grad: 0.0964 (0.1017) loss: 0.8117 (0.8108) time: 0.1551 data: 0.0619 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:24 lr: 0.000111 grad: 0.0976 (0.1017) loss: 0.8084 (0.8108) time: 0.1456 data: 0.0510 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:08 lr: 0.000111 grad: 0.0995 (0.1016) loss: 0.8148 (0.8108) time: 0.1531 data: 0.0745 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.0972 (0.1016) loss: 0.8155 (0.8108) time: 0.1612 data: 0.0750 max mem: 9377 +Train: [25] Total time: 0:17:19 (0.1663 s / it) +Averaged stats: lr: 0.000111 grad: 0.0972 (0.1016) loss: 0.8155 (0.8108) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:04:54 loss: 0.8485 (0.8485) time: 4.7562 data: 4.7251 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8486 (0.8492) time: 0.1462 data: 0.1211 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:14 (0.2285 s / it) +Averaged stats (hcp-train-subset): loss: 0.8486 (0.8492) +Eval (hcp-val): [25] [ 0/62] eta: 0:04:08 loss: 0.8436 (0.8436) time: 4.0145 data: 3.9555 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8448 (0.8465) time: 0.1572 data: 0.1268 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (hcp-val): loss: 0.8448 (0.8465) +Eval (nsd-val): [25] [ 0/62] eta: 0:04:17 loss: 0.8125 (0.8125) time: 4.1480 data: 4.0707 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8204 (0.8219) time: 0.1407 data: 0.1134 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (nsd-val): loss: 0.8204 (0.8219) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +Train: [26] [ 0/6250] eta: 7:01:12 lr: 0.000111 grad: 0.0429 (0.0429) loss: 0.8930 (0.8930) time: 4.0436 data: 3.7746 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:22:23 lr: 0.000111 grad: 0.1026 (0.1214) loss: 0.8197 (0.8312) time: 0.1962 data: 0.1020 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:19:22 lr: 0.000110 grad: 0.0994 (0.1100) loss: 0.8091 (0.8243) time: 0.1563 data: 0.0617 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:18:00 lr: 0.000110 grad: 0.1018 (0.1078) loss: 0.8072 (0.8201) time: 0.1642 data: 0.0637 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:17:36 lr: 0.000110 grad: 0.0936 (0.1053) loss: 0.8185 (0.8182) time: 0.1626 data: 0.0621 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:17:11 lr: 0.000110 grad: 0.0868 (0.1028) loss: 0.8180 (0.8180) time: 0.2107 data: 0.1131 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:16:39 lr: 0.000110 grad: 0.0877 (0.1007) loss: 0.8191 (0.8185) time: 0.1656 data: 0.0769 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:16:06 lr: 0.000110 grad: 0.0984 (0.1000) loss: 0.8176 (0.8183) time: 0.1740 data: 0.0917 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:15:44 lr: 0.000110 grad: 0.0879 (0.0991) loss: 0.8179 (0.8181) time: 0.1624 data: 0.0764 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:15:15 lr: 0.000110 grad: 0.0915 (0.0985) loss: 0.8174 (0.8182) time: 0.1571 data: 0.0639 max mem: 9377 +Train: [26] [1000/6250] eta: 0:14:47 lr: 0.000110 grad: 0.0971 (0.0983) loss: 0.8169 (0.8180) time: 0.1351 data: 0.0408 max mem: 9377 +Train: [26] [1100/6250] eta: 0:14:20 lr: 0.000110 grad: 0.0922 (0.0981) loss: 0.8119 (0.8174) time: 0.1561 data: 0.0556 max mem: 9377 +Train: [26] [1200/6250] eta: 0:13:54 lr: 0.000110 grad: 0.0947 (0.0978) loss: 0.8106 (0.8171) time: 0.1615 data: 0.0618 max mem: 9377 +Train: [26] [1300/6250] eta: 0:13:33 lr: 0.000110 grad: 0.0945 (0.0977) loss: 0.8147 (0.8168) time: 0.1632 data: 0.0705 max mem: 9377 +Train: [26] [1400/6250] eta: 0:13:18 lr: 0.000110 grad: 0.1006 (0.0977) loss: 0.8028 (0.8163) time: 0.1649 data: 0.0740 max mem: 9377 +Train: [26] [1500/6250] eta: 0:13:02 lr: 0.000110 grad: 0.0989 (0.0979) loss: 0.8085 (0.8158) time: 0.1701 data: 0.0837 max mem: 9377 +Train: [26] [1600/6250] eta: 0:12:48 lr: 0.000110 grad: 0.1067 (0.0979) loss: 0.8081 (0.8155) time: 0.1138 data: 0.0241 max mem: 9377 +Train: [26] [1700/6250] eta: 0:12:29 lr: 0.000110 grad: 0.0963 (0.0980) loss: 0.8154 (0.8152) time: 0.2166 data: 0.1329 max mem: 9377 +Train: [26] [1800/6250] eta: 0:12:09 lr: 0.000110 grad: 0.0992 (0.0982) loss: 0.8074 (0.8149) time: 0.1669 data: 0.0862 max mem: 9377 +Train: [26] [1900/6250] eta: 0:11:50 lr: 0.000110 grad: 0.0975 (0.0985) loss: 0.8025 (0.8145) time: 0.1708 data: 0.0853 max mem: 9377 +Train: [26] [2000/6250] eta: 0:11:35 lr: 0.000110 grad: 0.1060 (0.0988) loss: 0.8132 (0.8142) time: 0.2065 data: 0.1235 max mem: 9377 +Train: [26] [2100/6250] eta: 0:11:18 lr: 0.000110 grad: 0.1032 (0.0990) loss: 0.7985 (0.8139) time: 0.1622 data: 0.0737 max mem: 9377 +Train: [26] [2200/6250] eta: 0:11:02 lr: 0.000110 grad: 0.1000 (0.0993) loss: 0.8007 (0.8135) time: 0.1696 data: 0.0764 max mem: 9377 +Train: [26] [2300/6250] eta: 0:10:45 lr: 0.000110 grad: 0.1059 (0.0994) loss: 0.8004 (0.8131) time: 0.1363 data: 0.0523 max mem: 9377 +Train: [26] [2400/6250] eta: 0:10:28 lr: 0.000110 grad: 0.0986 (0.0996) loss: 0.8026 (0.8128) time: 0.1552 data: 0.0718 max mem: 9377 +Train: [26] [2500/6250] eta: 0:10:12 lr: 0.000110 grad: 0.1031 (0.0997) loss: 0.8060 (0.8124) time: 0.1173 data: 0.0189 max mem: 9377 +Train: [26] [2600/6250] eta: 0:09:53 lr: 0.000110 grad: 0.1015 (0.0998) loss: 0.8041 (0.8122) time: 0.1560 data: 0.0738 max mem: 9377 +Train: [26] [2700/6250] eta: 0:09:35 lr: 0.000110 grad: 0.0949 (0.1000) loss: 0.8054 (0.8120) time: 0.1253 data: 0.0329 max mem: 9377 +Train: [26] [2800/6250] eta: 0:09:17 lr: 0.000110 grad: 0.1022 (0.1000) loss: 0.8020 (0.8117) time: 0.1381 data: 0.0488 max mem: 9377 +Train: [26] [2900/6250] eta: 0:09:00 lr: 0.000110 grad: 0.1013 (0.1001) loss: 0.8054 (0.8115) time: 0.1513 data: 0.0594 max mem: 9377 +Train: [26] [3000/6250] eta: 0:08:43 lr: 0.000110 grad: 0.1037 (0.1002) loss: 0.8049 (0.8113) time: 0.1470 data: 0.0603 max mem: 9377 +Train: [26] [3100/6250] eta: 0:08:26 lr: 0.000110 grad: 0.1042 (0.1003) loss: 0.8057 (0.8112) time: 0.1566 data: 0.0697 max mem: 9377 +Train: [26] [3200/6250] eta: 0:08:11 lr: 0.000110 grad: 0.1005 (0.1005) loss: 0.8021 (0.8110) time: 0.1614 data: 0.0784 max mem: 9377 +Train: [26] [3300/6250] eta: 0:07:55 lr: 0.000110 grad: 0.0972 (0.1005) loss: 0.8049 (0.8109) time: 0.1799 data: 0.0950 max mem: 9377 +Train: [26] [3400/6250] eta: 0:07:39 lr: 0.000110 grad: 0.0991 (0.1006) loss: 0.8050 (0.8107) time: 0.1577 data: 0.0719 max mem: 9377 +Train: [26] [3500/6250] eta: 0:07:24 lr: 0.000110 grad: 0.0941 (0.1008) loss: 0.8020 (0.8106) time: 0.1138 data: 0.0138 max mem: 9377 +Train: [26] [3600/6250] eta: 0:07:07 lr: 0.000110 grad: 0.0980 (0.1008) loss: 0.8100 (0.8105) time: 0.1800 data: 0.0941 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:52 lr: 0.000110 grad: 0.0974 (0.1009) loss: 0.8056 (0.8104) time: 0.1838 data: 0.0985 max mem: 9377 +Train: [26] [3800/6250] eta: 0:06:36 lr: 0.000110 grad: 0.1057 (0.1009) loss: 0.8144 (0.8103) time: 0.1607 data: 0.0676 max mem: 9377 +Train: [26] [3900/6250] eta: 0:06:20 lr: 0.000110 grad: 0.1062 (0.1010) loss: 0.8031 (0.8102) time: 0.1541 data: 0.0651 max mem: 9377 +Train: [26] [4000/6250] eta: 0:06:04 lr: 0.000110 grad: 0.1040 (0.1012) loss: 0.8052 (0.8101) time: 0.1346 data: 0.0476 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:47 lr: 0.000110 grad: 0.0977 (0.1014) loss: 0.8057 (0.8100) time: 0.1453 data: 0.0643 max mem: 9377 +Train: [26] [4200/6250] eta: 0:05:31 lr: 0.000110 grad: 0.1100 (0.1017) loss: 0.8038 (0.8099) time: 0.1720 data: 0.0890 max mem: 9377 +Train: [26] [4300/6250] eta: 0:05:15 lr: 0.000110 grad: 0.1026 (0.1019) loss: 0.8058 (0.8097) time: 0.1673 data: 0.0786 max mem: 9377 +Train: [26] [4400/6250] eta: 0:04:58 lr: 0.000110 grad: 0.1038 (0.1021) loss: 0.8006 (0.8096) time: 0.1276 data: 0.0331 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:42 lr: 0.000110 grad: 0.1042 (0.1023) loss: 0.8013 (0.8095) time: 0.1560 data: 0.0677 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:26 lr: 0.000110 grad: 0.1161 (0.1025) loss: 0.7950 (0.8093) time: 0.1667 data: 0.0726 max mem: 9377 +Train: [26] [4700/6250] eta: 0:04:09 lr: 0.000110 grad: 0.0992 (0.1026) loss: 0.8035 (0.8092) time: 0.1422 data: 0.0461 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:54 lr: 0.000109 grad: 0.1065 (0.1028) loss: 0.8083 (0.8091) time: 0.2398 data: 0.1594 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:37 lr: 0.000109 grad: 0.1059 (0.1029) loss: 0.8078 (0.8090) time: 0.1361 data: 0.0558 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:22 lr: 0.000109 grad: 0.1097 (0.1031) loss: 0.8060 (0.8089) time: 0.1906 data: 0.1038 max mem: 9377 +Train: [26] [5100/6250] eta: 0:03:05 lr: 0.000109 grad: 0.1038 (0.1032) loss: 0.8021 (0.8088) time: 0.1479 data: 0.0668 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:49 lr: 0.000109 grad: 0.1094 (0.1033) loss: 0.8071 (0.8088) time: 0.1718 data: 0.0800 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:34 lr: 0.000109 grad: 0.1036 (0.1034) loss: 0.8080 (0.8087) time: 0.1692 data: 0.0831 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:17 lr: 0.000109 grad: 0.1053 (0.1035) loss: 0.8045 (0.8088) time: 0.1539 data: 0.0669 max mem: 9377 +Train: [26] [5500/6250] eta: 0:02:01 lr: 0.000109 grad: 0.1063 (0.1036) loss: 0.8061 (0.8088) time: 0.1549 data: 0.0737 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:45 lr: 0.000109 grad: 0.1008 (0.1037) loss: 0.8174 (0.8088) time: 0.1793 data: 0.0859 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:29 lr: 0.000109 grad: 0.1072 (0.1037) loss: 0.8078 (0.8088) time: 0.1598 data: 0.0697 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:13 lr: 0.000109 grad: 0.1040 (0.1038) loss: 0.7994 (0.8087) time: 0.1644 data: 0.0625 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:56 lr: 0.000109 grad: 0.1177 (0.1040) loss: 0.8030 (0.8086) time: 0.1757 data: 0.0847 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:40 lr: 0.000109 grad: 0.1071 (0.1041) loss: 0.8009 (0.8085) time: 0.1467 data: 0.0457 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:24 lr: 0.000109 grad: 0.1051 (0.1041) loss: 0.8039 (0.8084) time: 0.1445 data: 0.0477 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:08 lr: 0.000109 grad: 0.0999 (0.1042) loss: 0.8124 (0.8084) time: 0.1539 data: 0.0599 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.1031 (0.1042) loss: 0.8023 (0.8083) time: 0.1626 data: 0.0692 max mem: 9377 +Train: [26] Total time: 0:17:00 (0.1632 s / it) +Averaged stats: lr: 0.000109 grad: 0.1031 (0.1042) loss: 0.8023 (0.8083) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:03:19 loss: 0.8489 (0.8489) time: 3.2131 data: 3.1230 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8504 (0.8499) time: 0.1421 data: 0.1167 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (hcp-train-subset): loss: 0.8504 (0.8499) +Eval (hcp-val): [26] [ 0/62] eta: 0:06:02 loss: 0.8454 (0.8454) time: 5.8485 data: 5.8182 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8475 (0.8476) time: 0.1340 data: 0.1089 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:14 (0.2312 s / it) +Averaged stats (hcp-val): loss: 0.8475 (0.8476) +Eval (nsd-val): [26] [ 0/62] eta: 0:05:09 loss: 0.8153 (0.8153) time: 4.9869 data: 4.9531 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8218 (0.8234) time: 0.1413 data: 0.1155 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.8218 (0.8234) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [27] [ 0/6250] eta: 8:55:08 lr: 0.000109 grad: 0.1045 (0.1045) loss: 0.8635 (0.8635) time: 5.1373 data: 4.9590 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:22:17 lr: 0.000109 grad: 0.0979 (0.1406) loss: 0.8199 (0.8216) time: 0.1452 data: 0.0344 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:19:45 lr: 0.000109 grad: 0.0883 (0.1268) loss: 0.8148 (0.8140) time: 0.1652 data: 0.0620 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:18:10 lr: 0.000109 grad: 0.1120 (0.1209) loss: 0.7945 (0.8104) time: 0.1650 data: 0.0680 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:17:10 lr: 0.000109 grad: 0.1038 (0.1191) loss: 0.8134 (0.8094) time: 0.1364 data: 0.0452 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:16:39 lr: 0.000109 grad: 0.1051 (0.1163) loss: 0.8129 (0.8090) time: 0.1632 data: 0.0745 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:16:18 lr: 0.000109 grad: 0.1018 (0.1141) loss: 0.7912 (0.8090) time: 0.1718 data: 0.0826 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:15:50 lr: 0.000109 grad: 0.0992 (0.1127) loss: 0.8064 (0.8088) time: 0.1461 data: 0.0446 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:15:31 lr: 0.000109 grad: 0.0993 (0.1119) loss: 0.8165 (0.8086) time: 0.1878 data: 0.0982 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:15:09 lr: 0.000109 grad: 0.1011 (0.1112) loss: 0.8118 (0.8082) time: 0.1567 data: 0.0471 max mem: 9377 +Train: [27] [1000/6250] eta: 0:14:38 lr: 0.000109 grad: 0.0921 (0.1101) loss: 0.8046 (0.8083) time: 0.1362 data: 0.0406 max mem: 9377 +Train: [27] [1100/6250] eta: 0:14:15 lr: 0.000109 grad: 0.0972 (0.1096) loss: 0.8062 (0.8078) time: 0.1649 data: 0.0678 max mem: 9377 +Train: [27] [1200/6250] eta: 0:13:47 lr: 0.000109 grad: 0.0965 (0.1095) loss: 0.8056 (0.8073) time: 0.1452 data: 0.0528 max mem: 9377 +Train: [27] [1300/6250] eta: 0:13:25 lr: 0.000109 grad: 0.1091 (0.1093) loss: 0.7996 (0.8066) time: 0.1525 data: 0.0628 max mem: 9377 +Train: [27] [1400/6250] eta: 0:13:09 lr: 0.000109 grad: 0.1018 (0.1095) loss: 0.8092 (0.8063) time: 0.2014 data: 0.1128 max mem: 9377 +Train: [27] [1500/6250] eta: 0:12:50 lr: 0.000109 grad: 0.1043 (0.1093) loss: 0.8012 (0.8060) time: 0.1534 data: 0.0679 max mem: 9377 +Train: [27] [1600/6250] eta: 0:12:32 lr: 0.000109 grad: 0.1039 (0.1090) loss: 0.8009 (0.8058) time: 0.1408 data: 0.0519 max mem: 9377 +Train: [27] [1700/6250] eta: 0:12:14 lr: 0.000109 grad: 0.0905 (0.1086) loss: 0.8016 (0.8056) time: 0.1573 data: 0.0664 max mem: 9377 +Train: [27] [1800/6250] eta: 0:12:00 lr: 0.000109 grad: 0.1002 (0.1084) loss: 0.8071 (0.8055) time: 0.1603 data: 0.0833 max mem: 9377 +Train: [27] [1900/6250] eta: 0:11:44 lr: 0.000109 grad: 0.1027 (0.1083) loss: 0.8043 (0.8056) time: 0.1433 data: 0.0560 max mem: 9377 +Train: [27] [2000/6250] eta: 0:11:32 lr: 0.000109 grad: 0.0950 (0.1080) loss: 0.8152 (0.8058) time: 0.1372 data: 0.0563 max mem: 9377 +Train: [27] [2100/6250] eta: 0:11:13 lr: 0.000109 grad: 0.1022 (0.1078) loss: 0.8029 (0.8057) time: 0.1440 data: 0.0643 max mem: 9377 +Train: [27] [2200/6250] eta: 0:10:58 lr: 0.000109 grad: 0.1006 (0.1076) loss: 0.8032 (0.8057) time: 0.1577 data: 0.0680 max mem: 9377 +Train: [27] [2300/6250] eta: 0:10:40 lr: 0.000109 grad: 0.1075 (0.1075) loss: 0.7995 (0.8057) time: 0.1497 data: 0.0662 max mem: 9377 +Train: [27] [2400/6250] eta: 0:10:22 lr: 0.000109 grad: 0.1054 (0.1076) loss: 0.8071 (0.8056) time: 0.1380 data: 0.0448 max mem: 9377 +Train: [27] [2500/6250] eta: 0:10:06 lr: 0.000109 grad: 0.1055 (0.1076) loss: 0.8006 (0.8056) time: 0.1393 data: 0.0496 max mem: 9377 +Train: [27] [2600/6250] eta: 0:09:48 lr: 0.000109 grad: 0.1071 (0.1075) loss: 0.7927 (0.8055) time: 0.1553 data: 0.0704 max mem: 9377 +Train: [27] [2700/6250] eta: 0:09:33 lr: 0.000109 grad: 0.1101 (0.1075) loss: 0.8006 (0.8055) time: 0.1812 data: 0.0808 max mem: 9377 +Train: [27] [2800/6250] eta: 0:09:16 lr: 0.000109 grad: 0.1046 (0.1076) loss: 0.8017 (0.8055) time: 0.1556 data: 0.0738 max mem: 9377 +Train: [27] [2900/6250] eta: 0:08:58 lr: 0.000109 grad: 0.1113 (0.1077) loss: 0.7997 (0.8053) time: 0.1268 data: 0.0348 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:41 lr: 0.000109 grad: 0.1049 (0.1076) loss: 0.8000 (0.8052) time: 0.1496 data: 0.0643 max mem: 9377 +Train: [27] [3100/6250] eta: 0:08:25 lr: 0.000108 grad: 0.1018 (0.1077) loss: 0.8083 (0.8050) time: 0.1682 data: 0.0822 max mem: 9377 +Train: [27] [3200/6250] eta: 0:08:08 lr: 0.000108 grad: 0.0971 (0.1077) loss: 0.8078 (0.8050) time: 0.1538 data: 0.0625 max mem: 9377 +Train: [27] [3300/6250] eta: 0:07:51 lr: 0.000108 grad: 0.1033 (0.1079) loss: 0.8026 (0.8048) time: 0.1639 data: 0.0751 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:35 lr: 0.000108 grad: 0.1061 (0.1079) loss: 0.8022 (0.8047) time: 0.1532 data: 0.0676 max mem: 9377 +Train: [27] [3500/6250] eta: 0:07:19 lr: 0.000108 grad: 0.1014 (0.1078) loss: 0.8124 (0.8047) time: 0.1719 data: 0.0878 max mem: 9377 +Train: [27] [3600/6250] eta: 0:07:02 lr: 0.000108 grad: 0.1044 (0.1077) loss: 0.8072 (0.8047) time: 0.1478 data: 0.0587 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:45 lr: 0.000108 grad: 0.0994 (0.1077) loss: 0.8049 (0.8048) time: 0.1636 data: 0.0767 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:29 lr: 0.000108 grad: 0.1007 (0.1075) loss: 0.8125 (0.8049) time: 0.1425 data: 0.0558 max mem: 9377 +Train: [27] [3900/6250] eta: 0:06:13 lr: 0.000108 grad: 0.1070 (0.1075) loss: 0.8128 (0.8051) time: 0.1830 data: 0.0976 max mem: 9377 +Train: [27] [4000/6250] eta: 0:05:57 lr: 0.000108 grad: 0.1084 (0.1074) loss: 0.8089 (0.8052) time: 0.1648 data: 0.0807 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:41 lr: 0.000108 grad: 0.1010 (0.1074) loss: 0.8093 (0.8053) time: 0.1499 data: 0.0661 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:25 lr: 0.000108 grad: 0.0971 (0.1073) loss: 0.8156 (0.8054) time: 0.1424 data: 0.0534 max mem: 9377 +Train: [27] [4300/6250] eta: 0:05:09 lr: 0.000108 grad: 0.1000 (0.1072) loss: 0.8036 (0.8055) time: 0.1520 data: 0.0665 max mem: 9377 +Train: [27] [4400/6250] eta: 0:04:53 lr: 0.000108 grad: 0.0989 (0.1072) loss: 0.8113 (0.8057) time: 0.1583 data: 0.0701 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:37 lr: 0.000108 grad: 0.0999 (0.1071) loss: 0.8047 (0.8057) time: 0.1487 data: 0.0602 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:21 lr: 0.000108 grad: 0.0980 (0.1072) loss: 0.8119 (0.8058) time: 0.1596 data: 0.0724 max mem: 9377 +Train: [27] [4700/6250] eta: 0:04:05 lr: 0.000108 grad: 0.1054 (0.1074) loss: 0.8114 (0.8058) time: 0.1502 data: 0.0666 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:50 lr: 0.000108 grad: 0.1054 (0.1073) loss: 0.8030 (0.8058) time: 0.2053 data: 0.1313 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:34 lr: 0.000108 grad: 0.1046 (0.1074) loss: 0.8095 (0.8058) time: 0.1458 data: 0.0571 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:18 lr: 0.000108 grad: 0.1034 (0.1074) loss: 0.8002 (0.8058) time: 0.1567 data: 0.0692 max mem: 9377 +Train: [27] [5100/6250] eta: 0:03:02 lr: 0.000108 grad: 0.1058 (0.1076) loss: 0.8052 (0.8057) time: 0.1715 data: 0.0877 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:47 lr: 0.000108 grad: 0.1070 (0.1077) loss: 0.8043 (0.8056) time: 0.1817 data: 0.0959 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:31 lr: 0.000108 grad: 0.1118 (0.1078) loss: 0.7871 (0.8055) time: 0.1660 data: 0.0757 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:15 lr: 0.000108 grad: 0.1036 (0.1078) loss: 0.8011 (0.8054) time: 0.1827 data: 0.0929 max mem: 9377 +Train: [27] [5500/6250] eta: 0:01:59 lr: 0.000108 grad: 0.1082 (0.1078) loss: 0.8069 (0.8054) time: 0.1594 data: 0.0748 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:43 lr: 0.000108 grad: 0.0992 (0.1077) loss: 0.8144 (0.8054) time: 0.1465 data: 0.0639 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:28 lr: 0.000108 grad: 0.1036 (0.1077) loss: 0.8040 (0.8054) time: 0.1838 data: 0.0904 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:12 lr: 0.000108 grad: 0.1013 (0.1077) loss: 0.8117 (0.8054) time: 0.1478 data: 0.0531 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:56 lr: 0.000108 grad: 0.1091 (0.1078) loss: 0.8031 (0.8054) time: 0.1508 data: 0.0574 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:40 lr: 0.000108 grad: 0.1010 (0.1078) loss: 0.8064 (0.8054) time: 0.1594 data: 0.0473 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:24 lr: 0.000108 grad: 0.0989 (0.1077) loss: 0.8039 (0.8054) time: 0.1412 data: 0.0416 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:08 lr: 0.000108 grad: 0.0996 (0.1077) loss: 0.8006 (0.8054) time: 0.1402 data: 0.0464 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.0974 (0.1077) loss: 0.8072 (0.8054) time: 0.1639 data: 0.0768 max mem: 9377 +Train: [27] Total time: 0:16:45 (0.1610 s / it) +Averaged stats: lr: 0.000108 grad: 0.0974 (0.1077) loss: 0.8072 (0.8054) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:03:35 loss: 0.8451 (0.8451) time: 3.4696 data: 3.3934 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8490 (0.8496) time: 0.1358 data: 0.1091 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:14 (0.2266 s / it) +Averaged stats (hcp-train-subset): loss: 0.8490 (0.8496) +Eval (hcp-val): [27] [ 0/62] eta: 0:04:15 loss: 0.8510 (0.8510) time: 4.1216 data: 4.0603 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8456 (0.8478) time: 0.1304 data: 0.1053 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (hcp-val): loss: 0.8456 (0.8478) +Eval (nsd-val): [27] [ 0/62] eta: 0:05:44 loss: 0.8161 (0.8161) time: 5.5595 data: 5.5290 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8250 (0.8268) time: 0.1195 data: 0.0923 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:13 (0.2245 s / it) +Averaged stats (nsd-val): loss: 0.8250 (0.8268) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 8:20:04 lr: 0.000108 grad: 0.1829 (0.1829) loss: 0.8559 (0.8559) time: 4.8007 data: 4.5096 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:21:50 lr: 0.000108 grad: 0.0942 (0.1351) loss: 0.8224 (0.8312) time: 0.1617 data: 0.0578 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:18:46 lr: 0.000108 grad: 0.1022 (0.1251) loss: 0.8114 (0.8237) time: 0.1536 data: 0.0517 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:17:33 lr: 0.000108 grad: 0.1030 (0.1206) loss: 0.8132 (0.8194) time: 0.1568 data: 0.0614 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:16:51 lr: 0.000108 grad: 0.0969 (0.1181) loss: 0.8207 (0.8174) time: 0.1579 data: 0.0627 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:16:25 lr: 0.000108 grad: 0.1157 (0.1177) loss: 0.8024 (0.8155) time: 0.1785 data: 0.0969 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:16:05 lr: 0.000108 grad: 0.1052 (0.1171) loss: 0.8112 (0.8140) time: 0.1755 data: 0.0959 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:15:43 lr: 0.000108 grad: 0.1055 (0.1152) loss: 0.8054 (0.8135) time: 0.1665 data: 0.0702 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:15:15 lr: 0.000108 grad: 0.0973 (0.1133) loss: 0.8168 (0.8134) time: 0.1544 data: 0.0631 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:15:00 lr: 0.000108 grad: 0.1039 (0.1118) loss: 0.8043 (0.8132) time: 0.1724 data: 0.0806 max mem: 9377 +Train: [28] [1000/6250] eta: 0:14:33 lr: 0.000108 grad: 0.0984 (0.1106) loss: 0.8084 (0.8131) time: 0.1489 data: 0.0609 max mem: 9377 +Train: [28] [1100/6250] eta: 0:14:07 lr: 0.000108 grad: 0.1012 (0.1095) loss: 0.8147 (0.8129) time: 0.1343 data: 0.0404 max mem: 9377 +Train: [28] [1200/6250] eta: 0:13:45 lr: 0.000108 grad: 0.0924 (0.1084) loss: 0.8144 (0.8130) time: 0.1436 data: 0.0489 max mem: 9377 +Train: [28] [1300/6250] eta: 0:13:21 lr: 0.000107 grad: 0.0929 (0.1077) loss: 0.8135 (0.8126) time: 0.1415 data: 0.0599 max mem: 9377 +Train: [28] [1400/6250] eta: 0:13:03 lr: 0.000107 grad: 0.1030 (0.1071) loss: 0.8061 (0.8125) time: 0.1633 data: 0.0762 max mem: 9377 +Train: [28] [1500/6250] eta: 0:12:46 lr: 0.000107 grad: 0.0991 (0.1067) loss: 0.8056 (0.8123) time: 0.1642 data: 0.0749 max mem: 9377 +Train: [28] [1600/6250] eta: 0:12:28 lr: 0.000107 grad: 0.0954 (0.1063) loss: 0.8175 (0.8121) time: 0.1440 data: 0.0571 max mem: 9377 +Train: [28] [1700/6250] eta: 0:12:15 lr: 0.000107 grad: 0.1006 (0.1061) loss: 0.8093 (0.8119) time: 0.1950 data: 0.1068 max mem: 9377 +Train: [28] [1800/6250] eta: 0:11:59 lr: 0.000107 grad: 0.0960 (0.1058) loss: 0.8064 (0.8118) time: 0.1496 data: 0.0601 max mem: 9377 +Train: [28] [1900/6250] eta: 0:11:45 lr: 0.000107 grad: 0.0928 (0.1055) loss: 0.8103 (0.8116) time: 0.2043 data: 0.1192 max mem: 9377 +Train: [28] [2000/6250] eta: 0:11:29 lr: 0.000107 grad: 0.1012 (0.1053) loss: 0.8067 (0.8115) time: 0.1899 data: 0.1017 max mem: 9377 +Train: [28] [2100/6250] eta: 0:11:14 lr: 0.000107 grad: 0.0985 (0.1052) loss: 0.8038 (0.8112) time: 0.1912 data: 0.0960 max mem: 9377 +Train: [28] [2200/6250] eta: 0:10:57 lr: 0.000107 grad: 0.1007 (0.1050) loss: 0.8084 (0.8111) time: 0.1729 data: 0.0799 max mem: 9377 +Train: [28] [2300/6250] eta: 0:10:40 lr: 0.000107 grad: 0.0976 (0.1049) loss: 0.8049 (0.8111) time: 0.1675 data: 0.0799 max mem: 9377 +Train: [28] [2400/6250] eta: 0:10:24 lr: 0.000107 grad: 0.1075 (0.1049) loss: 0.8068 (0.8109) time: 0.1446 data: 0.0577 max mem: 9377 +Train: [28] [2500/6250] eta: 0:10:07 lr: 0.000107 grad: 0.1178 (0.1051) loss: 0.8133 (0.8109) time: 0.1594 data: 0.0634 max mem: 9377 +Train: [28] [2600/6250] eta: 0:09:50 lr: 0.000107 grad: 0.1095 (0.1053) loss: 0.7955 (0.8107) time: 0.1461 data: 0.0608 max mem: 9377 +Train: [28] [2700/6250] eta: 0:09:33 lr: 0.000107 grad: 0.1025 (0.1053) loss: 0.8050 (0.8106) time: 0.1736 data: 0.0823 max mem: 9377 +Train: [28] [2800/6250] eta: 0:09:16 lr: 0.000107 grad: 0.0954 (0.1053) loss: 0.8065 (0.8105) time: 0.1715 data: 0.0818 max mem: 9377 +Train: [28] [2900/6250] eta: 0:09:00 lr: 0.000107 grad: 0.1008 (0.1054) loss: 0.8059 (0.8103) time: 0.1536 data: 0.0725 max mem: 9377 +Train: [28] [3000/6250] eta: 0:08:43 lr: 0.000107 grad: 0.1024 (0.1054) loss: 0.8106 (0.8101) time: 0.1573 data: 0.0660 max mem: 9377 +Train: [28] [3100/6250] eta: 0:08:26 lr: 0.000107 grad: 0.1044 (0.1054) loss: 0.8086 (0.8100) time: 0.1545 data: 0.0719 max mem: 9377 +Train: [28] [3200/6250] eta: 0:08:10 lr: 0.000107 grad: 0.1008 (0.1055) loss: 0.8068 (0.8098) time: 0.1472 data: 0.0602 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:54 lr: 0.000107 grad: 0.1086 (0.1056) loss: 0.8018 (0.8097) time: 0.1600 data: 0.0741 max mem: 9377 +Train: [28] [3400/6250] eta: 0:07:37 lr: 0.000107 grad: 0.1016 (0.1058) loss: 0.8117 (0.8096) time: 0.1259 data: 0.0402 max mem: 9377 +Train: [28] [3500/6250] eta: 0:07:21 lr: 0.000107 grad: 0.1095 (0.1058) loss: 0.8052 (0.8095) time: 0.1573 data: 0.0711 max mem: 9377 +Train: [28] [3600/6250] eta: 0:07:04 lr: 0.000107 grad: 0.1031 (0.1059) loss: 0.8055 (0.8094) time: 0.1419 data: 0.0566 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:48 lr: 0.000107 grad: 0.1024 (0.1063) loss: 0.8047 (0.8092) time: 0.1627 data: 0.0787 max mem: 9377 +Train: [28] [3800/6250] eta: 0:06:32 lr: 0.000107 grad: 0.1080 (0.1064) loss: 0.8090 (0.8090) time: 0.1695 data: 0.0848 max mem: 9377 +Train: [28] [3900/6250] eta: 0:06:16 lr: 0.000107 grad: 0.1001 (0.1064) loss: 0.8121 (0.8089) time: 0.1698 data: 0.0888 max mem: 9377 +Train: [28] [4000/6250] eta: 0:05:59 lr: 0.000107 grad: 0.1120 (0.1064) loss: 0.7990 (0.8088) time: 0.1563 data: 0.0697 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:43 lr: 0.000107 grad: 0.0981 (0.1065) loss: 0.8111 (0.8086) time: 0.1504 data: 0.0700 max mem: 9377 +Train: [28] [4200/6250] eta: 0:05:27 lr: 0.000107 grad: 0.1019 (0.1065) loss: 0.8063 (0.8086) time: 0.1371 data: 0.0444 max mem: 9377 +Train: [28] [4300/6250] eta: 0:05:11 lr: 0.000107 grad: 0.1044 (0.1065) loss: 0.8028 (0.8084) time: 0.1575 data: 0.0754 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:55 lr: 0.000107 grad: 0.1067 (0.1066) loss: 0.8023 (0.8083) time: 0.1719 data: 0.0916 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:39 lr: 0.000107 grad: 0.0971 (0.1066) loss: 0.8051 (0.8082) time: 0.1476 data: 0.0576 max mem: 9377 +Train: [28] [4600/6250] eta: 0:04:22 lr: 0.000107 grad: 0.1019 (0.1066) loss: 0.8017 (0.8080) time: 0.1517 data: 0.0650 max mem: 9377 +Train: [28] [4700/6250] eta: 0:04:06 lr: 0.000107 grad: 0.1028 (0.1067) loss: 0.8021 (0.8077) time: 0.1528 data: 0.0704 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:51 lr: 0.000107 grad: 0.1067 (0.1068) loss: 0.7969 (0.8076) time: 0.1634 data: 0.0775 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:35 lr: 0.000107 grad: 0.1117 (0.1070) loss: 0.7962 (0.8074) time: 0.1620 data: 0.0837 max mem: 9377 +Train: [28] [5000/6250] eta: 0:03:19 lr: 0.000107 grad: 0.1145 (0.1070) loss: 0.7892 (0.8072) time: 0.1459 data: 0.0562 max mem: 9377 +Train: [28] [5100/6250] eta: 0:03:04 lr: 0.000107 grad: 0.1128 (0.1071) loss: 0.8031 (0.8070) time: 0.1635 data: 0.0737 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:48 lr: 0.000107 grad: 0.1056 (0.1073) loss: 0.7926 (0.8069) time: 0.2104 data: 0.1231 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:32 lr: 0.000107 grad: 0.1083 (0.1074) loss: 0.7989 (0.8067) time: 0.1510 data: 0.0713 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:16 lr: 0.000107 grad: 0.1038 (0.1074) loss: 0.8000 (0.8065) time: 0.1764 data: 0.0836 max mem: 9377 +Train: [28] [5500/6250] eta: 0:02:00 lr: 0.000107 grad: 0.1005 (0.1075) loss: 0.8006 (0.8063) time: 0.1659 data: 0.0804 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:44 lr: 0.000106 grad: 0.1179 (0.1075) loss: 0.8014 (0.8061) time: 0.1742 data: 0.0755 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:28 lr: 0.000106 grad: 0.1161 (0.1076) loss: 0.7875 (0.8059) time: 0.1451 data: 0.0583 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:12 lr: 0.000106 grad: 0.1112 (0.1076) loss: 0.7995 (0.8058) time: 0.1748 data: 0.0830 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:56 lr: 0.000106 grad: 0.1100 (0.1077) loss: 0.7932 (0.8055) time: 0.1305 data: 0.0397 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:40 lr: 0.000106 grad: 0.1121 (0.1078) loss: 0.7933 (0.8053) time: 0.1602 data: 0.0708 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:24 lr: 0.000106 grad: 0.1125 (0.1079) loss: 0.7939 (0.8052) time: 0.1410 data: 0.0389 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:08 lr: 0.000106 grad: 0.1073 (0.1079) loss: 0.7964 (0.8050) time: 0.1394 data: 0.0522 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.1069 (0.1079) loss: 0.7949 (0.8049) time: 0.1582 data: 0.0795 max mem: 9377 +Train: [28] Total time: 0:16:45 (0.1608 s / it) +Averaged stats: lr: 0.000106 grad: 0.1069 (0.1079) loss: 0.7949 (0.8049) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:06:01 loss: 0.8443 (0.8443) time: 5.8334 data: 5.8034 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8493 (0.8497) time: 0.1179 data: 0.0900 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:14 (0.2289 s / it) +Averaged stats (hcp-train-subset): loss: 0.8493 (0.8497) +Eval (hcp-val): [28] [ 0/62] eta: 0:04:21 loss: 0.8457 (0.8457) time: 4.2150 data: 4.0945 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8460 (0.8478) time: 0.1305 data: 0.1054 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (hcp-val): loss: 0.8460 (0.8478) +Eval (nsd-val): [28] [ 0/62] eta: 0:05:10 loss: 0.8114 (0.8114) time: 5.0089 data: 4.9720 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8186 (0.8201) time: 0.1415 data: 0.1141 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:13 (0.2199 s / it) +Averaged stats (nsd-val): loss: 0.8186 (0.8201) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 10:13:53 lr: 0.000106 grad: 0.4017 (0.4017) loss: 0.8111 (0.8111) time: 5.8934 data: 5.7363 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:22:51 lr: 0.000106 grad: 0.1081 (0.1247) loss: 0.8171 (0.8228) time: 0.1617 data: 0.0598 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:19:13 lr: 0.000106 grad: 0.1079 (0.1246) loss: 0.8062 (0.8127) time: 0.1668 data: 0.0700 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:17:41 lr: 0.000106 grad: 0.0951 (0.1187) loss: 0.8223 (0.8128) time: 0.1551 data: 0.0601 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:16:46 lr: 0.000106 grad: 0.0929 (0.1136) loss: 0.8158 (0.8134) time: 0.1496 data: 0.0615 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:16:14 lr: 0.000106 grad: 0.0862 (0.1103) loss: 0.8237 (0.8145) time: 0.1918 data: 0.1071 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:15:46 lr: 0.000106 grad: 0.0947 (0.1084) loss: 0.8169 (0.8146) time: 0.1480 data: 0.0550 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:15:22 lr: 0.000106 grad: 0.0915 (0.1067) loss: 0.8221 (0.8148) time: 0.1532 data: 0.0539 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:15:03 lr: 0.000106 grad: 0.0984 (0.1056) loss: 0.8104 (0.8147) time: 0.1696 data: 0.0802 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:14:49 lr: 0.000106 grad: 0.0931 (0.1052) loss: 0.8124 (0.8142) time: 0.1531 data: 0.0490 max mem: 9377 +Train: [29] [1000/6250] eta: 0:14:30 lr: 0.000106 grad: 0.0980 (0.1046) loss: 0.8103 (0.8136) time: 0.1519 data: 0.0606 max mem: 9377 +Train: [29] [1100/6250] eta: 0:14:10 lr: 0.000106 grad: 0.1021 (0.1044) loss: 0.8157 (0.8133) time: 0.1436 data: 0.0553 max mem: 9377 +Train: [29] [1200/6250] eta: 0:13:48 lr: 0.000106 grad: 0.1054 (0.1043) loss: 0.8010 (0.8128) time: 0.1613 data: 0.0739 max mem: 9377 +Train: [29] [1300/6250] eta: 0:13:30 lr: 0.000106 grad: 0.0936 (0.1040) loss: 0.8106 (0.8124) time: 0.1664 data: 0.0781 max mem: 9377 +Train: [29] [1400/6250] eta: 0:13:09 lr: 0.000106 grad: 0.0997 (0.1041) loss: 0.8068 (0.8119) time: 0.1582 data: 0.0700 max mem: 9377 +Train: [29] [1500/6250] eta: 0:12:51 lr: 0.000106 grad: 0.1065 (0.1041) loss: 0.8053 (0.8116) time: 0.1563 data: 0.0682 max mem: 9377 +Train: [29] [1600/6250] eta: 0:12:33 lr: 0.000106 grad: 0.1021 (0.1042) loss: 0.8072 (0.8112) time: 0.1627 data: 0.0765 max mem: 9377 +Train: [29] [1700/6250] eta: 0:12:17 lr: 0.000106 grad: 0.0971 (0.1042) loss: 0.8130 (0.8107) time: 0.1818 data: 0.0932 max mem: 9377 +Train: [29] [1800/6250] eta: 0:11:57 lr: 0.000106 grad: 0.1040 (0.1043) loss: 0.8003 (0.8102) time: 0.1522 data: 0.0654 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:39 lr: 0.000106 grad: 0.1078 (0.1045) loss: 0.8043 (0.8098) time: 0.1377 data: 0.0447 max mem: 9377 +Train: [29] [2000/6250] eta: 0:11:22 lr: 0.000106 grad: 0.1100 (0.1048) loss: 0.8053 (0.8094) time: 0.1717 data: 0.0800 max mem: 9377 +Train: [29] [2100/6250] eta: 0:11:04 lr: 0.000106 grad: 0.1041 (0.1051) loss: 0.8070 (0.8090) time: 0.1649 data: 0.0774 max mem: 9377 +Train: [29] [2200/6250] eta: 0:10:46 lr: 0.000106 grad: 0.1058 (0.1050) loss: 0.7997 (0.8087) time: 0.1674 data: 0.0875 max mem: 9377 +Train: [29] [2300/6250] eta: 0:10:30 lr: 0.000106 grad: 0.1059 (0.1050) loss: 0.8033 (0.8085) time: 0.1748 data: 0.0807 max mem: 9377 +Train: [29] [2400/6250] eta: 0:10:14 lr: 0.000106 grad: 0.1005 (0.1050) loss: 0.8002 (0.8083) time: 0.1722 data: 0.0776 max mem: 9377 +Train: [29] [2500/6250] eta: 0:09:58 lr: 0.000106 grad: 0.1019 (0.1050) loss: 0.8045 (0.8082) time: 0.1584 data: 0.0716 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:41 lr: 0.000106 grad: 0.1049 (0.1051) loss: 0.8007 (0.8081) time: 0.1512 data: 0.0669 max mem: 9377 +Train: [29] [2700/6250] eta: 0:09:25 lr: 0.000106 grad: 0.1026 (0.1053) loss: 0.8119 (0.8079) time: 0.1501 data: 0.0609 max mem: 9377 +Train: [29] [2800/6250] eta: 0:09:08 lr: 0.000106 grad: 0.1021 (0.1054) loss: 0.7935 (0.8076) time: 0.1557 data: 0.0674 max mem: 9377 +Train: [29] [2900/6250] eta: 0:08:53 lr: 0.000106 grad: 0.1133 (0.1057) loss: 0.7905 (0.8072) time: 0.1743 data: 0.0881 max mem: 9377 +Train: [29] [3000/6250] eta: 0:08:37 lr: 0.000106 grad: 0.1046 (0.1059) loss: 0.7981 (0.8069) time: 0.1607 data: 0.0699 max mem: 9377 +Train: [29] [3100/6250] eta: 0:08:20 lr: 0.000106 grad: 0.1052 (0.1060) loss: 0.7962 (0.8066) time: 0.1576 data: 0.0599 max mem: 9377 +Train: [29] [3200/6250] eta: 0:08:04 lr: 0.000106 grad: 0.1111 (0.1062) loss: 0.8009 (0.8064) time: 0.1456 data: 0.0600 max mem: 9377 +Train: [29] [3300/6250] eta: 0:07:48 lr: 0.000106 grad: 0.1125 (0.1064) loss: 0.8049 (0.8062) time: 0.1562 data: 0.0597 max mem: 9377 +Train: [29] [3400/6250] eta: 0:07:32 lr: 0.000106 grad: 0.1119 (0.1065) loss: 0.8069 (0.8060) time: 0.1396 data: 0.0500 max mem: 9377 +Train: [29] [3500/6250] eta: 0:07:16 lr: 0.000105 grad: 0.1032 (0.1066) loss: 0.7956 (0.8058) time: 0.1348 data: 0.0486 max mem: 9377 +Train: [29] [3600/6250] eta: 0:07:00 lr: 0.000105 grad: 0.1040 (0.1067) loss: 0.8076 (0.8058) time: 0.1792 data: 0.0971 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:44 lr: 0.000105 grad: 0.1028 (0.1066) loss: 0.8080 (0.8057) time: 0.1640 data: 0.0792 max mem: 9377 +Train: [29] [3800/6250] eta: 0:06:29 lr: 0.000105 grad: 0.1049 (0.1067) loss: 0.7974 (0.8057) time: 0.1581 data: 0.0736 max mem: 9377 +Train: [29] [3900/6250] eta: 0:06:12 lr: 0.000105 grad: 0.1019 (0.1067) loss: 0.7995 (0.8056) time: 0.1555 data: 0.0618 max mem: 9377 +Train: [29] [4000/6250] eta: 0:05:56 lr: 0.000105 grad: 0.1041 (0.1067) loss: 0.7985 (0.8056) time: 0.1434 data: 0.0529 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:40 lr: 0.000105 grad: 0.1042 (0.1067) loss: 0.8054 (0.8056) time: 0.1588 data: 0.0654 max mem: 9377 +Train: [29] [4200/6250] eta: 0:05:24 lr: 0.000105 grad: 0.1030 (0.1069) loss: 0.8118 (0.8055) time: 0.1556 data: 0.0754 max mem: 9377 +Train: [29] [4300/6250] eta: 0:05:08 lr: 0.000105 grad: 0.1061 (0.1069) loss: 0.8002 (0.8054) time: 0.1227 data: 0.0212 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:52 lr: 0.000105 grad: 0.1041 (0.1070) loss: 0.7954 (0.8053) time: 0.1414 data: 0.0549 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:36 lr: 0.000105 grad: 0.1024 (0.1071) loss: 0.7950 (0.8052) time: 0.1572 data: 0.0621 max mem: 9377 +Train: [29] [4600/6250] eta: 0:04:20 lr: 0.000105 grad: 0.1048 (0.1072) loss: 0.8052 (0.8051) time: 0.1451 data: 0.0623 max mem: 9377 +Train: [29] [4700/6250] eta: 0:04:05 lr: 0.000105 grad: 0.1064 (0.1071) loss: 0.7980 (0.8050) time: 0.1729 data: 0.0823 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:49 lr: 0.000105 grad: 0.1128 (0.1072) loss: 0.7966 (0.8050) time: 0.1571 data: 0.0794 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:33 lr: 0.000105 grad: 0.1030 (0.1072) loss: 0.7947 (0.8048) time: 0.1554 data: 0.0681 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:18 lr: 0.000105 grad: 0.1071 (0.1072) loss: 0.7988 (0.8048) time: 0.1570 data: 0.0756 max mem: 9377 +Train: [29] [5100/6250] eta: 0:03:02 lr: 0.000105 grad: 0.1096 (0.1073) loss: 0.7963 (0.8047) time: 0.1639 data: 0.0774 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:46 lr: 0.000105 grad: 0.0964 (0.1072) loss: 0.8127 (0.8047) time: 0.1705 data: 0.0747 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:30 lr: 0.000105 grad: 0.1073 (0.1073) loss: 0.8105 (0.8046) time: 0.1612 data: 0.0809 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:15 lr: 0.000105 grad: 0.1119 (0.1072) loss: 0.8011 (0.8046) time: 0.1646 data: 0.0835 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:59 lr: 0.000105 grad: 0.1014 (0.1073) loss: 0.8110 (0.8045) time: 0.1526 data: 0.0590 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:43 lr: 0.000105 grad: 0.1004 (0.1072) loss: 0.8052 (0.8045) time: 0.1549 data: 0.0760 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:27 lr: 0.000105 grad: 0.1003 (0.1072) loss: 0.8058 (0.8045) time: 0.1701 data: 0.0668 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:11 lr: 0.000105 grad: 0.1028 (0.1073) loss: 0.8012 (0.8044) time: 0.1524 data: 0.0568 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:55 lr: 0.000105 grad: 0.1011 (0.1073) loss: 0.7969 (0.8044) time: 0.1461 data: 0.0493 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:39 lr: 0.000105 grad: 0.1119 (0.1073) loss: 0.8006 (0.8044) time: 0.1548 data: 0.0507 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:23 lr: 0.000105 grad: 0.1132 (0.1073) loss: 0.8027 (0.8044) time: 0.1429 data: 0.0473 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.1106 (0.1073) loss: 0.8076 (0.8043) time: 0.1459 data: 0.0429 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.1052 (0.1074) loss: 0.8090 (0.8043) time: 0.1392 data: 0.0445 max mem: 9377 +Train: [29] Total time: 0:16:32 (0.1589 s / it) +Averaged stats: lr: 0.000105 grad: 0.1052 (0.1074) loss: 0.8090 (0.8043) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:04:40 loss: 0.8436 (0.8436) time: 4.5232 data: 4.4149 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8500 (0.8479) time: 0.1336 data: 0.1085 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-train-subset): loss: 0.8500 (0.8479) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [29] [ 0/62] eta: 0:06:06 loss: 0.8438 (0.8438) time: 5.9169 data: 5.8861 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8449 (0.8456) time: 0.1331 data: 0.0981 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:14 (0.2328 s / it) +Averaged stats (hcp-val): loss: 0.8449 (0.8456) +Making plots (hcp-val): example=28 +Eval (nsd-val): [29] [ 0/62] eta: 0:04:11 loss: 0.8089 (0.8089) time: 4.0613 data: 3.9973 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8211 (0.8210) time: 0.1387 data: 0.1132 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (nsd-val): loss: 0.8211 (0.8210) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 8:25:21 lr: 0.000105 grad: 0.2318 (0.2318) loss: 0.8899 (0.8899) time: 4.8515 data: 4.5525 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:22:17 lr: 0.000105 grad: 0.1343 (0.1425) loss: 0.8138 (0.8206) time: 0.1883 data: 0.0817 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:19:44 lr: 0.000105 grad: 0.1023 (0.1382) loss: 0.7959 (0.8115) time: 0.1823 data: 0.0803 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:18:20 lr: 0.000105 grad: 0.1175 (0.1353) loss: 0.8006 (0.8054) time: 0.1538 data: 0.0568 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:17:52 lr: 0.000105 grad: 0.1119 (0.1303) loss: 0.8069 (0.8037) time: 0.1983 data: 0.1080 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:17:35 lr: 0.000105 grad: 0.1047 (0.1258) loss: 0.8001 (0.8035) time: 0.1882 data: 0.0949 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:17:01 lr: 0.000105 grad: 0.0963 (0.1226) loss: 0.8115 (0.8037) time: 0.1756 data: 0.0900 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:16:37 lr: 0.000105 grad: 0.1067 (0.1203) loss: 0.8042 (0.8035) time: 0.1478 data: 0.0576 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:16:21 lr: 0.000105 grad: 0.1000 (0.1189) loss: 0.8039 (0.8026) time: 0.1746 data: 0.0774 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:16:04 lr: 0.000105 grad: 0.1025 (0.1184) loss: 0.8002 (0.8022) time: 0.1976 data: 0.0994 max mem: 9377 +Train: [30] [1000/6250] eta: 0:15:37 lr: 0.000105 grad: 0.0992 (0.1173) loss: 0.7987 (0.8015) time: 0.1681 data: 0.0726 max mem: 9377 +Train: [30] [1100/6250] eta: 0:15:09 lr: 0.000105 grad: 0.1118 (0.1165) loss: 0.7811 (0.8010) time: 0.1535 data: 0.0620 max mem: 9377 +Train: [30] [1200/6250] eta: 0:14:47 lr: 0.000105 grad: 0.1155 (0.1162) loss: 0.7960 (0.8004) time: 0.1665 data: 0.0809 max mem: 9377 +Train: [30] [1300/6250] eta: 0:14:23 lr: 0.000105 grad: 0.1091 (0.1158) loss: 0.7969 (0.8002) time: 0.1487 data: 0.0650 max mem: 9377 +Train: [30] [1400/6250] eta: 0:13:59 lr: 0.000104 grad: 0.1063 (0.1157) loss: 0.8012 (0.7998) time: 0.1627 data: 0.0716 max mem: 9377 +Train: [30] [1500/6250] eta: 0:13:36 lr: 0.000104 grad: 0.1050 (0.1154) loss: 0.7994 (0.7996) time: 0.1558 data: 0.0700 max mem: 9377 +Train: [30] [1600/6250] eta: 0:13:14 lr: 0.000104 grad: 0.1128 (0.1153) loss: 0.7914 (0.7993) time: 0.1455 data: 0.0539 max mem: 9377 +Train: [30] [1700/6250] eta: 0:12:52 lr: 0.000104 grad: 0.1129 (0.1151) loss: 0.8016 (0.7992) time: 0.1571 data: 0.0647 max mem: 9377 +Train: [30] [1800/6250] eta: 0:12:32 lr: 0.000104 grad: 0.1115 (0.1148) loss: 0.7997 (0.7991) time: 0.1602 data: 0.0753 max mem: 9377 +Train: [30] [1900/6250] eta: 0:12:11 lr: 0.000104 grad: 0.1065 (0.1146) loss: 0.7893 (0.7990) time: 0.1598 data: 0.0691 max mem: 9377 +Train: [30] [2000/6250] eta: 0:11:51 lr: 0.000104 grad: 0.1089 (0.1143) loss: 0.7934 (0.7990) time: 0.1630 data: 0.0743 max mem: 9377 +Train: [30] [2100/6250] eta: 0:11:33 lr: 0.000104 grad: 0.1036 (0.1140) loss: 0.8026 (0.7990) time: 0.1367 data: 0.0472 max mem: 9377 +Train: [30] [2200/6250] eta: 0:11:15 lr: 0.000104 grad: 0.1124 (0.1139) loss: 0.7912 (0.7990) time: 0.1909 data: 0.0979 max mem: 9377 +Train: [30] [2300/6250] eta: 0:10:56 lr: 0.000104 grad: 0.1095 (0.1139) loss: 0.8004 (0.7990) time: 0.1564 data: 0.0616 max mem: 9377 +Train: [30] [2400/6250] eta: 0:10:37 lr: 0.000104 grad: 0.1068 (0.1137) loss: 0.7967 (0.7991) time: 0.1671 data: 0.0762 max mem: 9377 +Train: [30] [2500/6250] eta: 0:10:18 lr: 0.000104 grad: 0.1083 (0.1136) loss: 0.8080 (0.7991) time: 0.1573 data: 0.0670 max mem: 9377 +Train: [30] [2600/6250] eta: 0:10:01 lr: 0.000104 grad: 0.1128 (0.1134) loss: 0.8030 (0.7990) time: 0.1553 data: 0.0732 max mem: 9377 +Train: [30] [2700/6250] eta: 0:09:43 lr: 0.000104 grad: 0.1069 (0.1134) loss: 0.8010 (0.7991) time: 0.1555 data: 0.0678 max mem: 9377 +Train: [30] [2800/6250] eta: 0:09:25 lr: 0.000104 grad: 0.1083 (0.1133) loss: 0.7903 (0.7992) time: 0.1453 data: 0.0587 max mem: 9377 +Train: [30] [2900/6250] eta: 0:09:08 lr: 0.000104 grad: 0.1096 (0.1132) loss: 0.8104 (0.7994) time: 0.1440 data: 0.0545 max mem: 9377 +Train: [30] [3000/6250] eta: 0:08:50 lr: 0.000104 grad: 0.1142 (0.1132) loss: 0.8004 (0.7994) time: 0.1571 data: 0.0778 max mem: 9377 +Train: [30] [3100/6250] eta: 0:08:32 lr: 0.000104 grad: 0.1073 (0.1131) loss: 0.7928 (0.7995) time: 0.1425 data: 0.0421 max mem: 9377 +Train: [30] [3200/6250] eta: 0:08:15 lr: 0.000104 grad: 0.1092 (0.1130) loss: 0.8019 (0.7996) time: 0.1492 data: 0.0586 max mem: 9377 +Train: [30] [3300/6250] eta: 0:07:58 lr: 0.000104 grad: 0.1024 (0.1129) loss: 0.8157 (0.7998) time: 0.1384 data: 0.0447 max mem: 9377 +Train: [30] [3400/6250] eta: 0:07:42 lr: 0.000104 grad: 0.1076 (0.1129) loss: 0.8126 (0.7999) time: 0.1564 data: 0.0668 max mem: 9377 +Train: [30] [3500/6250] eta: 0:07:25 lr: 0.000104 grad: 0.1052 (0.1129) loss: 0.8066 (0.7999) time: 0.1495 data: 0.0619 max mem: 9377 +Train: [30] [3600/6250] eta: 0:07:09 lr: 0.000104 grad: 0.1030 (0.1128) loss: 0.8139 (0.8000) time: 0.1527 data: 0.0619 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:52 lr: 0.000104 grad: 0.1065 (0.1128) loss: 0.8031 (0.8001) time: 0.1452 data: 0.0568 max mem: 9377 +Train: [30] [3800/6250] eta: 0:06:36 lr: 0.000104 grad: 0.1065 (0.1127) loss: 0.8058 (0.8001) time: 0.1467 data: 0.0564 max mem: 9377 +Train: [30] [3900/6250] eta: 0:06:19 lr: 0.000104 grad: 0.1085 (0.1127) loss: 0.7984 (0.8000) time: 0.1509 data: 0.0675 max mem: 9377 +Train: [30] [4000/6250] eta: 0:06:03 lr: 0.000104 grad: 0.1090 (0.1127) loss: 0.8121 (0.8001) time: 0.1594 data: 0.0672 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:46 lr: 0.000104 grad: 0.1113 (0.1127) loss: 0.7922 (0.8000) time: 0.1449 data: 0.0583 max mem: 9377 +Train: [30] [4200/6250] eta: 0:05:30 lr: 0.000104 grad: 0.1024 (0.1126) loss: 0.8055 (0.8001) time: 0.1587 data: 0.0657 max mem: 9377 +Train: [30] [4300/6250] eta: 0:05:13 lr: 0.000104 grad: 0.1104 (0.1126) loss: 0.8018 (0.8000) time: 0.1601 data: 0.0744 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:57 lr: 0.000104 grad: 0.1150 (0.1127) loss: 0.7964 (0.8001) time: 0.1545 data: 0.0635 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:41 lr: 0.000104 grad: 0.1088 (0.1127) loss: 0.7934 (0.8000) time: 0.1789 data: 0.0951 max mem: 9377 +Train: [30] [4600/6250] eta: 0:04:24 lr: 0.000104 grad: 0.1116 (0.1127) loss: 0.7938 (0.8000) time: 0.1480 data: 0.0620 max mem: 9377 +Train: [30] [4700/6250] eta: 0:04:09 lr: 0.000104 grad: 0.1164 (0.1126) loss: 0.7926 (0.8000) time: 0.1390 data: 0.0578 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:53 lr: 0.000104 grad: 0.1031 (0.1126) loss: 0.8087 (0.8001) time: 0.1696 data: 0.0900 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:37 lr: 0.000104 grad: 0.1123 (0.1126) loss: 0.7948 (0.8000) time: 0.1936 data: 0.1123 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:21 lr: 0.000104 grad: 0.1049 (0.1126) loss: 0.8029 (0.8000) time: 0.1896 data: 0.1105 max mem: 9377 +Train: [30] [5100/6250] eta: 0:03:05 lr: 0.000104 grad: 0.1080 (0.1125) loss: 0.7954 (0.8001) time: 0.1742 data: 0.0801 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:49 lr: 0.000104 grad: 0.1138 (0.1125) loss: 0.7878 (0.8001) time: 0.1645 data: 0.0754 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:33 lr: 0.000104 grad: 0.1066 (0.1124) loss: 0.8051 (0.8001) time: 0.1856 data: 0.0954 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:17 lr: 0.000103 grad: 0.1136 (0.1123) loss: 0.7971 (0.8001) time: 0.1592 data: 0.0734 max mem: 9377 +Train: [30] [5500/6250] eta: 0:02:00 lr: 0.000103 grad: 0.1057 (0.1123) loss: 0.8060 (0.8001) time: 0.1597 data: 0.0808 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:44 lr: 0.000103 grad: 0.1043 (0.1123) loss: 0.8030 (0.8001) time: 0.1437 data: 0.0634 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:28 lr: 0.000103 grad: 0.1026 (0.1122) loss: 0.8073 (0.8001) time: 0.1416 data: 0.0467 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:12 lr: 0.000103 grad: 0.1052 (0.1123) loss: 0.8092 (0.8002) time: 0.1497 data: 0.0581 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:56 lr: 0.000103 grad: 0.1134 (0.1122) loss: 0.7985 (0.8002) time: 0.1595 data: 0.0699 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:40 lr: 0.000103 grad: 0.1102 (0.1122) loss: 0.8030 (0.8002) time: 0.1490 data: 0.0509 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:24 lr: 0.000103 grad: 0.1108 (0.1123) loss: 0.8036 (0.8002) time: 0.1695 data: 0.0847 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:08 lr: 0.000103 grad: 0.1101 (0.1123) loss: 0.8046 (0.8003) time: 0.1443 data: 0.0585 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.1062 (0.1123) loss: 0.8055 (0.8003) time: 0.1493 data: 0.0559 max mem: 9377 +Train: [30] Total time: 0:16:48 (0.1614 s / it) +Averaged stats: lr: 0.000103 grad: 0.1062 (0.1123) loss: 0.8055 (0.8003) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:06:18 loss: 0.8498 (0.8498) time: 6.0983 data: 6.0647 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8480 (0.8501) time: 0.1433 data: 0.1180 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:15 (0.2482 s / it) +Averaged stats (hcp-train-subset): loss: 0.8480 (0.8501) +Eval (hcp-val): [30] [ 0/62] eta: 0:04:05 loss: 0.8443 (0.8443) time: 3.9587 data: 3.8551 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8463 (0.8490) time: 0.1207 data: 0.0944 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:15 (0.2436 s / it) +Averaged stats (hcp-val): loss: 0.8463 (0.8490) +Eval (nsd-val): [30] [ 0/62] eta: 0:04:14 loss: 0.8111 (0.8111) time: 4.1081 data: 4.0141 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8234 (0.8235) time: 0.1528 data: 0.1270 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:14 (0.2357 s / it) +Averaged stats (nsd-val): loss: 0.8234 (0.8235) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [31] [ 0/6250] eta: 9:23:49 lr: 0.000103 grad: 0.1038 (0.1038) loss: 0.8551 (0.8551) time: 5.4127 data: 5.1711 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:24:02 lr: 0.000103 grad: 0.1069 (0.1461) loss: 0.8128 (0.8221) time: 0.1675 data: 0.0621 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:20:38 lr: 0.000103 grad: 0.1073 (0.1314) loss: 0.8276 (0.8180) time: 0.1742 data: 0.0793 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:19:28 lr: 0.000103 grad: 0.1159 (0.1273) loss: 0.8116 (0.8151) time: 0.2133 data: 0.1166 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:18:15 lr: 0.000103 grad: 0.1104 (0.1257) loss: 0.8078 (0.8133) time: 0.1655 data: 0.0732 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:17:47 lr: 0.000103 grad: 0.1172 (0.1245) loss: 0.8010 (0.8114) time: 0.1683 data: 0.0727 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:17:22 lr: 0.000103 grad: 0.1069 (0.1227) loss: 0.7989 (0.8097) time: 0.1968 data: 0.1111 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:16:50 lr: 0.000103 grad: 0.1183 (0.1210) loss: 0.7926 (0.8088) time: 0.1583 data: 0.0684 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:16:19 lr: 0.000103 grad: 0.1162 (0.1198) loss: 0.8072 (0.8084) time: 0.1764 data: 0.0817 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:15:58 lr: 0.000103 grad: 0.0979 (0.1183) loss: 0.8159 (0.8081) time: 0.1710 data: 0.0726 max mem: 9377 +Train: [31] [1000/6250] eta: 0:15:33 lr: 0.000103 grad: 0.1035 (0.1173) loss: 0.8028 (0.8080) time: 0.1537 data: 0.0505 max mem: 9377 +Train: [31] [1100/6250] eta: 0:15:08 lr: 0.000103 grad: 0.1126 (0.1165) loss: 0.8048 (0.8075) time: 0.1704 data: 0.0793 max mem: 9377 +Train: [31] [1200/6250] eta: 0:14:42 lr: 0.000103 grad: 0.1139 (0.1162) loss: 0.8037 (0.8069) time: 0.1550 data: 0.0694 max mem: 9377 +Train: [31] [1300/6250] eta: 0:14:20 lr: 0.000103 grad: 0.1088 (0.1161) loss: 0.8044 (0.8061) time: 0.1644 data: 0.0767 max mem: 9377 +Train: [31] [1400/6250] eta: 0:13:55 lr: 0.000103 grad: 0.1051 (0.1159) loss: 0.8088 (0.8057) time: 0.1467 data: 0.0611 max mem: 9377 +Train: [31] [1500/6250] eta: 0:13:36 lr: 0.000103 grad: 0.1101 (0.1158) loss: 0.7925 (0.8050) time: 0.1344 data: 0.0474 max mem: 9377 +Train: [31] [1600/6250] eta: 0:13:13 lr: 0.000103 grad: 0.1141 (0.1155) loss: 0.7996 (0.8047) time: 0.1505 data: 0.0696 max mem: 9377 +Train: [31] [1700/6250] eta: 0:12:54 lr: 0.000103 grad: 0.1112 (0.1155) loss: 0.8070 (0.8043) time: 0.1523 data: 0.0657 max mem: 9377 +Train: [31] [1800/6250] eta: 0:12:33 lr: 0.000103 grad: 0.1037 (0.1153) loss: 0.8046 (0.8041) time: 0.1543 data: 0.0709 max mem: 9377 +Train: [31] [1900/6250] eta: 0:12:13 lr: 0.000103 grad: 0.1081 (0.1151) loss: 0.8011 (0.8039) time: 0.1554 data: 0.0629 max mem: 9377 +Train: [31] [2000/6250] eta: 0:11:52 lr: 0.000103 grad: 0.1077 (0.1151) loss: 0.8011 (0.8035) time: 0.1584 data: 0.0702 max mem: 9377 +Train: [31] [2100/6250] eta: 0:11:34 lr: 0.000103 grad: 0.1116 (0.1150) loss: 0.8042 (0.8034) time: 0.1492 data: 0.0583 max mem: 9377 +Train: [31] [2200/6250] eta: 0:11:17 lr: 0.000103 grad: 0.1113 (0.1148) loss: 0.8050 (0.8033) time: 0.1614 data: 0.0806 max mem: 9377 +Train: [31] [2300/6250] eta: 0:10:58 lr: 0.000103 grad: 0.1063 (0.1145) loss: 0.8124 (0.8033) time: 0.1665 data: 0.0737 max mem: 9377 +Train: [31] [2400/6250] eta: 0:10:40 lr: 0.000103 grad: 0.1154 (0.1145) loss: 0.8104 (0.8032) time: 0.1585 data: 0.0725 max mem: 9377 +Train: [31] [2500/6250] eta: 0:10:21 lr: 0.000103 grad: 0.1121 (0.1144) loss: 0.7986 (0.8031) time: 0.1424 data: 0.0604 max mem: 9377 +Train: [31] [2600/6250] eta: 0:10:04 lr: 0.000103 grad: 0.1070 (0.1141) loss: 0.8035 (0.8030) time: 0.1571 data: 0.0713 max mem: 9377 +Train: [31] [2700/6250] eta: 0:09:47 lr: 0.000103 grad: 0.1057 (0.1140) loss: 0.8035 (0.8030) time: 0.1520 data: 0.0696 max mem: 9377 +Train: [31] [2800/6250] eta: 0:09:29 lr: 0.000103 grad: 0.1144 (0.1139) loss: 0.8013 (0.8030) time: 0.1495 data: 0.0636 max mem: 9377 +Train: [31] [2900/6250] eta: 0:09:11 lr: 0.000103 grad: 0.1069 (0.1138) loss: 0.8038 (0.8030) time: 0.1518 data: 0.0682 max mem: 9377 +Train: [31] [3000/6250] eta: 0:08:53 lr: 0.000103 grad: 0.1104 (0.1137) loss: 0.7995 (0.8029) time: 0.1537 data: 0.0639 max mem: 9377 +Train: [31] [3100/6250] eta: 0:08:36 lr: 0.000103 grad: 0.1115 (0.1137) loss: 0.7892 (0.8028) time: 0.1566 data: 0.0784 max mem: 9377 +Train: [31] [3200/6250] eta: 0:08:18 lr: 0.000102 grad: 0.1072 (0.1137) loss: 0.8040 (0.8027) time: 0.1592 data: 0.0667 max mem: 9377 +Train: [31] [3300/6250] eta: 0:08:01 lr: 0.000102 grad: 0.1095 (0.1136) loss: 0.7966 (0.8026) time: 0.1582 data: 0.0813 max mem: 9377 +Train: [31] [3400/6250] eta: 0:07:44 lr: 0.000102 grad: 0.1056 (0.1136) loss: 0.8005 (0.8025) time: 0.1404 data: 0.0536 max mem: 9377 +Train: [31] [3500/6250] eta: 0:07:27 lr: 0.000102 grad: 0.1054 (0.1135) loss: 0.8108 (0.8023) time: 0.1552 data: 0.0693 max mem: 9377 +Train: [31] [3600/6250] eta: 0:07:10 lr: 0.000102 grad: 0.1094 (0.1135) loss: 0.8028 (0.8021) time: 0.1554 data: 0.0684 max mem: 9377 +Train: [31] [3700/6250] eta: 0:06:54 lr: 0.000102 grad: 0.1162 (0.1135) loss: 0.7951 (0.8020) time: 0.1659 data: 0.0767 max mem: 9377 +Train: [31] [3800/6250] eta: 0:06:36 lr: 0.000102 grad: 0.1044 (0.1134) loss: 0.8043 (0.8019) time: 0.1609 data: 0.0730 max mem: 9377 +Train: [31] [3900/6250] eta: 0:06:20 lr: 0.000102 grad: 0.1187 (0.1135) loss: 0.7825 (0.8017) time: 0.1638 data: 0.0748 max mem: 9377 +Train: [31] [4000/6250] eta: 0:06:03 lr: 0.000102 grad: 0.1126 (0.1136) loss: 0.7953 (0.8015) time: 0.1574 data: 0.0653 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:47 lr: 0.000102 grad: 0.1243 (0.1136) loss: 0.7903 (0.8013) time: 0.1500 data: 0.0660 max mem: 9377 +Train: [31] [4200/6250] eta: 0:05:30 lr: 0.000102 grad: 0.1056 (0.1137) loss: 0.7969 (0.8011) time: 0.1677 data: 0.0756 max mem: 9377 +Train: [31] [4300/6250] eta: 0:05:14 lr: 0.000102 grad: 0.1119 (0.1138) loss: 0.7936 (0.8008) time: 0.1655 data: 0.0807 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:57 lr: 0.000102 grad: 0.1232 (0.1139) loss: 0.7900 (0.8006) time: 0.1498 data: 0.0625 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:41 lr: 0.000102 grad: 0.1186 (0.1141) loss: 0.7847 (0.8002) time: 0.1541 data: 0.0615 max mem: 9377 +Train: [31] [4600/6250] eta: 0:04:24 lr: 0.000102 grad: 0.1135 (0.1142) loss: 0.7890 (0.8000) time: 0.1448 data: 0.0535 max mem: 9377 +Train: [31] [4700/6250] eta: 0:04:09 lr: 0.000102 grad: 0.1181 (0.1142) loss: 0.7892 (0.7997) time: 0.1470 data: 0.0691 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:53 lr: 0.000102 grad: 0.1100 (0.1142) loss: 0.7896 (0.7995) time: 0.1796 data: 0.0986 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:37 lr: 0.000102 grad: 0.1128 (0.1143) loss: 0.7812 (0.7993) time: 0.1622 data: 0.0829 max mem: 9377 +Train: [31] [5000/6250] eta: 0:03:21 lr: 0.000102 grad: 0.1112 (0.1143) loss: 0.7836 (0.7991) time: 0.1504 data: 0.0649 max mem: 9377 +Train: [31] [5100/6250] eta: 0:03:05 lr: 0.000102 grad: 0.1173 (0.1144) loss: 0.7955 (0.7990) time: 0.1522 data: 0.0638 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:49 lr: 0.000102 grad: 0.1126 (0.1145) loss: 0.7923 (0.7989) time: 0.1724 data: 0.0898 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:33 lr: 0.000102 grad: 0.1083 (0.1146) loss: 0.7944 (0.7988) time: 0.1703 data: 0.0845 max mem: 9377 +Train: [31] [5400/6250] eta: 0:02:17 lr: 0.000102 grad: 0.1133 (0.1146) loss: 0.7923 (0.7987) time: 0.1899 data: 0.0941 max mem: 9377 +Train: [31] [5500/6250] eta: 0:02:00 lr: 0.000102 grad: 0.1202 (0.1146) loss: 0.7865 (0.7986) time: 0.1686 data: 0.0850 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:45 lr: 0.000102 grad: 0.1208 (0.1147) loss: 0.7822 (0.7985) time: 0.1639 data: 0.0628 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:29 lr: 0.000102 grad: 0.1218 (0.1148) loss: 0.7885 (0.7983) time: 0.1863 data: 0.0851 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:13 lr: 0.000102 grad: 0.1211 (0.1150) loss: 0.7849 (0.7981) time: 0.1460 data: 0.0456 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:56 lr: 0.000102 grad: 0.1157 (0.1150) loss: 0.7957 (0.7980) time: 0.1517 data: 0.0454 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:40 lr: 0.000102 grad: 0.1117 (0.1150) loss: 0.7798 (0.7978) time: 0.1421 data: 0.0514 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:24 lr: 0.000102 grad: 0.1199 (0.1151) loss: 0.7831 (0.7976) time: 0.1419 data: 0.0494 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:08 lr: 0.000102 grad: 0.1139 (0.1150) loss: 0.7973 (0.7975) time: 0.1794 data: 0.0968 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.1143 (0.1151) loss: 0.7749 (0.7974) time: 0.1611 data: 0.0741 max mem: 9377 +Train: [31] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000102 grad: 0.1143 (0.1151) loss: 0.7749 (0.7974) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:06:20 loss: 0.8441 (0.8441) time: 6.1432 data: 6.1109 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8493 (0.8501) time: 0.1446 data: 0.1195 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:14 (0.2414 s / it) +Averaged stats (hcp-train-subset): loss: 0.8493 (0.8501) +Eval (hcp-val): [31] [ 0/62] eta: 0:06:23 loss: 0.8469 (0.8469) time: 6.1870 data: 6.1550 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8473 (0.8479) time: 0.1394 data: 0.1120 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:14 (0.2377 s / it) +Averaged stats (hcp-val): loss: 0.8473 (0.8479) +Eval (nsd-val): [31] [ 0/62] eta: 0:05:34 loss: 0.8159 (0.8159) time: 5.3887 data: 5.3573 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8239 (0.8264) time: 0.1452 data: 0.1198 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (nsd-val): loss: 0.8239 (0.8264) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 8:50:59 lr: 0.000102 grad: 0.2432 (0.2432) loss: 0.8250 (0.8250) time: 5.0975 data: 4.8292 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:23:08 lr: 0.000102 grad: 0.0991 (0.1368) loss: 0.8276 (0.8287) time: 0.1801 data: 0.0860 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:19:58 lr: 0.000102 grad: 0.1162 (0.1288) loss: 0.8143 (0.8220) time: 0.1523 data: 0.0511 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:18:38 lr: 0.000102 grad: 0.1099 (0.1284) loss: 0.8094 (0.8172) time: 0.1621 data: 0.0610 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:17:56 lr: 0.000102 grad: 0.1110 (0.1256) loss: 0.8008 (0.8136) time: 0.1915 data: 0.0912 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:17:24 lr: 0.000102 grad: 0.1094 (0.1243) loss: 0.7986 (0.8103) time: 0.1738 data: 0.0858 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:16:53 lr: 0.000102 grad: 0.1110 (0.1228) loss: 0.7969 (0.8075) time: 0.1651 data: 0.0644 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:16:23 lr: 0.000102 grad: 0.1230 (0.1225) loss: 0.7857 (0.8054) time: 0.1652 data: 0.0685 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:15:56 lr: 0.000101 grad: 0.1127 (0.1217) loss: 0.7889 (0.8040) time: 0.1413 data: 0.0465 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:15:41 lr: 0.000101 grad: 0.1118 (0.1216) loss: 0.7932 (0.8027) time: 0.1881 data: 0.0980 max mem: 9377 +Train: [32] [1000/6250] eta: 0:15:14 lr: 0.000101 grad: 0.1101 (0.1209) loss: 0.7992 (0.8021) time: 0.1458 data: 0.0504 max mem: 9377 +Train: [32] [1100/6250] eta: 0:14:46 lr: 0.000101 grad: 0.1113 (0.1205) loss: 0.7975 (0.8017) time: 0.1463 data: 0.0608 max mem: 9377 +Train: [32] [1200/6250] eta: 0:14:22 lr: 0.000101 grad: 0.1132 (0.1203) loss: 0.7914 (0.8008) time: 0.1595 data: 0.0629 max mem: 9377 +Train: [32] [1300/6250] eta: 0:13:56 lr: 0.000101 grad: 0.1131 (0.1198) loss: 0.7844 (0.8000) time: 0.1435 data: 0.0504 max mem: 9377 +Train: [32] [1400/6250] eta: 0:13:33 lr: 0.000101 grad: 0.1068 (0.1194) loss: 0.8013 (0.7997) time: 0.1222 data: 0.0328 max mem: 9377 +Train: [32] [1500/6250] eta: 0:13:14 lr: 0.000101 grad: 0.1112 (0.1193) loss: 0.8002 (0.7994) time: 0.1736 data: 0.0828 max mem: 9377 +Train: [32] [1600/6250] eta: 0:12:52 lr: 0.000101 grad: 0.1092 (0.1190) loss: 0.7996 (0.7992) time: 0.1341 data: 0.0415 max mem: 9377 +Train: [32] [1700/6250] eta: 0:12:31 lr: 0.000101 grad: 0.1133 (0.1188) loss: 0.7980 (0.7990) time: 0.1567 data: 0.0682 max mem: 9377 +Train: [32] [1800/6250] eta: 0:12:14 lr: 0.000101 grad: 0.1090 (0.1185) loss: 0.7994 (0.7988) time: 0.1597 data: 0.0725 max mem: 9377 +Train: [32] [1900/6250] eta: 0:11:56 lr: 0.000101 grad: 0.1093 (0.1183) loss: 0.7893 (0.7986) time: 0.1780 data: 0.0962 max mem: 9377 +Train: [32] [2000/6250] eta: 0:11:37 lr: 0.000101 grad: 0.1093 (0.1182) loss: 0.7922 (0.7985) time: 0.1511 data: 0.0635 max mem: 9377 +Train: [32] [2100/6250] eta: 0:11:20 lr: 0.000101 grad: 0.1066 (0.1179) loss: 0.7995 (0.7984) time: 0.1654 data: 0.0748 max mem: 9377 +Train: [32] [2200/6250] eta: 0:11:01 lr: 0.000101 grad: 0.1097 (0.1176) loss: 0.7899 (0.7983) time: 0.1352 data: 0.0426 max mem: 9377 +Train: [32] [2300/6250] eta: 0:10:43 lr: 0.000101 grad: 0.1114 (0.1174) loss: 0.8025 (0.7982) time: 0.1607 data: 0.0735 max mem: 9377 +Train: [32] [2400/6250] eta: 0:10:24 lr: 0.000101 grad: 0.1170 (0.1171) loss: 0.7928 (0.7982) time: 0.1408 data: 0.0555 max mem: 9377 +Train: [32] [2500/6250] eta: 0:10:07 lr: 0.000101 grad: 0.1101 (0.1173) loss: 0.7936 (0.7981) time: 0.1362 data: 0.0406 max mem: 9377 +Train: [32] [2600/6250] eta: 0:09:49 lr: 0.000101 grad: 0.1107 (0.1170) loss: 0.7915 (0.7980) time: 0.1371 data: 0.0567 max mem: 9377 +Train: [32] [2700/6250] eta: 0:09:32 lr: 0.000101 grad: 0.1082 (0.1169) loss: 0.7897 (0.7979) time: 0.1430 data: 0.0500 max mem: 9377 +Train: [32] [2800/6250] eta: 0:09:15 lr: 0.000101 grad: 0.1093 (0.1167) loss: 0.7932 (0.7979) time: 0.1454 data: 0.0617 max mem: 9377 +Train: [32] [2900/6250] eta: 0:08:59 lr: 0.000101 grad: 0.1096 (0.1166) loss: 0.8047 (0.7980) time: 0.1659 data: 0.0802 max mem: 9377 +Train: [32] [3000/6250] eta: 0:08:41 lr: 0.000101 grad: 0.1162 (0.1165) loss: 0.7855 (0.7978) time: 0.1393 data: 0.0461 max mem: 9377 +Train: [32] [3100/6250] eta: 0:08:25 lr: 0.000101 grad: 0.1091 (0.1164) loss: 0.7928 (0.7979) time: 0.1853 data: 0.0966 max mem: 9377 +Train: [32] [3200/6250] eta: 0:08:08 lr: 0.000101 grad: 0.1116 (0.1163) loss: 0.7987 (0.7979) time: 0.1716 data: 0.0902 max mem: 9377 +Train: [32] [3300/6250] eta: 0:07:52 lr: 0.000101 grad: 0.1113 (0.1163) loss: 0.7947 (0.7979) time: 0.1591 data: 0.0684 max mem: 9377 +Train: [32] [3400/6250] eta: 0:07:36 lr: 0.000101 grad: 0.1134 (0.1163) loss: 0.7971 (0.7979) time: 0.1520 data: 0.0662 max mem: 9377 +Train: [32] [3500/6250] eta: 0:07:20 lr: 0.000101 grad: 0.1109 (0.1163) loss: 0.8008 (0.7978) time: 0.1600 data: 0.0724 max mem: 9377 +Train: [32] [3600/6250] eta: 0:07:04 lr: 0.000101 grad: 0.1146 (0.1162) loss: 0.8070 (0.7978) time: 0.1482 data: 0.0628 max mem: 9377 +Train: [32] [3700/6250] eta: 0:06:47 lr: 0.000101 grad: 0.1169 (0.1163) loss: 0.7911 (0.7978) time: 0.1459 data: 0.0610 max mem: 9377 +Train: [32] [3800/6250] eta: 0:06:31 lr: 0.000101 grad: 0.1095 (0.1162) loss: 0.7962 (0.7978) time: 0.1363 data: 0.0459 max mem: 9377 +Train: [32] [3900/6250] eta: 0:06:15 lr: 0.000101 grad: 0.1135 (0.1162) loss: 0.8033 (0.7978) time: 0.1538 data: 0.0644 max mem: 9377 +Train: [32] [4000/6250] eta: 0:05:59 lr: 0.000101 grad: 0.1109 (0.1162) loss: 0.8051 (0.7978) time: 0.1589 data: 0.0771 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:42 lr: 0.000101 grad: 0.1216 (0.1162) loss: 0.7901 (0.7977) time: 0.1412 data: 0.0583 max mem: 9377 +Train: [32] [4200/6250] eta: 0:05:26 lr: 0.000101 grad: 0.1134 (0.1162) loss: 0.7938 (0.7976) time: 0.1201 data: 0.0341 max mem: 9377 +Train: [32] [4300/6250] eta: 0:05:10 lr: 0.000101 grad: 0.1084 (0.1162) loss: 0.8027 (0.7976) time: 0.1504 data: 0.0702 max mem: 9377 +Train: [32] [4400/6250] eta: 0:04:54 lr: 0.000101 grad: 0.1152 (0.1163) loss: 0.7929 (0.7975) time: 0.1650 data: 0.0841 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:38 lr: 0.000101 grad: 0.1165 (0.1163) loss: 0.7885 (0.7974) time: 0.1962 data: 0.1109 max mem: 9377 +Train: [32] [4600/6250] eta: 0:04:22 lr: 0.000101 grad: 0.1137 (0.1164) loss: 0.7787 (0.7973) time: 0.1433 data: 0.0597 max mem: 9377 +Train: [32] [4700/6250] eta: 0:04:06 lr: 0.000100 grad: 0.1109 (0.1163) loss: 0.7967 (0.7971) time: 0.1728 data: 0.0867 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:51 lr: 0.000100 grad: 0.1091 (0.1163) loss: 0.7928 (0.7970) time: 0.1692 data: 0.0779 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:35 lr: 0.000100 grad: 0.1126 (0.1164) loss: 0.7901 (0.7969) time: 0.1471 data: 0.0565 max mem: 9377 +Train: [32] [5000/6250] eta: 0:03:19 lr: 0.000100 grad: 0.1099 (0.1164) loss: 0.7953 (0.7968) time: 0.1982 data: 0.1118 max mem: 9377 +Train: [32] [5100/6250] eta: 0:03:04 lr: 0.000100 grad: 0.1137 (0.1164) loss: 0.7898 (0.7968) time: 0.1756 data: 0.0830 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:48 lr: 0.000100 grad: 0.1170 (0.1164) loss: 0.7880 (0.7967) time: 0.1607 data: 0.0645 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:33 lr: 0.000100 grad: 0.1094 (0.1164) loss: 0.7944 (0.7967) time: 0.1853 data: 0.0912 max mem: 9377 +Train: [32] [5400/6250] eta: 0:02:17 lr: 0.000100 grad: 0.1157 (0.1164) loss: 0.7931 (0.7967) time: 0.1627 data: 0.0681 max mem: 9377 +Train: [32] [5500/6250] eta: 0:02:01 lr: 0.000100 grad: 0.1116 (0.1165) loss: 0.7880 (0.7966) time: 0.1603 data: 0.0683 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:45 lr: 0.000100 grad: 0.1231 (0.1165) loss: 0.7916 (0.7965) time: 0.1968 data: 0.1030 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:29 lr: 0.000100 grad: 0.1161 (0.1165) loss: 0.7889 (0.7965) time: 0.1895 data: 0.1037 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:13 lr: 0.000100 grad: 0.1148 (0.1165) loss: 0.7814 (0.7963) time: 0.1824 data: 0.0838 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:57 lr: 0.000100 grad: 0.1109 (0.1166) loss: 0.7944 (0.7962) time: 0.1543 data: 0.0640 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:40 lr: 0.000100 grad: 0.1243 (0.1167) loss: 0.7795 (0.7961) time: 0.1503 data: 0.0551 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:24 lr: 0.000100 grad: 0.1114 (0.1168) loss: 0.7910 (0.7959) time: 0.1376 data: 0.0463 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.1200 (0.1168) loss: 0.7870 (0.7958) time: 0.1531 data: 0.0694 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1188 (0.1169) loss: 0.7794 (0.7957) time: 0.1489 data: 0.0546 max mem: 9377 +Train: [32] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000100 grad: 0.1188 (0.1169) loss: 0.7794 (0.7957) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:04:11 loss: 0.8463 (0.8463) time: 4.0515 data: 3.9554 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8530 (0.8511) time: 0.1391 data: 0.1112 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:15 (0.2428 s / it) +Averaged stats (hcp-train-subset): loss: 0.8530 (0.8511) +Eval (hcp-val): [32] [ 0/62] eta: 0:03:48 loss: 0.8459 (0.8459) time: 3.6919 data: 3.5951 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8448 (0.8485) time: 0.1313 data: 0.1061 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-val): loss: 0.8448 (0.8485) +Eval (nsd-val): [32] [ 0/62] eta: 0:05:36 loss: 0.8125 (0.8125) time: 5.4220 data: 5.3899 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8212 (0.8210) time: 0.1256 data: 0.1000 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:14 (0.2297 s / it) +Averaged stats (nsd-val): loss: 0.8212 (0.8210) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 11:17:21 lr: 0.000100 grad: 0.1023 (0.1023) loss: 0.8316 (0.8316) time: 6.5027 data: 6.4013 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:21:58 lr: 0.000100 grad: 0.1110 (0.1419) loss: 0.8143 (0.8229) time: 0.1623 data: 0.0624 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:19:31 lr: 0.000100 grad: 0.1211 (0.1381) loss: 0.8224 (0.8166) time: 0.1706 data: 0.0757 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:18:07 lr: 0.000100 grad: 0.1249 (0.1333) loss: 0.8008 (0.8135) time: 0.1734 data: 0.0749 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:17:25 lr: 0.000100 grad: 0.1178 (0.1308) loss: 0.7906 (0.8095) time: 0.1663 data: 0.0746 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:16:47 lr: 0.000100 grad: 0.1113 (0.1296) loss: 0.8013 (0.8070) time: 0.1597 data: 0.0599 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:16:28 lr: 0.000100 grad: 0.1050 (0.1274) loss: 0.7968 (0.8056) time: 0.1947 data: 0.1040 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:16:04 lr: 0.000100 grad: 0.1173 (0.1265) loss: 0.7984 (0.8045) time: 0.1684 data: 0.0779 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:15:50 lr: 0.000100 grad: 0.1043 (0.1249) loss: 0.8010 (0.8038) time: 0.1460 data: 0.0597 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:15:29 lr: 0.000100 grad: 0.1095 (0.1236) loss: 0.8126 (0.8035) time: 0.1722 data: 0.0791 max mem: 9377 +Train: [33] [1000/6250] eta: 0:15:09 lr: 0.000100 grad: 0.1167 (0.1228) loss: 0.7978 (0.8031) time: 0.1622 data: 0.0722 max mem: 9377 +Train: [33] [1100/6250] eta: 0:14:45 lr: 0.000100 grad: 0.1089 (0.1219) loss: 0.8013 (0.8027) time: 0.1374 data: 0.0473 max mem: 9377 +Train: [33] [1200/6250] eta: 0:14:24 lr: 0.000100 grad: 0.1068 (0.1213) loss: 0.8020 (0.8021) time: 0.1488 data: 0.0525 max mem: 9377 +Train: [33] [1300/6250] eta: 0:14:01 lr: 0.000100 grad: 0.1049 (0.1207) loss: 0.8043 (0.8017) time: 0.1694 data: 0.0905 max mem: 9377 +Train: [33] [1400/6250] eta: 0:13:37 lr: 0.000100 grad: 0.1127 (0.1200) loss: 0.7995 (0.8014) time: 0.1294 data: 0.0352 max mem: 9377 +Train: [33] [1500/6250] eta: 0:13:18 lr: 0.000100 grad: 0.1177 (0.1198) loss: 0.8015 (0.8011) time: 0.1529 data: 0.0663 max mem: 9377 +Train: [33] [1600/6250] eta: 0:12:59 lr: 0.000100 grad: 0.1216 (0.1197) loss: 0.7886 (0.8008) time: 0.1693 data: 0.0956 max mem: 9377 +Train: [33] [1700/6250] eta: 0:12:42 lr: 0.000100 grad: 0.1230 (0.1195) loss: 0.7972 (0.8006) time: 0.1066 data: 0.0162 max mem: 9377 +Train: [33] [1800/6250] eta: 0:12:26 lr: 0.000100 grad: 0.1179 (0.1195) loss: 0.7881 (0.8000) time: 0.1668 data: 0.0762 max mem: 9377 +Train: [33] [1900/6250] eta: 0:12:09 lr: 0.000100 grad: 0.1135 (0.1194) loss: 0.7902 (0.7996) time: 0.1705 data: 0.0935 max mem: 9377 +Train: [33] [2000/6250] eta: 0:11:52 lr: 0.000100 grad: 0.1101 (0.1194) loss: 0.7917 (0.7992) time: 0.1428 data: 0.0547 max mem: 9377 +Train: [33] [2100/6250] eta: 0:11:31 lr: 0.000100 grad: 0.1146 (0.1193) loss: 0.7903 (0.7989) time: 0.1221 data: 0.0259 max mem: 9377 +Train: [33] [2200/6250] eta: 0:11:14 lr: 0.000099 grad: 0.1172 (0.1192) loss: 0.7886 (0.7985) time: 0.1656 data: 0.0862 max mem: 9377 +Train: [33] [2300/6250] eta: 0:10:55 lr: 0.000099 grad: 0.1145 (0.1191) loss: 0.7986 (0.7984) time: 0.1707 data: 0.0838 max mem: 9377 +Train: [33] [2400/6250] eta: 0:10:38 lr: 0.000099 grad: 0.1104 (0.1191) loss: 0.7950 (0.7982) time: 0.1578 data: 0.0755 max mem: 9377 +Train: [33] [2500/6250] eta: 0:10:21 lr: 0.000099 grad: 0.1101 (0.1191) loss: 0.7999 (0.7980) time: 0.1567 data: 0.0672 max mem: 9377 +Train: [33] [2600/6250] eta: 0:10:04 lr: 0.000099 grad: 0.1180 (0.1191) loss: 0.7946 (0.7979) time: 0.1562 data: 0.0589 max mem: 9377 +Train: [33] [2700/6250] eta: 0:09:47 lr: 0.000099 grad: 0.1224 (0.1192) loss: 0.7985 (0.7978) time: 0.1554 data: 0.0709 max mem: 9377 +Train: [33] [2800/6250] eta: 0:09:29 lr: 0.000099 grad: 0.1086 (0.1191) loss: 0.8024 (0.7978) time: 0.1776 data: 0.0937 max mem: 9377 +Train: [33] [2900/6250] eta: 0:09:12 lr: 0.000099 grad: 0.1093 (0.1191) loss: 0.8086 (0.7977) time: 0.1596 data: 0.0698 max mem: 9377 +Train: [33] [3000/6250] eta: 0:08:54 lr: 0.000099 grad: 0.1163 (0.1190) loss: 0.7892 (0.7977) time: 0.1641 data: 0.0713 max mem: 9377 +Train: [33] [3100/6250] eta: 0:08:36 lr: 0.000099 grad: 0.1123 (0.1189) loss: 0.8018 (0.7978) time: 0.1353 data: 0.0494 max mem: 9377 +Train: [33] [3200/6250] eta: 0:08:19 lr: 0.000099 grad: 0.1173 (0.1189) loss: 0.7925 (0.7977) time: 0.1523 data: 0.0556 max mem: 9377 +Train: [33] [3300/6250] eta: 0:08:03 lr: 0.000099 grad: 0.1122 (0.1189) loss: 0.7995 (0.7977) time: 0.1770 data: 0.0907 max mem: 9377 +Train: [33] [3400/6250] eta: 0:07:46 lr: 0.000099 grad: 0.1143 (0.1190) loss: 0.7999 (0.7977) time: 0.1001 data: 0.0002 max mem: 9377 +Train: [33] [3500/6250] eta: 0:07:29 lr: 0.000099 grad: 0.1107 (0.1190) loss: 0.8040 (0.7976) time: 0.1512 data: 0.0632 max mem: 9377 +Train: [33] [3600/6250] eta: 0:07:11 lr: 0.000099 grad: 0.1102 (0.1189) loss: 0.8082 (0.7976) time: 0.1527 data: 0.0605 max mem: 9377 +Train: [33] [3700/6250] eta: 0:06:55 lr: 0.000099 grad: 0.1134 (0.1189) loss: 0.7907 (0.7975) time: 0.1727 data: 0.0855 max mem: 9377 +Train: [33] [3800/6250] eta: 0:06:38 lr: 0.000099 grad: 0.1100 (0.1189) loss: 0.7977 (0.7974) time: 0.1564 data: 0.0748 max mem: 9377 +Train: [33] [3900/6250] eta: 0:06:22 lr: 0.000099 grad: 0.1126 (0.1189) loss: 0.8027 (0.7974) time: 0.1455 data: 0.0588 max mem: 9377 +Train: [33] [4000/6250] eta: 0:06:04 lr: 0.000099 grad: 0.1154 (0.1188) loss: 0.8077 (0.7974) time: 0.1373 data: 0.0463 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:47 lr: 0.000099 grad: 0.1096 (0.1187) loss: 0.8009 (0.7975) time: 0.1222 data: 0.0219 max mem: 9377 +Train: [33] [4200/6250] eta: 0:05:31 lr: 0.000099 grad: 0.1140 (0.1188) loss: 0.7959 (0.7975) time: 0.1692 data: 0.0842 max mem: 9377 +Train: [33] [4300/6250] eta: 0:05:15 lr: 0.000099 grad: 0.1156 (0.1187) loss: 0.7991 (0.7975) time: 0.1572 data: 0.0676 max mem: 9377 +Train: [33] [4400/6250] eta: 0:04:59 lr: 0.000099 grad: 0.1152 (0.1188) loss: 0.7959 (0.7974) time: 0.1461 data: 0.0487 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:43 lr: 0.000099 grad: 0.1089 (0.1188) loss: 0.7924 (0.7974) time: 0.1549 data: 0.0638 max mem: 9377 +Train: [33] [4600/6250] eta: 0:04:27 lr: 0.000099 grad: 0.1246 (0.1189) loss: 0.7932 (0.7972) time: 0.2687 data: 0.1941 max mem: 9377 +Train: [33] [4700/6250] eta: 0:04:12 lr: 0.000099 grad: 0.1149 (0.1189) loss: 0.7869 (0.7971) time: 0.1918 data: 0.1105 max mem: 9377 +Train: [33] [4800/6250] eta: 0:03:56 lr: 0.000099 grad: 0.1113 (0.1190) loss: 0.7996 (0.7971) time: 0.1842 data: 0.0896 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:39 lr: 0.000099 grad: 0.1070 (0.1190) loss: 0.8031 (0.7970) time: 0.1615 data: 0.0742 max mem: 9377 +Train: [33] [5000/6250] eta: 0:03:23 lr: 0.000099 grad: 0.1144 (0.1190) loss: 0.7994 (0.7971) time: 0.1707 data: 0.0786 max mem: 9377 +Train: [33] [5100/6250] eta: 0:03:07 lr: 0.000099 grad: 0.1167 (0.1189) loss: 0.7958 (0.7971) time: 0.1613 data: 0.0716 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:51 lr: 0.000099 grad: 0.1145 (0.1189) loss: 0.7946 (0.7971) time: 0.1558 data: 0.0643 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:35 lr: 0.000099 grad: 0.1150 (0.1190) loss: 0.7957 (0.7971) time: 0.1698 data: 0.0872 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:19 lr: 0.000099 grad: 0.1163 (0.1190) loss: 0.7972 (0.7971) time: 0.1963 data: 0.1150 max mem: 9377 +Train: [33] [5500/6250] eta: 0:02:03 lr: 0.000099 grad: 0.1197 (0.1190) loss: 0.8019 (0.7971) time: 0.1493 data: 0.0614 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:47 lr: 0.000099 grad: 0.1148 (0.1190) loss: 0.7973 (0.7970) time: 0.1668 data: 0.0752 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:30 lr: 0.000099 grad: 0.1157 (0.1189) loss: 0.7990 (0.7970) time: 0.1993 data: 0.1048 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:14 lr: 0.000099 grad: 0.1163 (0.1190) loss: 0.7954 (0.7969) time: 0.1698 data: 0.0686 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:57 lr: 0.000098 grad: 0.1169 (0.1190) loss: 0.7909 (0.7968) time: 0.1639 data: 0.0693 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:41 lr: 0.000098 grad: 0.1228 (0.1191) loss: 0.7905 (0.7967) time: 0.1337 data: 0.0412 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:24 lr: 0.000098 grad: 0.1223 (0.1191) loss: 0.7852 (0.7966) time: 0.1644 data: 0.0750 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:08 lr: 0.000098 grad: 0.1162 (0.1191) loss: 0.7897 (0.7965) time: 0.1799 data: 0.0922 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.1158 (0.1191) loss: 0.7921 (0.7964) time: 0.1793 data: 0.0932 max mem: 9377 +Train: [33] Total time: 0:17:15 (0.1656 s / it) +Averaged stats: lr: 0.000098 grad: 0.1158 (0.1191) loss: 0.7921 (0.7964) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:05:15 loss: 0.8484 (0.8484) time: 5.0889 data: 5.0434 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8504 (0.8514) time: 0.1435 data: 0.1182 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:14 (0.2392 s / it) +Averaged stats (hcp-train-subset): loss: 0.8504 (0.8514) +Eval (hcp-val): [33] [ 0/62] eta: 0:04:04 loss: 0.8501 (0.8501) time: 3.9372 data: 3.8383 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8462 (0.8492) time: 0.1334 data: 0.1082 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (hcp-val): loss: 0.8462 (0.8492) +Eval (nsd-val): [33] [ 0/62] eta: 0:06:17 loss: 0.8125 (0.8125) time: 6.0817 data: 6.0494 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8225 (0.8240) time: 0.1276 data: 0.1004 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (nsd-val): loss: 0.8225 (0.8240) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 11:36:57 lr: 0.000098 grad: 0.1365 (0.1365) loss: 0.8697 (0.8697) time: 6.6908 data: 6.5401 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:22:07 lr: 0.000098 grad: 0.1415 (0.1894) loss: 0.8093 (0.8202) time: 0.1761 data: 0.0758 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:19:10 lr: 0.000098 grad: 0.1088 (0.1635) loss: 0.8079 (0.8146) time: 0.1487 data: 0.0515 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:17:58 lr: 0.000098 grad: 0.1221 (0.1530) loss: 0.7993 (0.8088) time: 0.1582 data: 0.0621 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:17:15 lr: 0.000098 grad: 0.1280 (0.1467) loss: 0.7937 (0.8045) time: 0.1405 data: 0.0417 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:16:36 lr: 0.000098 grad: 0.1164 (0.1411) loss: 0.7962 (0.8023) time: 0.1595 data: 0.0618 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:16:13 lr: 0.000098 grad: 0.1115 (0.1366) loss: 0.7972 (0.8015) time: 0.1545 data: 0.0536 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:15:50 lr: 0.000098 grad: 0.1121 (0.1338) loss: 0.8053 (0.8017) time: 0.1626 data: 0.0732 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:15:27 lr: 0.000098 grad: 0.1070 (0.1311) loss: 0.8009 (0.8019) time: 0.1563 data: 0.0647 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:15:02 lr: 0.000098 grad: 0.1070 (0.1289) loss: 0.8025 (0.8022) time: 0.1348 data: 0.0448 max mem: 9377 +Train: [34] [1000/6250] eta: 0:14:49 lr: 0.000098 grad: 0.1164 (0.1282) loss: 0.8052 (0.8022) time: 0.1595 data: 0.0736 max mem: 9377 +Train: [34] [1100/6250] eta: 0:14:27 lr: 0.000098 grad: 0.1097 (0.1270) loss: 0.8039 (0.8020) time: 0.1550 data: 0.0645 max mem: 9377 +Train: [34] [1200/6250] eta: 0:14:04 lr: 0.000098 grad: 0.1154 (0.1260) loss: 0.7880 (0.8011) time: 0.1578 data: 0.0654 max mem: 9377 +Train: [34] [1300/6250] eta: 0:13:44 lr: 0.000098 grad: 0.1132 (0.1253) loss: 0.7933 (0.8005) time: 0.1709 data: 0.0843 max mem: 9377 +Train: [34] [1400/6250] eta: 0:13:19 lr: 0.000098 grad: 0.1111 (0.1248) loss: 0.7974 (0.7999) time: 0.1427 data: 0.0416 max mem: 9377 +Train: [34] [1500/6250] eta: 0:12:54 lr: 0.000098 grad: 0.1195 (0.1243) loss: 0.7849 (0.7992) time: 0.1384 data: 0.0418 max mem: 9377 +Train: [34] [1600/6250] eta: 0:12:35 lr: 0.000098 grad: 0.1207 (0.1240) loss: 0.7854 (0.7988) time: 0.1652 data: 0.0818 max mem: 9377 +Train: [34] [1700/6250] eta: 0:12:16 lr: 0.000098 grad: 0.1129 (0.1234) loss: 0.7835 (0.7984) time: 0.1716 data: 0.0746 max mem: 9377 +Train: [34] [1800/6250] eta: 0:11:56 lr: 0.000098 grad: 0.1215 (0.1231) loss: 0.7892 (0.7979) time: 0.1245 data: 0.0370 max mem: 9377 +Train: [34] [1900/6250] eta: 0:11:39 lr: 0.000098 grad: 0.1137 (0.1229) loss: 0.7914 (0.7976) time: 0.1587 data: 0.0710 max mem: 9377 +Train: [34] [2000/6250] eta: 0:11:22 lr: 0.000098 grad: 0.1206 (0.1226) loss: 0.7869 (0.7973) time: 0.1421 data: 0.0510 max mem: 9377 +Train: [34] [2100/6250] eta: 0:11:03 lr: 0.000098 grad: 0.1096 (0.1223) loss: 0.7871 (0.7969) time: 0.1409 data: 0.0471 max mem: 9377 +Train: [34] [2200/6250] eta: 0:10:47 lr: 0.000098 grad: 0.1171 (0.1220) loss: 0.7898 (0.7969) time: 0.1489 data: 0.0616 max mem: 9377 +Train: [34] [2300/6250] eta: 0:10:30 lr: 0.000098 grad: 0.1085 (0.1218) loss: 0.7997 (0.7968) time: 0.1617 data: 0.0783 max mem: 9377 +Train: [34] [2400/6250] eta: 0:10:13 lr: 0.000098 grad: 0.1234 (0.1216) loss: 0.7913 (0.7968) time: 0.1599 data: 0.0752 max mem: 9377 +Train: [34] [2500/6250] eta: 0:09:57 lr: 0.000098 grad: 0.1107 (0.1214) loss: 0.7908 (0.7967) time: 0.1514 data: 0.0565 max mem: 9377 +Train: [34] [2600/6250] eta: 0:09:41 lr: 0.000098 grad: 0.1152 (0.1212) loss: 0.7993 (0.7966) time: 0.1443 data: 0.0576 max mem: 9377 +Train: [34] [2700/6250] eta: 0:09:25 lr: 0.000098 grad: 0.1102 (0.1210) loss: 0.7927 (0.7966) time: 0.1310 data: 0.0424 max mem: 9377 +Train: [34] [2800/6250] eta: 0:09:08 lr: 0.000098 grad: 0.1165 (0.1210) loss: 0.8008 (0.7965) time: 0.1438 data: 0.0608 max mem: 9377 +Train: [34] [2900/6250] eta: 0:08:51 lr: 0.000098 grad: 0.1200 (0.1209) loss: 0.7886 (0.7964) time: 0.1522 data: 0.0654 max mem: 9377 +Train: [34] [3000/6250] eta: 0:08:35 lr: 0.000098 grad: 0.1205 (0.1209) loss: 0.7954 (0.7963) time: 0.1536 data: 0.0655 max mem: 9377 +Train: [34] [3100/6250] eta: 0:08:20 lr: 0.000098 grad: 0.1188 (0.1212) loss: 0.7816 (0.7961) time: 0.1572 data: 0.0602 max mem: 9377 +Train: [34] [3200/6250] eta: 0:08:04 lr: 0.000098 grad: 0.1165 (0.1214) loss: 0.7980 (0.7959) time: 0.1572 data: 0.0757 max mem: 9377 +Train: [34] [3300/6250] eta: 0:07:48 lr: 0.000097 grad: 0.1075 (0.1213) loss: 0.7985 (0.7958) time: 0.1669 data: 0.0838 max mem: 9377 +Train: [34] [3400/6250] eta: 0:07:32 lr: 0.000097 grad: 0.1239 (0.1213) loss: 0.7828 (0.7956) time: 0.1613 data: 0.0694 max mem: 9377 +Train: [34] [3500/6250] eta: 0:07:16 lr: 0.000097 grad: 0.1218 (0.1213) loss: 0.7941 (0.7955) time: 0.1563 data: 0.0702 max mem: 9377 +Train: [34] [3600/6250] eta: 0:07:00 lr: 0.000097 grad: 0.1221 (0.1213) loss: 0.7901 (0.7954) time: 0.1795 data: 0.0967 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:44 lr: 0.000097 grad: 0.1161 (0.1214) loss: 0.7855 (0.7952) time: 0.1645 data: 0.0649 max mem: 9377 +Train: [34] [3800/6250] eta: 0:06:28 lr: 0.000097 grad: 0.1195 (0.1214) loss: 0.7923 (0.7952) time: 0.1546 data: 0.0650 max mem: 9377 +Train: [34] [3900/6250] eta: 0:06:13 lr: 0.000097 grad: 0.1112 (0.1213) loss: 0.7901 (0.7952) time: 0.1589 data: 0.0675 max mem: 9377 +Train: [34] [4000/6250] eta: 0:05:57 lr: 0.000097 grad: 0.1136 (0.1212) loss: 0.7914 (0.7951) time: 0.1573 data: 0.0685 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:42 lr: 0.000097 grad: 0.1210 (0.1211) loss: 0.7927 (0.7951) time: 0.1477 data: 0.0655 max mem: 9377 +Train: [34] [4200/6250] eta: 0:05:26 lr: 0.000097 grad: 0.1104 (0.1211) loss: 0.7900 (0.7949) time: 0.1578 data: 0.0626 max mem: 9377 +Train: [34] [4300/6250] eta: 0:05:10 lr: 0.000097 grad: 0.1213 (0.1211) loss: 0.7901 (0.7948) time: 0.1610 data: 0.0691 max mem: 9377 +Train: [34] [4400/6250] eta: 0:04:54 lr: 0.000097 grad: 0.1197 (0.1211) loss: 0.7919 (0.7947) time: 0.1146 data: 0.0278 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:38 lr: 0.000097 grad: 0.1214 (0.1210) loss: 0.7953 (0.7947) time: 0.1799 data: 0.0977 max mem: 9377 +Train: [34] [4600/6250] eta: 0:04:23 lr: 0.000097 grad: 0.1174 (0.1210) loss: 0.7961 (0.7947) time: 0.1496 data: 0.0698 max mem: 9377 +Train: [34] [4700/6250] eta: 0:04:07 lr: 0.000097 grad: 0.1241 (0.1210) loss: 0.7977 (0.7947) time: 0.1782 data: 0.0955 max mem: 9377 +Train: [34] [4800/6250] eta: 0:03:52 lr: 0.000097 grad: 0.1132 (0.1209) loss: 0.7881 (0.7947) time: 0.1379 data: 0.0521 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:36 lr: 0.000097 grad: 0.1160 (0.1209) loss: 0.8006 (0.7947) time: 0.1428 data: 0.0534 max mem: 9377 +Train: [34] [5000/6250] eta: 0:03:20 lr: 0.000097 grad: 0.1152 (0.1209) loss: 0.7939 (0.7946) time: 0.1639 data: 0.0716 max mem: 9377 +Train: [34] [5100/6250] eta: 0:03:04 lr: 0.000097 grad: 0.1159 (0.1209) loss: 0.7950 (0.7946) time: 0.1624 data: 0.0722 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:49 lr: 0.000097 grad: 0.1239 (0.1209) loss: 0.7948 (0.7946) time: 0.1697 data: 0.0879 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:33 lr: 0.000097 grad: 0.1220 (0.1210) loss: 0.7879 (0.7945) time: 0.1838 data: 0.1017 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:17 lr: 0.000097 grad: 0.1248 (0.1211) loss: 0.7882 (0.7944) time: 0.2073 data: 0.1106 max mem: 9377 +Train: [34] [5500/6250] eta: 0:02:01 lr: 0.000097 grad: 0.1175 (0.1214) loss: 0.7882 (0.7944) time: 0.1394 data: 0.0561 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:45 lr: 0.000097 grad: 0.1256 (0.1214) loss: 0.7849 (0.7942) time: 0.1963 data: 0.1140 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:29 lr: 0.000097 grad: 0.1184 (0.1215) loss: 0.7959 (0.7942) time: 0.1849 data: 0.1001 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:13 lr: 0.000097 grad: 0.1299 (0.1216) loss: 0.7885 (0.7940) time: 0.1831 data: 0.0922 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:56 lr: 0.000097 grad: 0.1260 (0.1217) loss: 0.7827 (0.7939) time: 0.1560 data: 0.0624 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:40 lr: 0.000097 grad: 0.1221 (0.1218) loss: 0.7827 (0.7938) time: 0.1741 data: 0.0761 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:24 lr: 0.000097 grad: 0.1238 (0.1219) loss: 0.7796 (0.7936) time: 0.1751 data: 0.0735 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:08 lr: 0.000097 grad: 0.1217 (0.1219) loss: 0.7936 (0.7936) time: 0.1692 data: 0.0774 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.1217 (0.1219) loss: 0.7845 (0.7935) time: 0.1536 data: 0.0551 max mem: 9377 +Train: [34] Total time: 0:17:01 (0.1634 s / it) +Averaged stats: lr: 0.000097 grad: 0.1217 (0.1219) loss: 0.7845 (0.7935) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:05:48 loss: 0.8428 (0.8428) time: 5.6143 data: 5.5825 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8489 (0.8492) time: 0.1420 data: 0.1148 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:15 (0.2450 s / it) +Averaged stats (hcp-train-subset): loss: 0.8489 (0.8492) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [34] [ 0/62] eta: 0:04:04 loss: 0.8414 (0.8414) time: 3.9407 data: 3.8452 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8470 (0.8478) time: 0.1564 data: 0.1289 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:15 (0.2552 s / it) +Averaged stats (hcp-val): loss: 0.8470 (0.8478) +Making plots (hcp-val): example=45 +Eval (nsd-val): [34] [ 0/62] eta: 0:05:54 loss: 0.8081 (0.8081) time: 5.7160 data: 5.6834 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8198 (0.8206) time: 0.1405 data: 0.1143 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:15 (0.2492 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8206) +Making plots (nsd-val): example=30 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 7:06:46 lr: 0.000097 grad: 0.0754 (0.0754) loss: 0.8414 (0.8414) time: 4.0970 data: 3.7747 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:21:46 lr: 0.000097 grad: 0.1650 (0.2060) loss: 0.8017 (0.8068) time: 0.1547 data: 0.0464 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:18:34 lr: 0.000097 grad: 0.1291 (0.1800) loss: 0.7935 (0.8008) time: 0.1472 data: 0.0503 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:17:26 lr: 0.000097 grad: 0.1433 (0.1688) loss: 0.7825 (0.7980) time: 0.1504 data: 0.0594 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:16:40 lr: 0.000097 grad: 0.1228 (0.1583) loss: 0.7859 (0.7963) time: 0.1491 data: 0.0517 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:16:04 lr: 0.000097 grad: 0.1186 (0.1511) loss: 0.7924 (0.7955) time: 0.1336 data: 0.0336 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:15:40 lr: 0.000097 grad: 0.1194 (0.1466) loss: 0.8013 (0.7949) time: 0.1930 data: 0.1023 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:15:25 lr: 0.000096 grad: 0.1240 (0.1432) loss: 0.7858 (0.7947) time: 0.1671 data: 0.0822 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:15:05 lr: 0.000096 grad: 0.1199 (0.1403) loss: 0.8002 (0.7948) time: 0.1835 data: 0.0924 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:14:55 lr: 0.000096 grad: 0.1132 (0.1376) loss: 0.7980 (0.7953) time: 0.1633 data: 0.0744 max mem: 9377 +Train: [35] [1000/6250] eta: 0:14:39 lr: 0.000096 grad: 0.1098 (0.1354) loss: 0.7973 (0.7957) time: 0.1726 data: 0.0821 max mem: 9377 +Train: [35] [1100/6250] eta: 0:14:29 lr: 0.000096 grad: 0.1060 (0.1335) loss: 0.7991 (0.7959) time: 0.1724 data: 0.0852 max mem: 9377 +Train: [35] [1200/6250] eta: 0:14:08 lr: 0.000096 grad: 0.1181 (0.1324) loss: 0.7997 (0.7957) time: 0.1628 data: 0.0765 max mem: 9377 +Train: [35] [1300/6250] eta: 0:13:45 lr: 0.000096 grad: 0.1123 (0.1310) loss: 0.8007 (0.7956) time: 0.1363 data: 0.0453 max mem: 9377 +Train: [35] [1400/6250] eta: 0:13:30 lr: 0.000096 grad: 0.1213 (0.1302) loss: 0.7934 (0.7954) time: 0.2196 data: 0.1406 max mem: 9377 +Train: [35] [1500/6250] eta: 0:13:10 lr: 0.000096 grad: 0.1148 (0.1296) loss: 0.7949 (0.7952) time: 0.1584 data: 0.0818 max mem: 9377 +Train: [35] [1600/6250] eta: 0:12:53 lr: 0.000096 grad: 0.1124 (0.1289) loss: 0.7897 (0.7952) time: 0.1766 data: 0.1016 max mem: 9377 +Train: [35] [1700/6250] eta: 0:12:39 lr: 0.000096 grad: 0.1227 (0.1284) loss: 0.7912 (0.7950) time: 0.1669 data: 0.0790 max mem: 9377 +Train: [35] [1800/6250] eta: 0:12:25 lr: 0.000096 grad: 0.1174 (0.1280) loss: 0.7884 (0.7948) time: 0.1852 data: 0.1036 max mem: 9377 +Train: [35] [1900/6250] eta: 0:12:08 lr: 0.000096 grad: 0.1160 (0.1278) loss: 0.7881 (0.7945) time: 0.1835 data: 0.0956 max mem: 9377 +Train: [35] [2000/6250] eta: 0:11:50 lr: 0.000096 grad: 0.1108 (0.1275) loss: 0.7867 (0.7943) time: 0.1680 data: 0.0841 max mem: 9377 +Train: [35] [2100/6250] eta: 0:11:35 lr: 0.000096 grad: 0.1120 (0.1270) loss: 0.7896 (0.7941) time: 0.1929 data: 0.1061 max mem: 9377 +Train: [35] [2200/6250] eta: 0:11:18 lr: 0.000096 grad: 0.1205 (0.1267) loss: 0.7907 (0.7939) time: 0.1581 data: 0.0717 max mem: 9377 +Train: [35] [2300/6250] eta: 0:11:02 lr: 0.000096 grad: 0.1135 (0.1265) loss: 0.7858 (0.7935) time: 0.1833 data: 0.0883 max mem: 9377 +Train: [35] [2400/6250] eta: 0:10:44 lr: 0.000096 grad: 0.1261 (0.1263) loss: 0.7781 (0.7931) time: 0.1566 data: 0.0639 max mem: 9377 +Train: [35] [2500/6250] eta: 0:10:27 lr: 0.000096 grad: 0.1246 (0.1263) loss: 0.7893 (0.7927) time: 0.1521 data: 0.0734 max mem: 9377 +Train: [35] [2600/6250] eta: 0:10:09 lr: 0.000096 grad: 0.1272 (0.1263) loss: 0.7808 (0.7924) time: 0.1504 data: 0.0618 max mem: 9377 +Train: [35] [2700/6250] eta: 0:09:51 lr: 0.000096 grad: 0.1182 (0.1263) loss: 0.7850 (0.7923) time: 0.1772 data: 0.0936 max mem: 9377 +Train: [35] [2800/6250] eta: 0:09:33 lr: 0.000096 grad: 0.1171 (0.1260) loss: 0.7826 (0.7921) time: 0.1280 data: 0.0443 max mem: 9377 +Train: [35] [2900/6250] eta: 0:09:15 lr: 0.000096 grad: 0.1147 (0.1259) loss: 0.7971 (0.7920) time: 0.1405 data: 0.0652 max mem: 9377 +Train: [35] [3000/6250] eta: 0:08:58 lr: 0.000096 grad: 0.1212 (0.1257) loss: 0.7785 (0.7919) time: 0.1563 data: 0.0751 max mem: 9377 +Train: [35] [3100/6250] eta: 0:08:42 lr: 0.000096 grad: 0.1132 (0.1255) loss: 0.7871 (0.7918) time: 0.2335 data: 0.1479 max mem: 9377 +Train: [35] [3200/6250] eta: 0:08:24 lr: 0.000096 grad: 0.1185 (0.1254) loss: 0.7801 (0.7917) time: 0.1605 data: 0.0667 max mem: 9377 +Train: [35] [3300/6250] eta: 0:08:09 lr: 0.000096 grad: 0.1168 (0.1253) loss: 0.7832 (0.7915) time: 0.2250 data: 0.1411 max mem: 9377 +Train: [35] [3400/6250] eta: 0:07:51 lr: 0.000096 grad: 0.1247 (0.1252) loss: 0.7830 (0.7915) time: 0.1607 data: 0.0676 max mem: 9377 +Train: [35] [3500/6250] eta: 0:07:35 lr: 0.000096 grad: 0.1151 (0.1251) loss: 0.7861 (0.7915) time: 0.1825 data: 0.0915 max mem: 9377 +Train: [35] [3600/6250] eta: 0:07:18 lr: 0.000096 grad: 0.1157 (0.1249) loss: 0.7848 (0.7914) time: 0.1717 data: 0.0837 max mem: 9377 +Train: [35] [3700/6250] eta: 0:07:01 lr: 0.000096 grad: 0.1148 (0.1248) loss: 0.7956 (0.7915) time: 0.1681 data: 0.0857 max mem: 9377 +Train: [35] [3800/6250] eta: 0:06:45 lr: 0.000096 grad: 0.1192 (0.1246) loss: 0.7976 (0.7915) time: 0.1669 data: 0.0812 max mem: 9377 +Train: [35] [3900/6250] eta: 0:06:29 lr: 0.000096 grad: 0.1125 (0.1245) loss: 0.7876 (0.7914) time: 0.1706 data: 0.0864 max mem: 9377 +Train: [35] [4000/6250] eta: 0:06:12 lr: 0.000096 grad: 0.1177 (0.1244) loss: 0.7864 (0.7914) time: 0.1481 data: 0.0725 max mem: 9377 +Train: [35] [4100/6250] eta: 0:05:55 lr: 0.000096 grad: 0.1111 (0.1243) loss: 0.7958 (0.7914) time: 0.1617 data: 0.0691 max mem: 9377 +Train: [35] [4200/6250] eta: 0:05:38 lr: 0.000096 grad: 0.1272 (0.1243) loss: 0.7882 (0.7914) time: 0.1609 data: 0.0778 max mem: 9377 +Train: [35] [4300/6250] eta: 0:05:21 lr: 0.000095 grad: 0.1202 (0.1241) loss: 0.7898 (0.7914) time: 0.1330 data: 0.0461 max mem: 9377 +Train: [35] [4400/6250] eta: 0:05:04 lr: 0.000095 grad: 0.1223 (0.1240) loss: 0.7890 (0.7914) time: 0.1555 data: 0.0684 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:48 lr: 0.000095 grad: 0.1244 (0.1240) loss: 0.7842 (0.7914) time: 0.2131 data: 0.1341 max mem: 9377 +Train: [35] [4600/6250] eta: 0:04:31 lr: 0.000095 grad: 0.1205 (0.1238) loss: 0.7889 (0.7914) time: 0.1538 data: 0.0767 max mem: 9377 +Train: [35] [4700/6250] eta: 0:04:15 lr: 0.000095 grad: 0.1255 (0.1238) loss: 0.7805 (0.7913) time: 0.1477 data: 0.0638 max mem: 9377 +Train: [35] [4800/6250] eta: 0:03:58 lr: 0.000095 grad: 0.1195 (0.1239) loss: 0.7799 (0.7912) time: 0.1414 data: 0.0533 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:42 lr: 0.000095 grad: 0.1164 (0.1240) loss: 0.7983 (0.7911) time: 0.2115 data: 0.1284 max mem: 9377 +Train: [35] [5000/6250] eta: 0:03:26 lr: 0.000095 grad: 0.1171 (0.1240) loss: 0.7893 (0.7910) time: 0.1817 data: 0.0955 max mem: 9377 +Train: [35] [5100/6250] eta: 0:03:09 lr: 0.000095 grad: 0.1261 (0.1240) loss: 0.7770 (0.7908) time: 0.1505 data: 0.0600 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:53 lr: 0.000095 grad: 0.1243 (0.1240) loss: 0.7936 (0.7908) time: 0.1936 data: 0.1099 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:37 lr: 0.000095 grad: 0.1133 (0.1239) loss: 0.7773 (0.7907) time: 0.2089 data: 0.1280 max mem: 9377 +Train: [35] [5400/6250] eta: 0:02:21 lr: 0.000095 grad: 0.1234 (0.1239) loss: 0.7798 (0.7906) time: 0.1603 data: 0.0711 max mem: 9377 +Train: [35] [5500/6250] eta: 0:02:04 lr: 0.000095 grad: 0.1198 (0.1239) loss: 0.7944 (0.7907) time: 0.1822 data: 0.0911 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:48 lr: 0.000095 grad: 0.1168 (0.1238) loss: 0.7994 (0.7907) time: 0.1833 data: 0.0959 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:31 lr: 0.000095 grad: 0.1236 (0.1238) loss: 0.7948 (0.7906) time: 0.1429 data: 0.0506 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:14 lr: 0.000095 grad: 0.1137 (0.1238) loss: 0.7843 (0.7906) time: 0.1592 data: 0.0693 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:57 lr: 0.000095 grad: 0.1189 (0.1237) loss: 0.7844 (0.7907) time: 0.1545 data: 0.0695 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:41 lr: 0.000095 grad: 0.1181 (0.1237) loss: 0.7928 (0.7907) time: 0.1635 data: 0.0754 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:24 lr: 0.000095 grad: 0.1169 (0.1236) loss: 0.7942 (0.7908) time: 0.1606 data: 0.0629 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:08 lr: 0.000095 grad: 0.1126 (0.1236) loss: 0.7872 (0.7908) time: 0.1192 data: 0.0230 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.1170 (0.1236) loss: 0.7932 (0.7908) time: 0.1230 data: 0.0343 max mem: 9377 +Train: [35] Total time: 0:17:17 (0.1660 s / it) +Averaged stats: lr: 0.000095 grad: 0.1170 (0.1236) loss: 0.7932 (0.7908) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:03:59 loss: 0.8490 (0.8490) time: 3.8703 data: 3.7658 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8495 (0.8499) time: 0.1347 data: 0.1097 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-train-subset): loss: 0.8495 (0.8499) +Eval (hcp-val): [35] [ 0/62] eta: 0:05:19 loss: 0.8479 (0.8479) time: 5.1522 data: 5.1215 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8471 (0.8479) time: 0.1343 data: 0.1069 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:13 (0.2258 s / it) +Averaged stats (hcp-val): loss: 0.8471 (0.8479) +Eval (nsd-val): [35] [ 0/62] eta: 0:04:39 loss: 0.8095 (0.8095) time: 4.5027 data: 4.4176 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8199 (0.8218) time: 0.1334 data: 0.1062 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (nsd-val): loss: 0.8199 (0.8218) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [36] [ 0/6250] eta: 10:09:24 lr: 0.000095 grad: 0.2933 (0.2933) loss: 0.8180 (0.8180) time: 5.8504 data: 5.6164 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:24:24 lr: 0.000095 grad: 0.1269 (0.1924) loss: 0.8076 (0.8129) time: 0.1728 data: 0.0534 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:21:11 lr: 0.000095 grad: 0.1568 (0.1745) loss: 0.7792 (0.8013) time: 0.1649 data: 0.0756 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:19:21 lr: 0.000095 grad: 0.1285 (0.1611) loss: 0.7965 (0.7988) time: 0.1853 data: 0.0895 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:18:24 lr: 0.000095 grad: 0.1174 (0.1527) loss: 0.7825 (0.7971) time: 0.1823 data: 0.0861 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:17:27 lr: 0.000095 grad: 0.1279 (0.1503) loss: 0.7834 (0.7938) time: 0.1322 data: 0.0461 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:16:48 lr: 0.000095 grad: 0.1178 (0.1470) loss: 0.7857 (0.7925) time: 0.1686 data: 0.0933 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:16:28 lr: 0.000095 grad: 0.1192 (0.1434) loss: 0.7913 (0.7916) time: 0.1804 data: 0.0963 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:16:09 lr: 0.000095 grad: 0.1141 (0.1403) loss: 0.7931 (0.7910) time: 0.1512 data: 0.0604 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:15:43 lr: 0.000095 grad: 0.1121 (0.1381) loss: 0.7846 (0.7903) time: 0.1441 data: 0.0530 max mem: 9377 +Train: [36] [1000/6250] eta: 0:15:16 lr: 0.000095 grad: 0.1286 (0.1370) loss: 0.7792 (0.7897) time: 0.1420 data: 0.0516 max mem: 9377 +Train: [36] [1100/6250] eta: 0:14:50 lr: 0.000095 grad: 0.1236 (0.1356) loss: 0.7944 (0.7895) time: 0.1570 data: 0.0653 max mem: 9377 +Train: [36] [1200/6250] eta: 0:14:27 lr: 0.000095 grad: 0.1110 (0.1342) loss: 0.8015 (0.7896) time: 0.1564 data: 0.0680 max mem: 9377 +Train: [36] [1300/6250] eta: 0:14:03 lr: 0.000095 grad: 0.1252 (0.1333) loss: 0.7883 (0.7892) time: 0.1400 data: 0.0462 max mem: 9377 +Train: [36] [1400/6250] eta: 0:13:43 lr: 0.000095 grad: 0.1151 (0.1327) loss: 0.7905 (0.7891) time: 0.1864 data: 0.1072 max mem: 9377 +Train: [36] [1500/6250] eta: 0:13:20 lr: 0.000095 grad: 0.1250 (0.1323) loss: 0.7751 (0.7888) time: 0.1686 data: 0.0811 max mem: 9377 +Train: [36] [1600/6250] eta: 0:12:58 lr: 0.000094 grad: 0.1263 (0.1322) loss: 0.7876 (0.7883) time: 0.1345 data: 0.0490 max mem: 9377 +Train: [36] [1700/6250] eta: 0:12:42 lr: 0.000094 grad: 0.1253 (0.1323) loss: 0.7883 (0.7878) time: 0.1036 data: 0.0015 max mem: 9377 +Train: [36] [1800/6250] eta: 0:12:24 lr: 0.000094 grad: 0.1175 (0.1321) loss: 0.7823 (0.7873) time: 0.1993 data: 0.1137 max mem: 9377 +Train: [36] [1900/6250] eta: 0:12:04 lr: 0.000094 grad: 0.1247 (0.1319) loss: 0.7762 (0.7870) time: 0.1588 data: 0.0778 max mem: 9377 +Train: [36] [2000/6250] eta: 0:11:46 lr: 0.000094 grad: 0.1242 (0.1316) loss: 0.7847 (0.7866) time: 0.1400 data: 0.0532 max mem: 9377 +Train: [36] [2100/6250] eta: 0:11:29 lr: 0.000094 grad: 0.1226 (0.1313) loss: 0.7840 (0.7865) time: 0.1517 data: 0.0651 max mem: 9377 +Train: [36] [2200/6250] eta: 0:11:12 lr: 0.000094 grad: 0.1126 (0.1309) loss: 0.7957 (0.7864) time: 0.2053 data: 0.1232 max mem: 9377 +Train: [36] [2300/6250] eta: 0:10:54 lr: 0.000094 grad: 0.1245 (0.1307) loss: 0.7873 (0.7863) time: 0.1715 data: 0.0860 max mem: 9377 +Train: [36] [2400/6250] eta: 0:10:37 lr: 0.000094 grad: 0.1142 (0.1304) loss: 0.7832 (0.7863) time: 0.1493 data: 0.0617 max mem: 9377 +Train: [36] [2500/6250] eta: 0:10:18 lr: 0.000094 grad: 0.1178 (0.1301) loss: 0.7880 (0.7864) time: 0.1196 data: 0.0320 max mem: 9377 +Train: [36] [2600/6250] eta: 0:10:01 lr: 0.000094 grad: 0.1218 (0.1300) loss: 0.7854 (0.7864) time: 0.1533 data: 0.0620 max mem: 9377 +Train: [36] [2700/6250] eta: 0:09:45 lr: 0.000094 grad: 0.1208 (0.1297) loss: 0.7839 (0.7863) time: 0.1370 data: 0.0470 max mem: 9377 +Train: [36] [2800/6250] eta: 0:09:29 lr: 0.000094 grad: 0.1272 (0.1295) loss: 0.7840 (0.7862) time: 0.1688 data: 0.0849 max mem: 9377 +Train: [36] [2900/6250] eta: 0:09:11 lr: 0.000094 grad: 0.1129 (0.1293) loss: 0.7764 (0.7861) time: 0.1687 data: 0.0697 max mem: 9377 +Train: [36] [3000/6250] eta: 0:08:55 lr: 0.000094 grad: 0.1151 (0.1291) loss: 0.7917 (0.7861) time: 0.1628 data: 0.0738 max mem: 9377 +Train: [36] [3100/6250] eta: 0:08:38 lr: 0.000094 grad: 0.1269 (0.1290) loss: 0.7872 (0.7860) time: 0.1540 data: 0.0699 max mem: 9377 +Train: [36] [3200/6250] eta: 0:08:21 lr: 0.000094 grad: 0.1178 (0.1290) loss: 0.7946 (0.7860) time: 0.1430 data: 0.0512 max mem: 9377 +Train: [36] [3300/6250] eta: 0:08:03 lr: 0.000094 grad: 0.1329 (0.1290) loss: 0.7798 (0.7859) time: 0.1432 data: 0.0577 max mem: 9377 +Train: [36] [3400/6250] eta: 0:07:46 lr: 0.000094 grad: 0.1157 (0.1287) loss: 0.7842 (0.7859) time: 0.1273 data: 0.0317 max mem: 9377 +Train: [36] [3500/6250] eta: 0:07:28 lr: 0.000094 grad: 0.1181 (0.1285) loss: 0.7941 (0.7861) time: 0.1545 data: 0.0642 max mem: 9377 +Train: [36] [3600/6250] eta: 0:07:12 lr: 0.000094 grad: 0.1246 (0.1283) loss: 0.7917 (0.7862) time: 0.1777 data: 0.0922 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:55 lr: 0.000094 grad: 0.1163 (0.1282) loss: 0.7951 (0.7862) time: 0.1576 data: 0.0721 max mem: 9377 +Train: [36] [3800/6250] eta: 0:06:38 lr: 0.000094 grad: 0.1199 (0.1281) loss: 0.7875 (0.7863) time: 0.1692 data: 0.0947 max mem: 9377 +Train: [36] [3900/6250] eta: 0:06:21 lr: 0.000094 grad: 0.1223 (0.1279) loss: 0.7936 (0.7863) time: 0.1558 data: 0.0667 max mem: 9377 +Train: [36] [4000/6250] eta: 0:06:04 lr: 0.000094 grad: 0.1205 (0.1277) loss: 0.7932 (0.7865) time: 0.1526 data: 0.0589 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:48 lr: 0.000094 grad: 0.1207 (0.1276) loss: 0.7823 (0.7865) time: 0.1892 data: 0.1061 max mem: 9377 +Train: [36] [4200/6250] eta: 0:05:31 lr: 0.000094 grad: 0.1226 (0.1277) loss: 0.7867 (0.7865) time: 0.1507 data: 0.0636 max mem: 9377 +Train: [36] [4300/6250] eta: 0:05:14 lr: 0.000094 grad: 0.1282 (0.1276) loss: 0.7839 (0.7865) time: 0.1531 data: 0.0610 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:58 lr: 0.000094 grad: 0.1257 (0.1275) loss: 0.7957 (0.7865) time: 0.1550 data: 0.0706 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:43 lr: 0.000094 grad: 0.1186 (0.1274) loss: 0.7766 (0.7865) time: 0.1448 data: 0.0638 max mem: 9377 +Train: [36] [4600/6250] eta: 0:04:27 lr: 0.000094 grad: 0.1211 (0.1274) loss: 0.7804 (0.7865) time: 0.1845 data: 0.0996 max mem: 9377 +Train: [36] [4700/6250] eta: 0:04:10 lr: 0.000094 grad: 0.1249 (0.1274) loss: 0.7775 (0.7864) time: 0.1448 data: 0.0607 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:54 lr: 0.000094 grad: 0.1263 (0.1274) loss: 0.7903 (0.7864) time: 0.1541 data: 0.0642 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:38 lr: 0.000094 grad: 0.1256 (0.1273) loss: 0.7845 (0.7865) time: 0.1484 data: 0.0604 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:22 lr: 0.000094 grad: 0.1257 (0.1272) loss: 0.7881 (0.7866) time: 0.1602 data: 0.0690 max mem: 9377 +Train: [36] [5100/6250] eta: 0:03:06 lr: 0.000093 grad: 0.1212 (0.1271) loss: 0.7847 (0.7866) time: 0.1599 data: 0.0706 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:50 lr: 0.000093 grad: 0.1278 (0.1270) loss: 0.7896 (0.7867) time: 0.1713 data: 0.0850 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:34 lr: 0.000093 grad: 0.1243 (0.1270) loss: 0.7833 (0.7867) time: 0.1352 data: 0.0388 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:17 lr: 0.000093 grad: 0.1227 (0.1270) loss: 0.7905 (0.7867) time: 0.1594 data: 0.0711 max mem: 9377 +Train: [36] [5500/6250] eta: 0:02:01 lr: 0.000093 grad: 0.1219 (0.1270) loss: 0.7812 (0.7867) time: 0.1682 data: 0.0831 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:45 lr: 0.000093 grad: 0.1149 (0.1269) loss: 0.7849 (0.7868) time: 0.1666 data: 0.0775 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:29 lr: 0.000093 grad: 0.1206 (0.1269) loss: 0.7939 (0.7867) time: 0.1571 data: 0.0640 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:12 lr: 0.000093 grad: 0.1167 (0.1269) loss: 0.7882 (0.7867) time: 0.1591 data: 0.0673 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:56 lr: 0.000093 grad: 0.1185 (0.1268) loss: 0.7863 (0.7867) time: 0.1576 data: 0.0680 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:40 lr: 0.000093 grad: 0.1200 (0.1268) loss: 0.7898 (0.7867) time: 0.1750 data: 0.0875 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:24 lr: 0.000093 grad: 0.1122 (0.1267) loss: 0.7951 (0.7867) time: 0.1471 data: 0.0470 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:08 lr: 0.000093 grad: 0.1159 (0.1266) loss: 0.7944 (0.7867) time: 0.1441 data: 0.0623 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.1201 (0.1266) loss: 0.7839 (0.7867) time: 0.1385 data: 0.0550 max mem: 9377 +Train: [36] Total time: 0:16:53 (0.1621 s / it) +Averaged stats: lr: 0.000093 grad: 0.1201 (0.1266) loss: 0.7839 (0.7867) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:04:08 loss: 0.8440 (0.8440) time: 4.0022 data: 3.9149 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8493 (0.8491) time: 0.1309 data: 0.1059 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:14 (0.2276 s / it) +Averaged stats (hcp-train-subset): loss: 0.8493 (0.8491) +Eval (hcp-val): [36] [ 0/62] eta: 0:06:09 loss: 0.8452 (0.8452) time: 5.9632 data: 5.9326 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8455 (0.8479) time: 0.1343 data: 0.1071 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (hcp-val): loss: 0.8455 (0.8479) +Eval (nsd-val): [36] [ 0/62] eta: 0:05:02 loss: 0.8106 (0.8106) time: 4.8743 data: 4.8401 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8221 (0.8235) time: 0.1103 data: 0.0853 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:13 (0.2221 s / it) +Averaged stats (nsd-val): loss: 0.8221 (0.8235) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 7:21:21 lr: 0.000093 grad: 0.0724 (0.0724) loss: 0.8480 (0.8480) time: 4.2370 data: 4.0397 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:22:50 lr: 0.000093 grad: 0.1409 (0.1970) loss: 0.8015 (0.8115) time: 0.1578 data: 0.0599 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:19:26 lr: 0.000093 grad: 0.1372 (0.1774) loss: 0.7997 (0.7997) time: 0.1649 data: 0.0672 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:18:07 lr: 0.000093 grad: 0.1453 (0.1672) loss: 0.7882 (0.7949) time: 0.1626 data: 0.0627 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:17:06 lr: 0.000093 grad: 0.1279 (0.1590) loss: 0.7698 (0.7920) time: 0.1750 data: 0.0851 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:16:29 lr: 0.000093 grad: 0.1148 (0.1532) loss: 0.7855 (0.7901) time: 0.1482 data: 0.0503 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:15:53 lr: 0.000093 grad: 0.1160 (0.1478) loss: 0.7967 (0.7901) time: 0.1616 data: 0.0768 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:15:26 lr: 0.000093 grad: 0.1210 (0.1441) loss: 0.7881 (0.7903) time: 0.1618 data: 0.0717 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:15:17 lr: 0.000093 grad: 0.1176 (0.1412) loss: 0.7835 (0.7901) time: 0.1704 data: 0.0831 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:15:08 lr: 0.000093 grad: 0.1218 (0.1389) loss: 0.7866 (0.7900) time: 0.1860 data: 0.0913 max mem: 9377 +Train: [37] [1000/6250] eta: 0:14:57 lr: 0.000093 grad: 0.1147 (0.1373) loss: 0.8042 (0.7903) time: 0.1754 data: 0.0797 max mem: 9377 +Train: [37] [1100/6250] eta: 0:14:47 lr: 0.000093 grad: 0.1114 (0.1351) loss: 0.7873 (0.7903) time: 0.1633 data: 0.0701 max mem: 9377 +Train: [37] [1200/6250] eta: 0:14:29 lr: 0.000093 grad: 0.1085 (0.1338) loss: 0.7950 (0.7899) time: 0.1335 data: 0.0421 max mem: 9377 +Train: [37] [1300/6250] eta: 0:14:06 lr: 0.000093 grad: 0.1170 (0.1327) loss: 0.7838 (0.7895) time: 0.1440 data: 0.0494 max mem: 9377 +Train: [37] [1400/6250] eta: 0:13:43 lr: 0.000093 grad: 0.1129 (0.1318) loss: 0.7832 (0.7893) time: 0.1542 data: 0.0615 max mem: 9377 +Train: [37] [1500/6250] eta: 0:13:21 lr: 0.000093 grad: 0.1168 (0.1309) loss: 0.7947 (0.7892) time: 0.1518 data: 0.0689 max mem: 9377 +Train: [37] [1600/6250] eta: 0:13:03 lr: 0.000093 grad: 0.1201 (0.1308) loss: 0.7862 (0.7888) time: 0.1857 data: 0.1039 max mem: 9377 +Train: [37] [1700/6250] eta: 0:12:44 lr: 0.000093 grad: 0.1264 (0.1305) loss: 0.7902 (0.7884) time: 0.1793 data: 0.0888 max mem: 9377 +Train: [37] [1800/6250] eta: 0:12:22 lr: 0.000093 grad: 0.1212 (0.1300) loss: 0.7862 (0.7881) time: 0.1471 data: 0.0608 max mem: 9377 +Train: [37] [1900/6250] eta: 0:12:04 lr: 0.000093 grad: 0.1149 (0.1296) loss: 0.7906 (0.7879) time: 0.1413 data: 0.0400 max mem: 9377 +Train: [37] [2000/6250] eta: 0:11:47 lr: 0.000093 grad: 0.1305 (0.1296) loss: 0.7715 (0.7877) time: 0.1594 data: 0.0763 max mem: 9377 +Train: [37] [2100/6250] eta: 0:11:30 lr: 0.000093 grad: 0.1211 (0.1291) loss: 0.7909 (0.7876) time: 0.1566 data: 0.0690 max mem: 9377 +Train: [37] [2200/6250] eta: 0:11:13 lr: 0.000093 grad: 0.1180 (0.1286) loss: 0.7805 (0.7875) time: 0.1479 data: 0.0580 max mem: 9377 +Train: [37] [2300/6250] eta: 0:10:55 lr: 0.000092 grad: 0.1189 (0.1285) loss: 0.7865 (0.7875) time: 0.1513 data: 0.0562 max mem: 9377 +Train: [37] [2400/6250] eta: 0:10:37 lr: 0.000092 grad: 0.1186 (0.1281) loss: 0.7920 (0.7875) time: 0.1639 data: 0.0746 max mem: 9377 +Train: [37] [2500/6250] eta: 0:10:22 lr: 0.000092 grad: 0.1107 (0.1280) loss: 0.7874 (0.7876) time: 0.2079 data: 0.1255 max mem: 9377 +Train: [37] [2600/6250] eta: 0:10:04 lr: 0.000092 grad: 0.1204 (0.1277) loss: 0.7797 (0.7876) time: 0.1516 data: 0.0599 max mem: 9377 +Train: [37] [2700/6250] eta: 0:09:46 lr: 0.000092 grad: 0.1230 (0.1275) loss: 0.7835 (0.7875) time: 0.1513 data: 0.0490 max mem: 9377 +Train: [37] [2800/6250] eta: 0:09:29 lr: 0.000092 grad: 0.1178 (0.1272) loss: 0.7876 (0.7875) time: 0.1777 data: 0.0910 max mem: 9377 +Train: [37] [2900/6250] eta: 0:09:12 lr: 0.000092 grad: 0.1215 (0.1270) loss: 0.7872 (0.7874) time: 0.1766 data: 0.0922 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:55 lr: 0.000092 grad: 0.1205 (0.1269) loss: 0.7798 (0.7872) time: 0.1638 data: 0.0804 max mem: 9377 +Train: [37] [3100/6250] eta: 0:08:37 lr: 0.000092 grad: 0.1259 (0.1268) loss: 0.7865 (0.7871) time: 0.1576 data: 0.0701 max mem: 9377 +Train: [37] [3200/6250] eta: 0:08:20 lr: 0.000092 grad: 0.1225 (0.1268) loss: 0.7839 (0.7870) time: 0.1661 data: 0.0757 max mem: 9377 +Train: [37] [3300/6250] eta: 0:08:03 lr: 0.000092 grad: 0.1148 (0.1267) loss: 0.7904 (0.7870) time: 0.1523 data: 0.0688 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:46 lr: 0.000092 grad: 0.1216 (0.1266) loss: 0.7870 (0.7869) time: 0.1425 data: 0.0627 max mem: 9377 +Train: [37] [3500/6250] eta: 0:07:29 lr: 0.000092 grad: 0.1223 (0.1266) loss: 0.7801 (0.7868) time: 0.1478 data: 0.0582 max mem: 9377 +Train: [37] [3600/6250] eta: 0:07:12 lr: 0.000092 grad: 0.1270 (0.1265) loss: 0.7861 (0.7868) time: 0.1646 data: 0.0850 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:55 lr: 0.000092 grad: 0.1198 (0.1264) loss: 0.7822 (0.7868) time: 0.1379 data: 0.0510 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:38 lr: 0.000092 grad: 0.1154 (0.1264) loss: 0.7833 (0.7868) time: 0.1582 data: 0.0707 max mem: 9377 +Train: [37] [3900/6250] eta: 0:06:21 lr: 0.000092 grad: 0.1254 (0.1263) loss: 0.7821 (0.7867) time: 0.1477 data: 0.0579 max mem: 9377 +Train: [37] [4000/6250] eta: 0:06:05 lr: 0.000092 grad: 0.1261 (0.1263) loss: 0.7959 (0.7866) time: 0.1579 data: 0.0688 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:48 lr: 0.000092 grad: 0.1256 (0.1262) loss: 0.7794 (0.7866) time: 0.1311 data: 0.0372 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:31 lr: 0.000092 grad: 0.1136 (0.1260) loss: 0.7917 (0.7867) time: 0.1640 data: 0.0706 max mem: 9377 +Train: [37] [4300/6250] eta: 0:05:15 lr: 0.000092 grad: 0.1195 (0.1259) loss: 0.7845 (0.7867) time: 0.1529 data: 0.0651 max mem: 9377 +Train: [37] [4400/6250] eta: 0:04:59 lr: 0.000092 grad: 0.1181 (0.1259) loss: 0.7885 (0.7867) time: 0.1789 data: 0.0988 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:43 lr: 0.000092 grad: 0.1191 (0.1258) loss: 0.7894 (0.7868) time: 0.1443 data: 0.0651 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:27 lr: 0.000092 grad: 0.1246 (0.1258) loss: 0.7831 (0.7868) time: 0.1620 data: 0.0769 max mem: 9377 +Train: [37] [4700/6250] eta: 0:04:11 lr: 0.000092 grad: 0.1281 (0.1258) loss: 0.7795 (0.7868) time: 0.1758 data: 0.0968 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:55 lr: 0.000092 grad: 0.1143 (0.1258) loss: 0.7944 (0.7869) time: 0.1604 data: 0.0728 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:39 lr: 0.000092 grad: 0.1163 (0.1258) loss: 0.7860 (0.7869) time: 0.1635 data: 0.0768 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:22 lr: 0.000092 grad: 0.1252 (0.1258) loss: 0.7910 (0.7868) time: 0.1349 data: 0.0474 max mem: 9377 +Train: [37] [5100/6250] eta: 0:03:06 lr: 0.000092 grad: 0.1303 (0.1258) loss: 0.7688 (0.7867) time: 0.1357 data: 0.0346 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:50 lr: 0.000092 grad: 0.1275 (0.1259) loss: 0.7764 (0.7867) time: 0.1410 data: 0.0425 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:34 lr: 0.000092 grad: 0.1232 (0.1260) loss: 0.7871 (0.7866) time: 0.1776 data: 0.0869 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:18 lr: 0.000092 grad: 0.1205 (0.1259) loss: 0.7878 (0.7866) time: 0.1579 data: 0.0759 max mem: 9377 +Train: [37] [5500/6250] eta: 0:02:02 lr: 0.000092 grad: 0.1307 (0.1259) loss: 0.7876 (0.7866) time: 0.1961 data: 0.1086 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:45 lr: 0.000092 grad: 0.1183 (0.1259) loss: 0.7887 (0.7866) time: 0.1735 data: 0.0830 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:29 lr: 0.000091 grad: 0.1266 (0.1259) loss: 0.7871 (0.7866) time: 0.1683 data: 0.0744 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:13 lr: 0.000091 grad: 0.1194 (0.1259) loss: 0.7897 (0.7867) time: 0.1691 data: 0.0662 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:57 lr: 0.000091 grad: 0.1183 (0.1259) loss: 0.7845 (0.7867) time: 0.1506 data: 0.0647 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:40 lr: 0.000091 grad: 0.1166 (0.1259) loss: 0.7833 (0.7868) time: 0.1395 data: 0.0551 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:24 lr: 0.000091 grad: 0.1233 (0.1259) loss: 0.7840 (0.7869) time: 0.1631 data: 0.0763 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:08 lr: 0.000091 grad: 0.1242 (0.1259) loss: 0.7922 (0.7869) time: 0.1508 data: 0.0641 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.1189 (0.1259) loss: 0.7873 (0.7870) time: 0.1531 data: 0.0682 max mem: 9377 +Train: [37] Total time: 0:17:01 (0.1634 s / it) +Averaged stats: lr: 0.000091 grad: 0.1189 (0.1259) loss: 0.7873 (0.7870) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:05:40 loss: 0.8460 (0.8460) time: 5.4893 data: 5.4566 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8483 (0.8501) time: 0.1436 data: 0.1186 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:14 (0.2306 s / it) +Averaged stats (hcp-train-subset): loss: 0.8483 (0.8501) +Eval (hcp-val): [37] [ 0/62] eta: 0:05:11 loss: 0.8505 (0.8505) time: 5.0300 data: 5.0004 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8454 (0.8487) time: 0.1500 data: 0.1245 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:14 (0.2348 s / it) +Averaged stats (hcp-val): loss: 0.8454 (0.8487) +Eval (nsd-val): [37] [ 0/62] eta: 0:05:41 loss: 0.8182 (0.8182) time: 5.5086 data: 5.4779 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8237 (0.8274) time: 0.1543 data: 0.1283 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:15 (0.2502 s / it) +Averaged stats (nsd-val): loss: 0.8237 (0.8274) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 9:52:37 lr: 0.000091 grad: 0.3238 (0.3238) loss: 0.8258 (0.8258) time: 5.6892 data: 5.4278 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:24:37 lr: 0.000091 grad: 0.1182 (0.1442) loss: 0.8198 (0.8274) time: 0.2046 data: 0.1071 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:20:41 lr: 0.000091 grad: 0.1354 (0.1397) loss: 0.8020 (0.8211) time: 0.1743 data: 0.0714 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:19:10 lr: 0.000091 grad: 0.1178 (0.1364) loss: 0.8124 (0.8152) time: 0.1722 data: 0.0820 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:18:10 lr: 0.000091 grad: 0.1104 (0.1339) loss: 0.8092 (0.8114) time: 0.1642 data: 0.0642 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:17:25 lr: 0.000091 grad: 0.1133 (0.1307) loss: 0.7874 (0.8080) time: 0.1486 data: 0.0589 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:16:56 lr: 0.000091 grad: 0.1212 (0.1285) loss: 0.7971 (0.8066) time: 0.1843 data: 0.0972 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:16:26 lr: 0.000091 grad: 0.1110 (0.1266) loss: 0.7955 (0.8051) time: 0.1601 data: 0.0786 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:16:05 lr: 0.000091 grad: 0.1153 (0.1251) loss: 0.7938 (0.8044) time: 0.1794 data: 0.0830 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:15:38 lr: 0.000091 grad: 0.1090 (0.1237) loss: 0.7895 (0.8037) time: 0.1415 data: 0.0507 max mem: 9377 +Train: [38] [1000/6250] eta: 0:15:06 lr: 0.000091 grad: 0.1075 (0.1228) loss: 0.8036 (0.8035) time: 0.1257 data: 0.0398 max mem: 9377 +Train: [38] [1100/6250] eta: 0:14:42 lr: 0.000091 grad: 0.1116 (0.1219) loss: 0.7959 (0.8030) time: 0.1666 data: 0.0721 max mem: 9377 +Train: [38] [1200/6250] eta: 0:14:20 lr: 0.000091 grad: 0.1089 (0.1214) loss: 0.7983 (0.8024) time: 0.1542 data: 0.0626 max mem: 9377 +Train: [38] [1300/6250] eta: 0:13:58 lr: 0.000091 grad: 0.1186 (0.1210) loss: 0.7898 (0.8016) time: 0.1546 data: 0.0609 max mem: 9377 +Train: [38] [1400/6250] eta: 0:13:32 lr: 0.000091 grad: 0.1173 (0.1207) loss: 0.7926 (0.8011) time: 0.1354 data: 0.0555 max mem: 9377 +Train: [38] [1500/6250] eta: 0:13:11 lr: 0.000091 grad: 0.1169 (0.1206) loss: 0.7866 (0.8005) time: 0.1523 data: 0.0599 max mem: 9377 +Train: [38] [1600/6250] eta: 0:12:51 lr: 0.000091 grad: 0.1129 (0.1206) loss: 0.7907 (0.7999) time: 0.1589 data: 0.0664 max mem: 9377 +Train: [38] [1700/6250] eta: 0:12:32 lr: 0.000091 grad: 0.1217 (0.1207) loss: 0.7930 (0.7991) time: 0.1537 data: 0.0621 max mem: 9377 +Train: [38] [1800/6250] eta: 0:12:14 lr: 0.000091 grad: 0.1176 (0.1208) loss: 0.7874 (0.7986) time: 0.1731 data: 0.0850 max mem: 9377 +Train: [38] [1900/6250] eta: 0:11:55 lr: 0.000091 grad: 0.1121 (0.1207) loss: 0.7948 (0.7985) time: 0.1747 data: 0.0846 max mem: 9377 +Train: [38] [2000/6250] eta: 0:11:36 lr: 0.000091 grad: 0.1166 (0.1206) loss: 0.7921 (0.7982) time: 0.1514 data: 0.0561 max mem: 9377 +Train: [38] [2100/6250] eta: 0:11:18 lr: 0.000091 grad: 0.1143 (0.1205) loss: 0.7839 (0.7978) time: 0.1514 data: 0.0589 max mem: 9377 +Train: [38] [2200/6250] eta: 0:11:01 lr: 0.000091 grad: 0.1143 (0.1204) loss: 0.7876 (0.7974) time: 0.1504 data: 0.0593 max mem: 9377 +Train: [38] [2300/6250] eta: 0:10:45 lr: 0.000091 grad: 0.1209 (0.1207) loss: 0.7924 (0.7970) time: 0.1797 data: 0.0928 max mem: 9377 +Train: [38] [2400/6250] eta: 0:10:27 lr: 0.000091 grad: 0.1155 (0.1206) loss: 0.7871 (0.7966) time: 0.1598 data: 0.0716 max mem: 9377 +Train: [38] [2500/6250] eta: 0:10:09 lr: 0.000091 grad: 0.1181 (0.1208) loss: 0.7928 (0.7964) time: 0.1625 data: 0.0634 max mem: 9377 +Train: [38] [2600/6250] eta: 0:09:52 lr: 0.000091 grad: 0.1245 (0.1209) loss: 0.7902 (0.7962) time: 0.1443 data: 0.0554 max mem: 9377 +Train: [38] [2700/6250] eta: 0:09:36 lr: 0.000091 grad: 0.1203 (0.1209) loss: 0.7897 (0.7959) time: 0.1825 data: 0.0950 max mem: 9377 +Train: [38] [2800/6250] eta: 0:09:18 lr: 0.000091 grad: 0.1209 (0.1209) loss: 0.7861 (0.7956) time: 0.1577 data: 0.0702 max mem: 9377 +Train: [38] [2900/6250] eta: 0:09:01 lr: 0.000090 grad: 0.1146 (0.1209) loss: 0.7942 (0.7953) time: 0.1503 data: 0.0615 max mem: 9377 +Train: [38] [3000/6250] eta: 0:08:44 lr: 0.000090 grad: 0.1265 (0.1211) loss: 0.7994 (0.7951) time: 0.1559 data: 0.0705 max mem: 9377 +Train: [38] [3100/6250] eta: 0:08:29 lr: 0.000090 grad: 0.1192 (0.1211) loss: 0.7867 (0.7949) time: 0.1904 data: 0.1092 max mem: 9377 +Train: [38] [3200/6250] eta: 0:08:12 lr: 0.000090 grad: 0.1173 (0.1210) loss: 0.7957 (0.7947) time: 0.1520 data: 0.0691 max mem: 9377 +Train: [38] [3300/6250] eta: 0:07:56 lr: 0.000090 grad: 0.1292 (0.1212) loss: 0.7889 (0.7945) time: 0.1596 data: 0.0742 max mem: 9377 +Train: [38] [3400/6250] eta: 0:07:39 lr: 0.000090 grad: 0.1180 (0.1213) loss: 0.7843 (0.7943) time: 0.1501 data: 0.0629 max mem: 9377 +Train: [38] [3500/6250] eta: 0:07:23 lr: 0.000090 grad: 0.1269 (0.1216) loss: 0.7988 (0.7941) time: 0.1539 data: 0.0624 max mem: 9377 +Train: [38] [3600/6250] eta: 0:07:07 lr: 0.000090 grad: 0.1243 (0.1217) loss: 0.7885 (0.7939) time: 0.1339 data: 0.0412 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:50 lr: 0.000090 grad: 0.1199 (0.1219) loss: 0.7792 (0.7937) time: 0.1551 data: 0.0642 max mem: 9377 +Train: [38] [3800/6250] eta: 0:06:35 lr: 0.000090 grad: 0.1272 (0.1220) loss: 0.7838 (0.7936) time: 0.1678 data: 0.0854 max mem: 9377 +Train: [38] [3900/6250] eta: 0:06:18 lr: 0.000090 grad: 0.1170 (0.1221) loss: 0.7962 (0.7935) time: 0.1467 data: 0.0612 max mem: 9377 +Train: [38] [4000/6250] eta: 0:06:02 lr: 0.000090 grad: 0.1170 (0.1222) loss: 0.7866 (0.7935) time: 0.1496 data: 0.0697 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:45 lr: 0.000090 grad: 0.1233 (0.1223) loss: 0.7836 (0.7934) time: 0.1280 data: 0.0403 max mem: 9377 +Train: [38] [4200/6250] eta: 0:05:29 lr: 0.000090 grad: 0.1235 (0.1223) loss: 0.7887 (0.7933) time: 0.1424 data: 0.0527 max mem: 9377 +Train: [38] [4300/6250] eta: 0:05:13 lr: 0.000090 grad: 0.1168 (0.1223) loss: 0.7914 (0.7932) time: 0.1441 data: 0.0647 max mem: 9377 +Train: [38] [4400/6250] eta: 0:04:58 lr: 0.000090 grad: 0.1233 (0.1225) loss: 0.7880 (0.7930) time: 0.1735 data: 0.0898 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:42 lr: 0.000090 grad: 0.1229 (0.1225) loss: 0.7852 (0.7929) time: 0.1683 data: 0.0844 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:26 lr: 0.000090 grad: 0.1188 (0.1226) loss: 0.7969 (0.7928) time: 0.1831 data: 0.0933 max mem: 9377 +Train: [38] [4700/6250] eta: 0:04:11 lr: 0.000090 grad: 0.1292 (0.1226) loss: 0.7863 (0.7928) time: 0.1814 data: 0.0881 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:56 lr: 0.000090 grad: 0.1125 (0.1226) loss: 0.7952 (0.7927) time: 0.1935 data: 0.0915 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:40 lr: 0.000090 grad: 0.1236 (0.1227) loss: 0.7818 (0.7926) time: 0.1818 data: 0.0892 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:24 lr: 0.000090 grad: 0.1157 (0.1228) loss: 0.7908 (0.7925) time: 0.1825 data: 0.0841 max mem: 9377 +Train: [38] [5100/6250] eta: 0:03:08 lr: 0.000090 grad: 0.1190 (0.1228) loss: 0.7887 (0.7924) time: 0.1753 data: 0.0939 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:51 lr: 0.000090 grad: 0.1244 (0.1228) loss: 0.7912 (0.7924) time: 0.1943 data: 0.1006 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:35 lr: 0.000090 grad: 0.1260 (0.1229) loss: 0.7900 (0.7923) time: 0.1628 data: 0.0631 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:19 lr: 0.000090 grad: 0.1201 (0.1229) loss: 0.8004 (0.7923) time: 0.1784 data: 0.0831 max mem: 9377 +Train: [38] [5500/6250] eta: 0:02:02 lr: 0.000090 grad: 0.1189 (0.1229) loss: 0.7984 (0.7923) time: 0.1905 data: 0.1049 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:46 lr: 0.000090 grad: 0.1170 (0.1230) loss: 0.7974 (0.7923) time: 0.1665 data: 0.0762 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:29 lr: 0.000090 grad: 0.1235 (0.1231) loss: 0.7903 (0.7923) time: 0.1440 data: 0.0444 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:13 lr: 0.000090 grad: 0.1204 (0.1231) loss: 0.7919 (0.7922) time: 0.1545 data: 0.0638 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:57 lr: 0.000090 grad: 0.1183 (0.1231) loss: 0.7740 (0.7922) time: 0.1727 data: 0.0868 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:40 lr: 0.000090 grad: 0.1144 (0.1232) loss: 0.7955 (0.7921) time: 0.1437 data: 0.0425 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:24 lr: 0.000090 grad: 0.1271 (0.1232) loss: 0.7878 (0.7921) time: 0.1638 data: 0.0730 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:08 lr: 0.000089 grad: 0.1221 (0.1233) loss: 0.7841 (0.7920) time: 0.1551 data: 0.0650 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.1228 (0.1233) loss: 0.7906 (0.7920) time: 0.1494 data: 0.0602 max mem: 9377 +Train: [38] Total time: 0:17:05 (0.1640 s / it) +Averaged stats: lr: 0.000089 grad: 0.1228 (0.1233) loss: 0.7906 (0.7920) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:03:25 loss: 0.8460 (0.8460) time: 3.3134 data: 3.2306 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8472 (0.8494) time: 0.1646 data: 0.1376 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:14 (0.2400 s / it) +Averaged stats (hcp-train-subset): loss: 0.8472 (0.8494) +Eval (hcp-val): [38] [ 0/62] eta: 0:05:58 loss: 0.8478 (0.8478) time: 5.7798 data: 5.7483 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8449 (0.8474) time: 0.1229 data: 0.0961 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:14 (0.2321 s / it) +Averaged stats (hcp-val): loss: 0.8449 (0.8474) +Eval (nsd-val): [38] [ 0/62] eta: 0:04:36 loss: 0.8110 (0.8110) time: 4.4555 data: 4.3717 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8192 (0.8218) time: 0.1279 data: 0.1008 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:14 (0.2290 s / it) +Averaged stats (nsd-val): loss: 0.8192 (0.8218) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 10:04:03 lr: 0.000089 grad: 0.1415 (0.1415) loss: 0.7881 (0.7881) time: 5.7989 data: 5.6482 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:23:04 lr: 0.000089 grad: 0.1297 (0.1611) loss: 0.8177 (0.8121) time: 0.1812 data: 0.0711 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:19:16 lr: 0.000089 grad: 0.1357 (0.1574) loss: 0.7855 (0.8040) time: 0.1383 data: 0.0464 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:17:47 lr: 0.000089 grad: 0.1179 (0.1499) loss: 0.8130 (0.8017) time: 0.1470 data: 0.0454 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:17:08 lr: 0.000089 grad: 0.1114 (0.1455) loss: 0.8048 (0.8004) time: 0.1503 data: 0.0504 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:16:27 lr: 0.000089 grad: 0.1258 (0.1417) loss: 0.7894 (0.7990) time: 0.1687 data: 0.0645 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:15:55 lr: 0.000089 grad: 0.1317 (0.1406) loss: 0.7784 (0.7967) time: 0.1656 data: 0.0738 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:15:28 lr: 0.000089 grad: 0.1260 (0.1397) loss: 0.7886 (0.7957) time: 0.1673 data: 0.0698 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:15:15 lr: 0.000089 grad: 0.1104 (0.1376) loss: 0.7954 (0.7947) time: 0.1685 data: 0.0732 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:14:56 lr: 0.000089 grad: 0.1227 (0.1367) loss: 0.7913 (0.7939) time: 0.1631 data: 0.0688 max mem: 9377 +Train: [39] [1000/6250] eta: 0:14:40 lr: 0.000089 grad: 0.1247 (0.1354) loss: 0.7823 (0.7932) time: 0.1478 data: 0.0606 max mem: 9377 +Train: [39] [1100/6250] eta: 0:14:23 lr: 0.000089 grad: 0.1171 (0.1343) loss: 0.7890 (0.7925) time: 0.1747 data: 0.0809 max mem: 9377 +Train: [39] [1200/6250] eta: 0:14:11 lr: 0.000089 grad: 0.1225 (0.1341) loss: 0.7868 (0.7917) time: 0.1830 data: 0.0858 max mem: 9377 +Train: [39] [1300/6250] eta: 0:13:59 lr: 0.000089 grad: 0.1218 (0.1337) loss: 0.7789 (0.7913) time: 0.1988 data: 0.1074 max mem: 9377 +Train: [39] [1400/6250] eta: 0:13:39 lr: 0.000089 grad: 0.1274 (0.1335) loss: 0.7805 (0.7905) time: 0.1738 data: 0.0739 max mem: 9377 +Train: [39] [1500/6250] eta: 0:13:20 lr: 0.000089 grad: 0.1285 (0.1333) loss: 0.7706 (0.7895) time: 0.1672 data: 0.0848 max mem: 9377 +Train: [39] [1600/6250] eta: 0:12:58 lr: 0.000089 grad: 0.1327 (0.1329) loss: 0.7757 (0.7888) time: 0.1392 data: 0.0520 max mem: 9377 +Train: [39] [1700/6250] eta: 0:12:38 lr: 0.000089 grad: 0.1150 (0.1324) loss: 0.7868 (0.7883) time: 0.1441 data: 0.0525 max mem: 9377 +Train: [39] [1800/6250] eta: 0:12:18 lr: 0.000089 grad: 0.1273 (0.1324) loss: 0.7733 (0.7877) time: 0.1421 data: 0.0499 max mem: 9377 +Train: [39] [1900/6250] eta: 0:12:01 lr: 0.000089 grad: 0.1231 (0.1324) loss: 0.7803 (0.7872) time: 0.1607 data: 0.0718 max mem: 9377 +Train: [39] [2000/6250] eta: 0:11:43 lr: 0.000089 grad: 0.1152 (0.1320) loss: 0.7842 (0.7866) time: 0.1552 data: 0.0615 max mem: 9377 +Train: [39] [2100/6250] eta: 0:11:28 lr: 0.000089 grad: 0.1262 (0.1317) loss: 0.7766 (0.7862) time: 0.1799 data: 0.0909 max mem: 9377 +Train: [39] [2200/6250] eta: 0:11:10 lr: 0.000089 grad: 0.1208 (0.1316) loss: 0.7832 (0.7859) time: 0.1377 data: 0.0526 max mem: 9377 +Train: [39] [2300/6250] eta: 0:10:52 lr: 0.000089 grad: 0.1180 (0.1313) loss: 0.7741 (0.7857) time: 0.1492 data: 0.0548 max mem: 9377 +Train: [39] [2400/6250] eta: 0:10:35 lr: 0.000089 grad: 0.1159 (0.1309) loss: 0.7826 (0.7855) time: 0.1574 data: 0.0604 max mem: 9377 +Train: [39] [2500/6250] eta: 0:10:17 lr: 0.000089 grad: 0.1185 (0.1308) loss: 0.7883 (0.7854) time: 0.1424 data: 0.0619 max mem: 9377 +Train: [39] [2600/6250] eta: 0:10:00 lr: 0.000089 grad: 0.1129 (0.1305) loss: 0.7857 (0.7854) time: 0.1839 data: 0.0958 max mem: 9377 +Train: [39] [2700/6250] eta: 0:09:42 lr: 0.000089 grad: 0.1220 (0.1302) loss: 0.7808 (0.7852) time: 0.1742 data: 0.0895 max mem: 9377 +Train: [39] [2800/6250] eta: 0:09:25 lr: 0.000089 grad: 0.1201 (0.1300) loss: 0.7927 (0.7852) time: 0.1570 data: 0.0756 max mem: 9377 +Train: [39] [2900/6250] eta: 0:09:08 lr: 0.000089 grad: 0.1264 (0.1299) loss: 0.7920 (0.7854) time: 0.1534 data: 0.0700 max mem: 9377 +Train: [39] [3000/6250] eta: 0:08:50 lr: 0.000089 grad: 0.1144 (0.1297) loss: 0.7920 (0.7855) time: 0.1642 data: 0.0754 max mem: 9377 +Train: [39] [3100/6250] eta: 0:08:33 lr: 0.000089 grad: 0.1241 (0.1296) loss: 0.7978 (0.7855) time: 0.1608 data: 0.0746 max mem: 9377 +Train: [39] [3200/6250] eta: 0:08:16 lr: 0.000089 grad: 0.1187 (0.1294) loss: 0.7868 (0.7855) time: 0.1511 data: 0.0656 max mem: 9377 +Train: [39] [3300/6250] eta: 0:08:00 lr: 0.000088 grad: 0.1374 (0.1294) loss: 0.7839 (0.7855) time: 0.1373 data: 0.0475 max mem: 9377 +Train: [39] [3400/6250] eta: 0:07:43 lr: 0.000088 grad: 0.1289 (0.1294) loss: 0.7753 (0.7854) time: 0.1577 data: 0.0663 max mem: 9377 +Train: [39] [3500/6250] eta: 0:07:26 lr: 0.000088 grad: 0.1220 (0.1294) loss: 0.7810 (0.7854) time: 0.1463 data: 0.0594 max mem: 9377 +Train: [39] [3600/6250] eta: 0:07:09 lr: 0.000088 grad: 0.1150 (0.1292) loss: 0.7842 (0.7854) time: 0.1760 data: 0.0898 max mem: 9377 +Train: [39] [3700/6250] eta: 0:06:52 lr: 0.000088 grad: 0.1235 (0.1291) loss: 0.7932 (0.7854) time: 0.1681 data: 0.0753 max mem: 9377 +Train: [39] [3800/6250] eta: 0:06:36 lr: 0.000088 grad: 0.1193 (0.1290) loss: 0.7926 (0.7854) time: 0.1842 data: 0.0996 max mem: 9377 +Train: [39] [3900/6250] eta: 0:06:20 lr: 0.000088 grad: 0.1333 (0.1289) loss: 0.7860 (0.7855) time: 0.1674 data: 0.0879 max mem: 9377 +Train: [39] [4000/6250] eta: 0:06:04 lr: 0.000088 grad: 0.1169 (0.1292) loss: 0.7936 (0.7856) time: 0.1615 data: 0.0792 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:48 lr: 0.000088 grad: 0.1263 (0.1292) loss: 0.7939 (0.7856) time: 0.1502 data: 0.0598 max mem: 9377 +Train: [39] [4200/6250] eta: 0:05:32 lr: 0.000088 grad: 0.1228 (0.1291) loss: 0.7938 (0.7857) time: 0.1285 data: 0.0410 max mem: 9377 +Train: [39] [4300/6250] eta: 0:05:16 lr: 0.000088 grad: 0.1317 (0.1292) loss: 0.7832 (0.7856) time: 0.1951 data: 0.1123 max mem: 9377 +Train: [39] [4400/6250] eta: 0:05:00 lr: 0.000088 grad: 0.1241 (0.1292) loss: 0.7936 (0.7856) time: 0.1542 data: 0.0610 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:44 lr: 0.000088 grad: 0.1251 (0.1291) loss: 0.7800 (0.7856) time: 0.1651 data: 0.0760 max mem: 9377 +Train: [39] [4600/6250] eta: 0:04:28 lr: 0.000088 grad: 0.1214 (0.1291) loss: 0.7867 (0.7856) time: 0.1649 data: 0.0762 max mem: 9377 +Train: [39] [4700/6250] eta: 0:04:11 lr: 0.000088 grad: 0.1205 (0.1292) loss: 0.7900 (0.7856) time: 0.1479 data: 0.0684 max mem: 9377 +Train: [39] [4800/6250] eta: 0:03:55 lr: 0.000088 grad: 0.1280 (0.1292) loss: 0.7779 (0.7855) time: 0.1527 data: 0.0645 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:39 lr: 0.000088 grad: 0.1246 (0.1292) loss: 0.7815 (0.7855) time: 0.1478 data: 0.0603 max mem: 9377 +Train: [39] [5000/6250] eta: 0:03:22 lr: 0.000088 grad: 0.1214 (0.1292) loss: 0.7782 (0.7855) time: 0.1686 data: 0.0820 max mem: 9377 +Train: [39] [5100/6250] eta: 0:03:07 lr: 0.000088 grad: 0.1293 (0.1291) loss: 0.7816 (0.7854) time: 0.1715 data: 0.0695 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:51 lr: 0.000088 grad: 0.1256 (0.1291) loss: 0.7799 (0.7854) time: 0.1950 data: 0.0866 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:35 lr: 0.000088 grad: 0.1292 (0.1292) loss: 0.7799 (0.7854) time: 0.1963 data: 0.0863 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:19 lr: 0.000088 grad: 0.1234 (0.1291) loss: 0.7854 (0.7854) time: 0.1739 data: 0.0906 max mem: 9377 +Train: [39] [5500/6250] eta: 0:02:03 lr: 0.000088 grad: 0.1247 (0.1290) loss: 0.7906 (0.7854) time: 0.1788 data: 0.0828 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:46 lr: 0.000088 grad: 0.1203 (0.1290) loss: 0.7880 (0.7854) time: 0.1691 data: 0.0705 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:30 lr: 0.000088 grad: 0.1182 (0.1289) loss: 0.7961 (0.7855) time: 0.1950 data: 0.0964 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:14 lr: 0.000088 grad: 0.1268 (0.1289) loss: 0.7803 (0.7855) time: 0.1756 data: 0.0718 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:57 lr: 0.000088 grad: 0.1171 (0.1289) loss: 0.7940 (0.7855) time: 0.1534 data: 0.0477 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:41 lr: 0.000088 grad: 0.1217 (0.1288) loss: 0.7830 (0.7855) time: 0.1799 data: 0.0876 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:24 lr: 0.000088 grad: 0.1230 (0.1288) loss: 0.7861 (0.7855) time: 0.1591 data: 0.0677 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:08 lr: 0.000088 grad: 0.1119 (0.1287) loss: 0.7944 (0.7855) time: 0.1519 data: 0.0637 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.1216 (0.1286) loss: 0.7878 (0.7855) time: 0.1693 data: 0.0739 max mem: 9377 +Train: [39] Total time: 0:17:13 (0.1653 s / it) +Averaged stats: lr: 0.000088 grad: 0.1216 (0.1286) loss: 0.7878 (0.7855) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:04:22 loss: 0.8486 (0.8486) time: 4.2417 data: 4.1602 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8486 (0.8500) time: 0.1360 data: 0.1106 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:14 (0.2338 s / it) +Averaged stats (hcp-train-subset): loss: 0.8486 (0.8500) +Making plots (hcp-train-subset): example=3 +Eval (hcp-val): [39] [ 0/62] eta: 0:05:33 loss: 0.8447 (0.8447) time: 5.3718 data: 5.3406 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8459 (0.8478) time: 0.1381 data: 0.1125 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:14 (0.2301 s / it) +Averaged stats (hcp-val): loss: 0.8459 (0.8478) +Making plots (hcp-val): example=24 +Eval (nsd-val): [39] [ 0/62] eta: 0:06:03 loss: 0.8137 (0.8137) time: 5.8667 data: 5.8328 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8238 (0.8245) time: 0.1107 data: 0.0849 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:14 (0.2291 s / it) +Averaged stats (nsd-val): loss: 0.8238 (0.8245) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 10:41:55 lr: 0.000088 grad: 0.6645 (0.6645) loss: 0.7803 (0.7803) time: 6.1624 data: 6.0390 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:22:26 lr: 0.000088 grad: 0.1800 (0.2316) loss: 0.7829 (0.7904) time: 0.1591 data: 0.0513 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:19:11 lr: 0.000088 grad: 0.1764 (0.2062) loss: 0.7751 (0.7853) time: 0.1152 data: 0.0242 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:17:54 lr: 0.000088 grad: 0.1431 (0.1921) loss: 0.7774 (0.7813) time: 0.1608 data: 0.0664 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:16:51 lr: 0.000087 grad: 0.1477 (0.1818) loss: 0.7669 (0.7790) time: 0.1453 data: 0.0382 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:16:21 lr: 0.000087 grad: 0.1375 (0.1740) loss: 0.7768 (0.7776) time: 0.1538 data: 0.0687 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:15:53 lr: 0.000087 grad: 0.1234 (0.1665) loss: 0.7895 (0.7784) time: 0.1757 data: 0.0891 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:15:43 lr: 0.000087 grad: 0.1189 (0.1609) loss: 0.7869 (0.7795) time: 0.1759 data: 0.0937 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:15:21 lr: 0.000087 grad: 0.1252 (0.1564) loss: 0.7864 (0.7802) time: 0.1530 data: 0.0667 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:15:03 lr: 0.000087 grad: 0.1282 (0.1534) loss: 0.7693 (0.7799) time: 0.1411 data: 0.0580 max mem: 9377 +Train: [40] [1000/6250] eta: 0:14:43 lr: 0.000087 grad: 0.1263 (0.1511) loss: 0.7791 (0.7798) time: 0.1536 data: 0.0699 max mem: 9377 +Train: [40] [1100/6250] eta: 0:14:19 lr: 0.000087 grad: 0.1258 (0.1487) loss: 0.7767 (0.7797) time: 0.1557 data: 0.0764 max mem: 9377 +Train: [40] [1200/6250] eta: 0:14:05 lr: 0.000087 grad: 0.1177 (0.1467) loss: 0.7817 (0.7796) time: 0.1766 data: 0.0864 max mem: 9377 +Train: [40] [1300/6250] eta: 0:13:44 lr: 0.000087 grad: 0.1238 (0.1450) loss: 0.7767 (0.7796) time: 0.1615 data: 0.0645 max mem: 9377 +Train: [40] [1400/6250] eta: 0:13:29 lr: 0.000087 grad: 0.1261 (0.1437) loss: 0.7786 (0.7795) time: 0.1880 data: 0.0994 max mem: 9377 +Train: [40] [1500/6250] eta: 0:13:09 lr: 0.000087 grad: 0.1264 (0.1426) loss: 0.7720 (0.7794) time: 0.1634 data: 0.0701 max mem: 9377 +Train: [40] [1600/6250] eta: 0:12:50 lr: 0.000087 grad: 0.1195 (0.1414) loss: 0.7869 (0.7797) time: 0.1340 data: 0.0446 max mem: 9377 +Train: [40] [1700/6250] eta: 0:12:30 lr: 0.000087 grad: 0.1172 (0.1403) loss: 0.7738 (0.7798) time: 0.1562 data: 0.0697 max mem: 9377 +Train: [40] [1800/6250] eta: 0:12:13 lr: 0.000087 grad: 0.1175 (0.1392) loss: 0.7798 (0.7802) time: 0.1691 data: 0.0823 max mem: 9377 +Train: [40] [1900/6250] eta: 0:11:56 lr: 0.000087 grad: 0.1228 (0.1384) loss: 0.7863 (0.7803) time: 0.1541 data: 0.0663 max mem: 9377 +Train: [40] [2000/6250] eta: 0:11:39 lr: 0.000087 grad: 0.1252 (0.1378) loss: 0.7808 (0.7805) time: 0.1485 data: 0.0630 max mem: 9377 +Train: [40] [2100/6250] eta: 0:11:24 lr: 0.000087 grad: 0.1224 (0.1373) loss: 0.7877 (0.7807) time: 0.1735 data: 0.0958 max mem: 9377 +Train: [40] [2200/6250] eta: 0:11:07 lr: 0.000087 grad: 0.1321 (0.1367) loss: 0.7669 (0.7808) time: 0.1668 data: 0.0829 max mem: 9377 +Train: [40] [2300/6250] eta: 0:10:51 lr: 0.000087 grad: 0.1233 (0.1361) loss: 0.7817 (0.7810) time: 0.1584 data: 0.0727 max mem: 9377 +Train: [40] [2400/6250] eta: 0:10:33 lr: 0.000087 grad: 0.1088 (0.1355) loss: 0.7850 (0.7811) time: 0.1434 data: 0.0554 max mem: 9377 +Train: [40] [2500/6250] eta: 0:10:16 lr: 0.000087 grad: 0.1266 (0.1352) loss: 0.7877 (0.7811) time: 0.1590 data: 0.0769 max mem: 9377 +Train: [40] [2600/6250] eta: 0:09:58 lr: 0.000087 grad: 0.1238 (0.1349) loss: 0.7807 (0.7811) time: 0.1436 data: 0.0542 max mem: 9377 +Train: [40] [2700/6250] eta: 0:09:40 lr: 0.000087 grad: 0.1259 (0.1346) loss: 0.7766 (0.7808) time: 0.1815 data: 0.0935 max mem: 9377 +Train: [40] [2800/6250] eta: 0:09:23 lr: 0.000087 grad: 0.1188 (0.1342) loss: 0.7904 (0.7808) time: 0.1633 data: 0.0790 max mem: 9377 +Train: [40] [2900/6250] eta: 0:09:06 lr: 0.000087 grad: 0.1289 (0.1340) loss: 0.7662 (0.7806) time: 0.1418 data: 0.0472 max mem: 9377 +Train: [40] [3000/6250] eta: 0:08:49 lr: 0.000087 grad: 0.1192 (0.1338) loss: 0.7758 (0.7806) time: 0.1572 data: 0.0768 max mem: 9377 +Train: [40] [3100/6250] eta: 0:08:32 lr: 0.000087 grad: 0.1324 (0.1335) loss: 0.7756 (0.7805) time: 0.1701 data: 0.0875 max mem: 9377 +Train: [40] [3200/6250] eta: 0:08:15 lr: 0.000087 grad: 0.1278 (0.1332) loss: 0.7849 (0.7805) time: 0.1718 data: 0.0898 max mem: 9377 +Train: [40] [3300/6250] eta: 0:07:58 lr: 0.000087 grad: 0.1246 (0.1330) loss: 0.7720 (0.7804) time: 0.1430 data: 0.0525 max mem: 9377 +Train: [40] [3400/6250] eta: 0:07:41 lr: 0.000087 grad: 0.1249 (0.1329) loss: 0.7852 (0.7804) time: 0.1774 data: 0.0838 max mem: 9377 +Train: [40] [3500/6250] eta: 0:07:25 lr: 0.000087 grad: 0.1245 (0.1328) loss: 0.7705 (0.7803) time: 0.1796 data: 0.0980 max mem: 9377 +Train: [40] [3600/6250] eta: 0:07:08 lr: 0.000087 grad: 0.1283 (0.1327) loss: 0.7872 (0.7803) time: 0.1407 data: 0.0550 max mem: 9377 +Train: [40] [3700/6250] eta: 0:06:51 lr: 0.000086 grad: 0.1234 (0.1327) loss: 0.7765 (0.7802) time: 0.1679 data: 0.0831 max mem: 9377 +Train: [40] [3800/6250] eta: 0:06:35 lr: 0.000086 grad: 0.1262 (0.1326) loss: 0.7702 (0.7802) time: 0.1309 data: 0.0463 max mem: 9377 +Train: [40] [3900/6250] eta: 0:06:19 lr: 0.000086 grad: 0.1393 (0.1326) loss: 0.7734 (0.7801) time: 0.1665 data: 0.0848 max mem: 9377 +Train: [40] [4000/6250] eta: 0:06:02 lr: 0.000086 grad: 0.1364 (0.1326) loss: 0.7749 (0.7801) time: 0.1590 data: 0.0683 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:46 lr: 0.000086 grad: 0.1306 (0.1326) loss: 0.7719 (0.7799) time: 0.1512 data: 0.0713 max mem: 9377 +Train: [40] [4200/6250] eta: 0:05:29 lr: 0.000086 grad: 0.1347 (0.1327) loss: 0.7748 (0.7799) time: 0.1155 data: 0.0292 max mem: 9377 +Train: [40] [4300/6250] eta: 0:05:13 lr: 0.000086 grad: 0.1344 (0.1328) loss: 0.7568 (0.7798) time: 0.1605 data: 0.0727 max mem: 9377 +Train: [40] [4400/6250] eta: 0:04:57 lr: 0.000086 grad: 0.1232 (0.1327) loss: 0.7805 (0.7798) time: 0.1656 data: 0.0801 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:41 lr: 0.000086 grad: 0.1285 (0.1327) loss: 0.7762 (0.7797) time: 0.1939 data: 0.1105 max mem: 9377 +Train: [40] [4600/6250] eta: 0:04:26 lr: 0.000086 grad: 0.1291 (0.1327) loss: 0.7765 (0.7797) time: 0.1526 data: 0.0762 max mem: 9377 +Train: [40] [4700/6250] eta: 0:04:11 lr: 0.000086 grad: 0.1333 (0.1328) loss: 0.7742 (0.7796) time: 0.1837 data: 0.1007 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:55 lr: 0.000086 grad: 0.1233 (0.1329) loss: 0.7768 (0.7795) time: 0.1906 data: 0.1067 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:39 lr: 0.000086 grad: 0.1309 (0.1330) loss: 0.7783 (0.7795) time: 0.1952 data: 0.1068 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:23 lr: 0.000086 grad: 0.1230 (0.1330) loss: 0.7807 (0.7795) time: 0.1683 data: 0.0734 max mem: 9377 +Train: [40] [5100/6250] eta: 0:03:07 lr: 0.000086 grad: 0.1263 (0.1330) loss: 0.7826 (0.7795) time: 0.1733 data: 0.0824 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:52 lr: 0.000086 grad: 0.1318 (0.1330) loss: 0.7822 (0.7795) time: 0.1799 data: 0.0939 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:35 lr: 0.000086 grad: 0.1389 (0.1330) loss: 0.7688 (0.7795) time: 0.1945 data: 0.1139 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:19 lr: 0.000086 grad: 0.1297 (0.1332) loss: 0.7739 (0.7796) time: 0.1967 data: 0.1082 max mem: 9377 +Train: [40] [5500/6250] eta: 0:02:03 lr: 0.000086 grad: 0.1279 (0.1331) loss: 0.7707 (0.7797) time: 0.1666 data: 0.0726 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:46 lr: 0.000086 grad: 0.1167 (0.1331) loss: 0.7882 (0.7797) time: 0.1531 data: 0.0622 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:30 lr: 0.000086 grad: 0.1269 (0.1330) loss: 0.7785 (0.7798) time: 0.1759 data: 0.0858 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:13 lr: 0.000086 grad: 0.1289 (0.1330) loss: 0.7812 (0.7798) time: 0.1390 data: 0.0526 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:57 lr: 0.000086 grad: 0.1263 (0.1329) loss: 0.7850 (0.7799) time: 0.1776 data: 0.1019 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:40 lr: 0.000086 grad: 0.1243 (0.1329) loss: 0.7854 (0.7800) time: 0.2014 data: 0.1131 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:24 lr: 0.000086 grad: 0.1273 (0.1328) loss: 0.7826 (0.7801) time: 0.1512 data: 0.0624 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:08 lr: 0.000086 grad: 0.1281 (0.1327) loss: 0.7838 (0.7802) time: 0.1343 data: 0.0393 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.1296 (0.1327) loss: 0.7801 (0.7803) time: 0.1597 data: 0.0755 max mem: 9377 +Train: [40] Total time: 0:17:07 (0.1643 s / it) +Averaged stats: lr: 0.000086 grad: 0.1296 (0.1327) loss: 0.7801 (0.7803) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:06:12 loss: 0.8446 (0.8446) time: 6.0135 data: 5.9820 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8512 (0.8510) time: 0.1477 data: 0.1224 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:15 (0.2463 s / it) +Averaged stats (hcp-train-subset): loss: 0.8512 (0.8510) +Eval (hcp-val): [40] [ 0/62] eta: 0:03:52 loss: 0.8459 (0.8459) time: 3.7442 data: 3.6686 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8462 (0.8486) time: 0.1591 data: 0.1328 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:15 (0.2499 s / it) +Averaged stats (hcp-val): loss: 0.8462 (0.8486) +Eval (nsd-val): [40] [ 0/62] eta: 0:06:02 loss: 0.8150 (0.8150) time: 5.8392 data: 5.8076 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8248 (0.8244) time: 0.1658 data: 0.1396 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:15 (0.2575 s / it) +Averaged stats (nsd-val): loss: 0.8248 (0.8244) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 10:15:14 lr: 0.000086 grad: 0.1981 (0.1981) loss: 0.8521 (0.8521) time: 5.9062 data: 5.6897 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:24:15 lr: 0.000086 grad: 0.1323 (0.1925) loss: 0.8124 (0.8182) time: 0.1700 data: 0.0675 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:21:43 lr: 0.000086 grad: 0.1359 (0.1663) loss: 0.7982 (0.8134) time: 0.1668 data: 0.0688 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:19:48 lr: 0.000086 grad: 0.1245 (0.1595) loss: 0.7934 (0.8074) time: 0.1680 data: 0.0731 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:18:41 lr: 0.000086 grad: 0.1198 (0.1537) loss: 0.7966 (0.8036) time: 0.1624 data: 0.0777 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:17:56 lr: 0.000086 grad: 0.1305 (0.1490) loss: 0.7774 (0.8017) time: 0.1548 data: 0.0586 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:17:12 lr: 0.000086 grad: 0.1237 (0.1446) loss: 0.7893 (0.8011) time: 0.1594 data: 0.0585 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:16:38 lr: 0.000085 grad: 0.1255 (0.1419) loss: 0.7845 (0.7993) time: 0.1656 data: 0.0785 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:16:13 lr: 0.000085 grad: 0.1218 (0.1405) loss: 0.7876 (0.7976) time: 0.1515 data: 0.0659 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:15:53 lr: 0.000085 grad: 0.1115 (0.1394) loss: 0.7895 (0.7961) time: 0.1503 data: 0.0687 max mem: 9377 +Train: [41] [1000/6250] eta: 0:15:26 lr: 0.000085 grad: 0.1239 (0.1385) loss: 0.7870 (0.7949) time: 0.1543 data: 0.0783 max mem: 9377 +Train: [41] [1100/6250] eta: 0:14:58 lr: 0.000085 grad: 0.1277 (0.1374) loss: 0.7820 (0.7942) time: 0.1653 data: 0.0852 max mem: 9377 +Train: [41] [1200/6250] eta: 0:14:41 lr: 0.000085 grad: 0.1176 (0.1367) loss: 0.7820 (0.7932) time: 0.1705 data: 0.0771 max mem: 9377 +Train: [41] [1300/6250] eta: 0:14:20 lr: 0.000085 grad: 0.1301 (0.1359) loss: 0.7853 (0.7926) time: 0.1598 data: 0.0667 max mem: 9377 +Train: [41] [1400/6250] eta: 0:13:57 lr: 0.000085 grad: 0.1273 (0.1356) loss: 0.7771 (0.7915) time: 0.1230 data: 0.0378 max mem: 9377 +Train: [41] [1500/6250] eta: 0:13:33 lr: 0.000085 grad: 0.1271 (0.1351) loss: 0.7802 (0.7908) time: 0.1655 data: 0.0771 max mem: 9377 +Train: [41] [1600/6250] eta: 0:13:09 lr: 0.000085 grad: 0.1269 (0.1350) loss: 0.7771 (0.7901) time: 0.1422 data: 0.0393 max mem: 9377 +Train: [41] [1700/6250] eta: 0:12:46 lr: 0.000085 grad: 0.1242 (0.1346) loss: 0.7820 (0.7893) time: 0.1457 data: 0.0588 max mem: 9377 +Train: [41] [1800/6250] eta: 0:12:24 lr: 0.000085 grad: 0.1291 (0.1343) loss: 0.7827 (0.7888) time: 0.1478 data: 0.0517 max mem: 9377 +Train: [41] [1900/6250] eta: 0:12:02 lr: 0.000085 grad: 0.1218 (0.1339) loss: 0.7743 (0.7883) time: 0.1582 data: 0.0706 max mem: 9377 +Train: [41] [2000/6250] eta: 0:11:42 lr: 0.000085 grad: 0.1186 (0.1338) loss: 0.7739 (0.7881) time: 0.1400 data: 0.0484 max mem: 9377 +Train: [41] [2100/6250] eta: 0:11:25 lr: 0.000085 grad: 0.1237 (0.1337) loss: 0.7801 (0.7878) time: 0.1699 data: 0.0892 max mem: 9377 +Train: [41] [2200/6250] eta: 0:11:06 lr: 0.000085 grad: 0.1255 (0.1334) loss: 0.7853 (0.7876) time: 0.1477 data: 0.0551 max mem: 9377 +Train: [41] [2300/6250] eta: 0:10:49 lr: 0.000085 grad: 0.1205 (0.1332) loss: 0.7756 (0.7875) time: 0.1748 data: 0.0849 max mem: 9377 +Train: [41] [2400/6250] eta: 0:10:31 lr: 0.000085 grad: 0.1221 (0.1330) loss: 0.7849 (0.7873) time: 0.1397 data: 0.0507 max mem: 9377 +Train: [41] [2500/6250] eta: 0:10:14 lr: 0.000085 grad: 0.1319 (0.1328) loss: 0.7752 (0.7870) time: 0.1414 data: 0.0521 max mem: 9377 +Train: [41] [2600/6250] eta: 0:09:56 lr: 0.000085 grad: 0.1372 (0.1329) loss: 0.7689 (0.7865) time: 0.1559 data: 0.0684 max mem: 9377 +Train: [41] [2700/6250] eta: 0:09:38 lr: 0.000085 grad: 0.1285 (0.1329) loss: 0.7825 (0.7863) time: 0.1388 data: 0.0510 max mem: 9377 +Train: [41] [2800/6250] eta: 0:09:21 lr: 0.000085 grad: 0.1261 (0.1328) loss: 0.7701 (0.7859) time: 0.1417 data: 0.0475 max mem: 9377 +Train: [41] [2900/6250] eta: 0:09:04 lr: 0.000085 grad: 0.1239 (0.1328) loss: 0.7778 (0.7856) time: 0.1593 data: 0.0768 max mem: 9377 +Train: [41] [3000/6250] eta: 0:08:46 lr: 0.000085 grad: 0.1256 (0.1326) loss: 0.7819 (0.7853) time: 0.1356 data: 0.0506 max mem: 9377 +Train: [41] [3100/6250] eta: 0:08:30 lr: 0.000085 grad: 0.1282 (0.1326) loss: 0.7755 (0.7851) time: 0.1342 data: 0.0512 max mem: 9377 +Train: [41] [3200/6250] eta: 0:08:13 lr: 0.000085 grad: 0.1328 (0.1325) loss: 0.7792 (0.7849) time: 0.1363 data: 0.0479 max mem: 9377 +Train: [41] [3300/6250] eta: 0:07:57 lr: 0.000085 grad: 0.1186 (0.1325) loss: 0.7880 (0.7848) time: 0.1878 data: 0.1100 max mem: 9377 +Train: [41] [3400/6250] eta: 0:07:40 lr: 0.000085 grad: 0.1277 (0.1324) loss: 0.7821 (0.7847) time: 0.2187 data: 0.0533 max mem: 9377 +Train: [41] [3500/6250] eta: 0:07:23 lr: 0.000085 grad: 0.1206 (0.1323) loss: 0.7820 (0.7845) time: 0.1553 data: 0.0652 max mem: 9377 +Train: [41] [3600/6250] eta: 0:07:07 lr: 0.000085 grad: 0.1296 (0.1324) loss: 0.7763 (0.7843) time: 0.1775 data: 0.1025 max mem: 9377 +Train: [41] [3700/6250] eta: 0:06:50 lr: 0.000085 grad: 0.1324 (0.1323) loss: 0.7703 (0.7841) time: 0.1788 data: 0.0924 max mem: 9377 +Train: [41] [3800/6250] eta: 0:06:34 lr: 0.000085 grad: 0.1362 (0.1323) loss: 0.7820 (0.7840) time: 0.1504 data: 0.0525 max mem: 9377 +Train: [41] [3900/6250] eta: 0:06:18 lr: 0.000084 grad: 0.1245 (0.1323) loss: 0.7848 (0.7838) time: 0.1515 data: 0.0582 max mem: 9377 +Train: [41] [4000/6250] eta: 0:06:02 lr: 0.000084 grad: 0.1175 (0.1323) loss: 0.7902 (0.7838) time: 0.1603 data: 0.0738 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:46 lr: 0.000084 grad: 0.1249 (0.1323) loss: 0.7833 (0.7837) time: 0.1615 data: 0.0759 max mem: 9377 +Train: [41] [4200/6250] eta: 0:05:30 lr: 0.000084 grad: 0.1278 (0.1323) loss: 0.7883 (0.7837) time: 0.1565 data: 0.0702 max mem: 9377 +Train: [41] [4300/6250] eta: 0:05:13 lr: 0.000084 grad: 0.1255 (0.1322) loss: 0.7834 (0.7837) time: 0.1374 data: 0.0520 max mem: 9377 +Train: [41] [4400/6250] eta: 0:04:58 lr: 0.000084 grad: 0.1309 (0.1322) loss: 0.7916 (0.7837) time: 0.1598 data: 0.0720 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:42 lr: 0.000084 grad: 0.1246 (0.1322) loss: 0.7734 (0.7837) time: 0.1516 data: 0.0662 max mem: 9377 +Train: [41] [4600/6250] eta: 0:04:27 lr: 0.000084 grad: 0.1233 (0.1322) loss: 0.7845 (0.7837) time: 0.1726 data: 0.0955 max mem: 9377 +Train: [41] [4700/6250] eta: 0:04:11 lr: 0.000084 grad: 0.1260 (0.1322) loss: 0.7814 (0.7837) time: 0.1714 data: 0.0773 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:55 lr: 0.000084 grad: 0.1312 (0.1321) loss: 0.7788 (0.7837) time: 0.1679 data: 0.0730 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:39 lr: 0.000084 grad: 0.1282 (0.1321) loss: 0.7772 (0.7836) time: 0.1620 data: 0.0713 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:22 lr: 0.000084 grad: 0.1348 (0.1322) loss: 0.7824 (0.7836) time: 0.1662 data: 0.0707 max mem: 9377 +Train: [41] [5100/6250] eta: 0:03:07 lr: 0.000084 grad: 0.1248 (0.1322) loss: 0.7791 (0.7835) time: 0.1942 data: 0.1146 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:51 lr: 0.000084 grad: 0.1333 (0.1323) loss: 0.7820 (0.7834) time: 0.1575 data: 0.0608 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:34 lr: 0.000084 grad: 0.1307 (0.1323) loss: 0.7773 (0.7834) time: 0.1605 data: 0.0707 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:18 lr: 0.000084 grad: 0.1265 (0.1323) loss: 0.7784 (0.7833) time: 0.1609 data: 0.0614 max mem: 9377 +Train: [41] [5500/6250] eta: 0:02:02 lr: 0.000084 grad: 0.1289 (0.1323) loss: 0.7815 (0.7833) time: 0.1756 data: 0.0818 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:46 lr: 0.000084 grad: 0.1221 (0.1323) loss: 0.7889 (0.7832) time: 0.1482 data: 0.0474 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:29 lr: 0.000084 grad: 0.1342 (0.1323) loss: 0.7775 (0.7832) time: 0.1625 data: 0.0640 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:13 lr: 0.000084 grad: 0.1331 (0.1323) loss: 0.7800 (0.7832) time: 0.1513 data: 0.0597 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:56 lr: 0.000084 grad: 0.1324 (0.1324) loss: 0.7723 (0.7831) time: 0.1531 data: 0.0630 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:40 lr: 0.000084 grad: 0.1293 (0.1323) loss: 0.7820 (0.7830) time: 0.1487 data: 0.0514 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:24 lr: 0.000084 grad: 0.1288 (0.1325) loss: 0.7736 (0.7828) time: 0.1444 data: 0.0456 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:08 lr: 0.000084 grad: 0.1328 (0.1326) loss: 0.7745 (0.7827) time: 0.1576 data: 0.0649 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.1296 (0.1326) loss: 0.7762 (0.7827) time: 0.1592 data: 0.0672 max mem: 9377 +Train: [41] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000084 grad: 0.1296 (0.1326) loss: 0.7762 (0.7827) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:03:53 loss: 0.8444 (0.8444) time: 3.7583 data: 3.6642 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8515 (0.8516) time: 0.1282 data: 0.1030 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:15 (0.2526 s / it) +Averaged stats (hcp-train-subset): loss: 0.8515 (0.8516) +Eval (hcp-val): [41] [ 0/62] eta: 0:05:04 loss: 0.8473 (0.8473) time: 4.9145 data: 4.8539 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8479 (0.8493) time: 0.1553 data: 0.1296 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:15 (0.2489 s / it) +Averaged stats (hcp-val): loss: 0.8479 (0.8493) +Eval (nsd-val): [41] [ 0/62] eta: 0:06:07 loss: 0.8147 (0.8147) time: 5.9335 data: 5.9023 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8223 (0.8242) time: 0.1392 data: 0.1138 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:15 (0.2453 s / it) +Averaged stats (nsd-val): loss: 0.8223 (0.8242) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [42] [ 0/6250] eta: 10:15:03 lr: 0.000084 grad: 0.0818 (0.0818) loss: 0.8565 (0.8565) time: 5.9046 data: 5.5706 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:24:26 lr: 0.000084 grad: 0.1742 (0.2135) loss: 0.7706 (0.7951) time: 0.1752 data: 0.0592 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:21:22 lr: 0.000084 grad: 0.1659 (0.1944) loss: 0.7720 (0.7870) time: 0.1614 data: 0.0601 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:19:28 lr: 0.000084 grad: 0.1331 (0.1798) loss: 0.7860 (0.7856) time: 0.1478 data: 0.0465 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:18:20 lr: 0.000084 grad: 0.1339 (0.1699) loss: 0.7780 (0.7844) time: 0.1737 data: 0.0717 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:17:20 lr: 0.000084 grad: 0.1467 (0.1640) loss: 0.7880 (0.7842) time: 0.1434 data: 0.0532 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:16:36 lr: 0.000084 grad: 0.1352 (0.1599) loss: 0.7769 (0.7833) time: 0.1539 data: 0.0623 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:16:06 lr: 0.000084 grad: 0.1275 (0.1562) loss: 0.7954 (0.7834) time: 0.1300 data: 0.0387 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:15:35 lr: 0.000084 grad: 0.1259 (0.1528) loss: 0.7827 (0.7832) time: 0.1380 data: 0.0414 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:15:16 lr: 0.000083 grad: 0.1261 (0.1503) loss: 0.7909 (0.7828) time: 0.1870 data: 0.0952 max mem: 9377 +Train: [42] [1000/6250] eta: 0:14:54 lr: 0.000083 grad: 0.1326 (0.1479) loss: 0.7857 (0.7830) time: 0.1457 data: 0.0460 max mem: 9377 +Train: [42] [1100/6250] eta: 0:14:35 lr: 0.000083 grad: 0.1211 (0.1460) loss: 0.7880 (0.7830) time: 0.1778 data: 0.0920 max mem: 9377 +Train: [42] [1200/6250] eta: 0:14:19 lr: 0.000083 grad: 0.1240 (0.1445) loss: 0.7823 (0.7828) time: 0.1965 data: 0.1091 max mem: 9377 +Train: [42] [1300/6250] eta: 0:14:10 lr: 0.000083 grad: 0.1196 (0.1432) loss: 0.7769 (0.7825) time: 0.1836 data: 0.0958 max mem: 9377 +Train: [42] [1400/6250] eta: 0:13:54 lr: 0.000083 grad: 0.1334 (0.1422) loss: 0.7700 (0.7821) time: 0.1540 data: 0.0645 max mem: 9377 +Train: [42] [1500/6250] eta: 0:13:35 lr: 0.000083 grad: 0.1298 (0.1418) loss: 0.7756 (0.7817) time: 0.1683 data: 0.0766 max mem: 9377 +Train: [42] [1600/6250] eta: 0:13:13 lr: 0.000083 grad: 0.1226 (0.1410) loss: 0.7757 (0.7814) time: 0.1687 data: 0.0832 max mem: 9377 +Train: [42] [1700/6250] eta: 0:12:53 lr: 0.000083 grad: 0.1259 (0.1405) loss: 0.7805 (0.7809) time: 0.1288 data: 0.0312 max mem: 9377 +Train: [42] [1800/6250] eta: 0:12:34 lr: 0.000083 grad: 0.1343 (0.1402) loss: 0.7779 (0.7807) time: 0.1735 data: 0.0864 max mem: 9377 +Train: [42] [1900/6250] eta: 0:12:15 lr: 0.000083 grad: 0.1290 (0.1398) loss: 0.7787 (0.7804) time: 0.1521 data: 0.0618 max mem: 9377 +Train: [42] [2000/6250] eta: 0:11:58 lr: 0.000083 grad: 0.1365 (0.1396) loss: 0.7849 (0.7801) time: 0.1827 data: 0.0970 max mem: 9377 +Train: [42] [2100/6250] eta: 0:11:42 lr: 0.000083 grad: 0.1332 (0.1394) loss: 0.7656 (0.7797) time: 0.1547 data: 0.0740 max mem: 9377 +Train: [42] [2200/6250] eta: 0:11:26 lr: 0.000083 grad: 0.1352 (0.1392) loss: 0.7735 (0.7795) time: 0.2313 data: 0.1480 max mem: 9377 +Train: [42] [2300/6250] eta: 0:11:08 lr: 0.000083 grad: 0.1280 (0.1389) loss: 0.7755 (0.7793) time: 0.1805 data: 0.0864 max mem: 9377 +Train: [42] [2400/6250] eta: 0:10:49 lr: 0.000083 grad: 0.1246 (0.1386) loss: 0.7779 (0.7792) time: 0.1566 data: 0.0547 max mem: 9377 +Train: [42] [2500/6250] eta: 0:10:30 lr: 0.000083 grad: 0.1295 (0.1383) loss: 0.7855 (0.7792) time: 0.1353 data: 0.0304 max mem: 9377 +Train: [42] [2600/6250] eta: 0:10:11 lr: 0.000083 grad: 0.1226 (0.1380) loss: 0.7718 (0.7792) time: 0.1401 data: 0.0485 max mem: 9377 +Train: [42] [2700/6250] eta: 0:09:53 lr: 0.000083 grad: 0.1289 (0.1376) loss: 0.7842 (0.7791) time: 0.1572 data: 0.0809 max mem: 9377 +Train: [42] [2800/6250] eta: 0:09:36 lr: 0.000083 grad: 0.1188 (0.1371) loss: 0.7911 (0.7793) time: 0.1830 data: 0.0920 max mem: 9377 +Train: [42] [2900/6250] eta: 0:09:19 lr: 0.000083 grad: 0.1196 (0.1366) loss: 0.7812 (0.7795) time: 0.1769 data: 0.0838 max mem: 9377 +Train: [42] [3000/6250] eta: 0:09:01 lr: 0.000083 grad: 0.1243 (0.1363) loss: 0.7883 (0.7796) time: 0.1548 data: 0.0673 max mem: 9377 +Train: [42] [3100/6250] eta: 0:08:44 lr: 0.000083 grad: 0.1217 (0.1360) loss: 0.7922 (0.7798) time: 0.1359 data: 0.0492 max mem: 9377 +Train: [42] [3200/6250] eta: 0:08:27 lr: 0.000083 grad: 0.1277 (0.1359) loss: 0.7788 (0.7799) time: 0.1640 data: 0.0684 max mem: 9377 +Train: [42] [3300/6250] eta: 0:08:10 lr: 0.000083 grad: 0.1301 (0.1360) loss: 0.7795 (0.7799) time: 0.1742 data: 0.0860 max mem: 9377 +Train: [42] [3400/6250] eta: 0:07:53 lr: 0.000083 grad: 0.1298 (0.1360) loss: 0.7722 (0.7799) time: 0.1835 data: 0.0907 max mem: 9377 +Train: [42] [3500/6250] eta: 0:07:36 lr: 0.000083 grad: 0.1334 (0.1359) loss: 0.7764 (0.7798) time: 0.1515 data: 0.0672 max mem: 9377 +Train: [42] [3600/6250] eta: 0:07:19 lr: 0.000083 grad: 0.1257 (0.1362) loss: 0.7748 (0.7797) time: 0.1637 data: 0.0745 max mem: 9377 +Train: [42] [3700/6250] eta: 0:07:02 lr: 0.000083 grad: 0.1325 (0.1362) loss: 0.7738 (0.7796) time: 0.1450 data: 0.0611 max mem: 9377 +Train: [42] [3800/6250] eta: 0:06:45 lr: 0.000083 grad: 0.1289 (0.1362) loss: 0.7791 (0.7795) time: 0.1512 data: 0.0711 max mem: 9377 +Train: [42] [3900/6250] eta: 0:06:28 lr: 0.000083 grad: 0.1355 (0.1362) loss: 0.7722 (0.7793) time: 0.1620 data: 0.0713 max mem: 9377 +Train: [42] [4000/6250] eta: 0:06:11 lr: 0.000083 grad: 0.1295 (0.1363) loss: 0.7766 (0.7792) time: 0.1594 data: 0.0758 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:54 lr: 0.000082 grad: 0.1356 (0.1363) loss: 0.7726 (0.7791) time: 0.1471 data: 0.0625 max mem: 9377 +Train: [42] [4200/6250] eta: 0:05:37 lr: 0.000082 grad: 0.1361 (0.1363) loss: 0.7579 (0.7789) time: 0.1661 data: 0.0712 max mem: 9377 +Train: [42] [4300/6250] eta: 0:05:21 lr: 0.000082 grad: 0.1272 (0.1362) loss: 0.7822 (0.7788) time: 0.1933 data: 0.1062 max mem: 9377 +Train: [42] [4400/6250] eta: 0:05:05 lr: 0.000082 grad: 0.1325 (0.1362) loss: 0.7708 (0.7787) time: 0.1683 data: 0.0759 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:49 lr: 0.000082 grad: 0.1349 (0.1363) loss: 0.7810 (0.7786) time: 0.1891 data: 0.1097 max mem: 9377 +Train: [42] [4600/6250] eta: 0:04:32 lr: 0.000082 grad: 0.1306 (0.1362) loss: 0.7669 (0.7785) time: 0.1749 data: 0.0825 max mem: 9377 +Train: [42] [4700/6250] eta: 0:04:16 lr: 0.000082 grad: 0.1295 (0.1360) loss: 0.7714 (0.7785) time: 0.1467 data: 0.0608 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:59 lr: 0.000082 grad: 0.1293 (0.1359) loss: 0.7805 (0.7784) time: 0.1480 data: 0.0577 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:43 lr: 0.000082 grad: 0.1256 (0.1359) loss: 0.7855 (0.7784) time: 0.1530 data: 0.0597 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:26 lr: 0.000082 grad: 0.1295 (0.1358) loss: 0.7604 (0.7783) time: 0.1682 data: 0.0828 max mem: 9377 +Train: [42] [5100/6250] eta: 0:03:09 lr: 0.000082 grad: 0.1325 (0.1358) loss: 0.7755 (0.7782) time: 0.1648 data: 0.0754 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:53 lr: 0.000082 grad: 0.1358 (0.1357) loss: 0.7719 (0.7782) time: 0.1757 data: 0.0781 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:36 lr: 0.000082 grad: 0.1357 (0.1356) loss: 0.7750 (0.7782) time: 0.1445 data: 0.0517 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:20 lr: 0.000082 grad: 0.1312 (0.1357) loss: 0.7789 (0.7782) time: 0.1473 data: 0.0622 max mem: 9377 +Train: [42] [5500/6250] eta: 0:02:03 lr: 0.000082 grad: 0.1385 (0.1357) loss: 0.7744 (0.7781) time: 0.1661 data: 0.0791 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:47 lr: 0.000082 grad: 0.1241 (0.1356) loss: 0.7845 (0.7782) time: 0.1721 data: 0.0720 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:30 lr: 0.000082 grad: 0.1227 (0.1356) loss: 0.7796 (0.7782) time: 0.1800 data: 0.0824 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:14 lr: 0.000082 grad: 0.1291 (0.1355) loss: 0.7796 (0.7782) time: 0.1489 data: 0.0543 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:57 lr: 0.000082 grad: 0.1306 (0.1355) loss: 0.7719 (0.7782) time: 0.1545 data: 0.0595 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:41 lr: 0.000082 grad: 0.1273 (0.1354) loss: 0.7889 (0.7782) time: 0.1474 data: 0.0580 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:24 lr: 0.000082 grad: 0.1369 (0.1354) loss: 0.7799 (0.7782) time: 0.1638 data: 0.0709 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:08 lr: 0.000082 grad: 0.1352 (0.1354) loss: 0.7709 (0.7782) time: 0.1447 data: 0.0492 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.1261 (0.1354) loss: 0.7825 (0.7782) time: 0.1552 data: 0.0700 max mem: 9377 +Train: [42] Total time: 0:17:12 (0.1651 s / it) +Averaged stats: lr: 0.000082 grad: 0.1261 (0.1354) loss: 0.7825 (0.7782) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:05:32 loss: 0.8447 (0.8447) time: 5.3583 data: 5.3271 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8472 (0.8488) time: 0.1382 data: 0.1125 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:14 (0.2315 s / it) +Averaged stats (hcp-train-subset): loss: 0.8472 (0.8488) +Eval (hcp-val): [42] [ 0/62] eta: 0:03:31 loss: 0.8451 (0.8451) time: 3.4091 data: 3.3181 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8448 (0.8472) time: 0.1252 data: 0.0984 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:13 (0.2258 s / it) +Averaged stats (hcp-val): loss: 0.8448 (0.8472) +Eval (nsd-val): [42] [ 0/62] eta: 0:05:06 loss: 0.8139 (0.8139) time: 4.9409 data: 4.9050 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8206 (0.8236) time: 0.1257 data: 0.1003 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:13 (0.2207 s / it) +Averaged stats (nsd-val): loss: 0.8206 (0.8236) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 10:43:11 lr: 0.000082 grad: 0.1417 (0.1417) loss: 0.8454 (0.8454) time: 6.1747 data: 6.0479 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:22:13 lr: 0.000082 grad: 0.1651 (0.2097) loss: 0.7915 (0.8030) time: 0.1585 data: 0.0529 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:19:19 lr: 0.000082 grad: 0.1592 (0.1966) loss: 0.7841 (0.7961) time: 0.1724 data: 0.0754 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:18:05 lr: 0.000082 grad: 0.1394 (0.1767) loss: 0.7959 (0.7960) time: 0.1460 data: 0.0373 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:17:12 lr: 0.000082 grad: 0.1380 (0.1681) loss: 0.7923 (0.7943) time: 0.1552 data: 0.0515 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:16:30 lr: 0.000082 grad: 0.1226 (0.1619) loss: 0.7939 (0.7928) time: 0.1607 data: 0.0564 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:15:58 lr: 0.000082 grad: 0.1290 (0.1575) loss: 0.7807 (0.7908) time: 0.1264 data: 0.0283 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:15:33 lr: 0.000082 grad: 0.1218 (0.1543) loss: 0.7891 (0.7895) time: 0.1425 data: 0.0503 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:15:19 lr: 0.000082 grad: 0.1182 (0.1515) loss: 0.7906 (0.7884) time: 0.1749 data: 0.0891 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:15:02 lr: 0.000082 grad: 0.1289 (0.1491) loss: 0.7905 (0.7881) time: 0.1852 data: 0.0969 max mem: 9377 +Train: [43] [1000/6250] eta: 0:14:44 lr: 0.000081 grad: 0.1257 (0.1474) loss: 0.7923 (0.7880) time: 0.1567 data: 0.0691 max mem: 9377 +Train: [43] [1100/6250] eta: 0:14:25 lr: 0.000081 grad: 0.1248 (0.1458) loss: 0.7886 (0.7877) time: 0.1705 data: 0.0818 max mem: 9377 +Train: [43] [1200/6250] eta: 0:14:04 lr: 0.000081 grad: 0.1183 (0.1446) loss: 0.7927 (0.7873) time: 0.1659 data: 0.0817 max mem: 9377 +Train: [43] [1300/6250] eta: 0:13:49 lr: 0.000081 grad: 0.1212 (0.1434) loss: 0.7855 (0.7870) time: 0.1692 data: 0.0860 max mem: 9377 +Train: [43] [1400/6250] eta: 0:13:32 lr: 0.000081 grad: 0.1280 (0.1425) loss: 0.7940 (0.7871) time: 0.1719 data: 0.0762 max mem: 9377 +Train: [43] [1500/6250] eta: 0:13:12 lr: 0.000081 grad: 0.1261 (0.1417) loss: 0.7763 (0.7868) time: 0.1727 data: 0.0833 max mem: 9377 +Train: [43] [1600/6250] eta: 0:12:51 lr: 0.000081 grad: 0.1361 (0.1412) loss: 0.7714 (0.7865) time: 0.1547 data: 0.0681 max mem: 9377 +Train: [43] [1700/6250] eta: 0:12:30 lr: 0.000081 grad: 0.1355 (0.1405) loss: 0.7826 (0.7863) time: 0.1482 data: 0.0550 max mem: 9377 +Train: [43] [1800/6250] eta: 0:12:11 lr: 0.000081 grad: 0.1381 (0.1405) loss: 0.7874 (0.7862) time: 0.1408 data: 0.0461 max mem: 9377 +Train: [43] [1900/6250] eta: 0:11:51 lr: 0.000081 grad: 0.1317 (0.1402) loss: 0.7791 (0.7859) time: 0.1455 data: 0.0599 max mem: 9377 +Train: [43] [2000/6250] eta: 0:11:32 lr: 0.000081 grad: 0.1366 (0.1400) loss: 0.7713 (0.7853) time: 0.1476 data: 0.0577 max mem: 9377 +Train: [43] [2100/6250] eta: 0:11:16 lr: 0.000081 grad: 0.1297 (0.1399) loss: 0.7742 (0.7847) time: 0.1867 data: 0.0988 max mem: 9377 +Train: [43] [2200/6250] eta: 0:10:58 lr: 0.000081 grad: 0.1274 (0.1396) loss: 0.7726 (0.7843) time: 0.1577 data: 0.0749 max mem: 9377 +Train: [43] [2300/6250] eta: 0:10:41 lr: 0.000081 grad: 0.1299 (0.1392) loss: 0.7751 (0.7840) time: 0.1586 data: 0.0752 max mem: 9377 +Train: [43] [2400/6250] eta: 0:10:24 lr: 0.000081 grad: 0.1312 (0.1389) loss: 0.7683 (0.7838) time: 0.1725 data: 0.0761 max mem: 9377 +Train: [43] [2500/6250] eta: 0:10:07 lr: 0.000081 grad: 0.1314 (0.1387) loss: 0.7772 (0.7835) time: 0.1382 data: 0.0539 max mem: 9377 +Train: [43] [2600/6250] eta: 0:09:50 lr: 0.000081 grad: 0.1380 (0.1387) loss: 0.7811 (0.7832) time: 0.1617 data: 0.0775 max mem: 9377 +Train: [43] [2700/6250] eta: 0:09:33 lr: 0.000081 grad: 0.1242 (0.1385) loss: 0.7826 (0.7830) time: 0.1549 data: 0.0614 max mem: 9377 +Train: [43] [2800/6250] eta: 0:09:16 lr: 0.000081 grad: 0.1329 (0.1382) loss: 0.7636 (0.7829) time: 0.1582 data: 0.0707 max mem: 9377 +Train: [43] [2900/6250] eta: 0:09:00 lr: 0.000081 grad: 0.1305 (0.1380) loss: 0.7879 (0.7829) time: 0.1562 data: 0.0708 max mem: 9377 +Train: [43] [3000/6250] eta: 0:08:43 lr: 0.000081 grad: 0.1298 (0.1378) loss: 0.7805 (0.7827) time: 0.1578 data: 0.0741 max mem: 9377 +Train: [43] [3100/6250] eta: 0:08:26 lr: 0.000081 grad: 0.1356 (0.1378) loss: 0.7803 (0.7826) time: 0.1414 data: 0.0505 max mem: 9377 +Train: [43] [3200/6250] eta: 0:08:10 lr: 0.000081 grad: 0.1337 (0.1375) loss: 0.7805 (0.7825) time: 0.1656 data: 0.0659 max mem: 9377 +Train: [43] [3300/6250] eta: 0:07:53 lr: 0.000081 grad: 0.1361 (0.1375) loss: 0.7851 (0.7824) time: 0.1608 data: 0.0688 max mem: 9377 +Train: [43] [3400/6250] eta: 0:07:37 lr: 0.000081 grad: 0.1373 (0.1375) loss: 0.7699 (0.7823) time: 0.1765 data: 0.0895 max mem: 9377 +Train: [43] [3500/6250] eta: 0:07:20 lr: 0.000081 grad: 0.1377 (0.1375) loss: 0.7779 (0.7821) time: 0.1521 data: 0.0593 max mem: 9377 +Train: [43] [3600/6250] eta: 0:07:04 lr: 0.000081 grad: 0.1336 (0.1376) loss: 0.7711 (0.7819) time: 0.1822 data: 0.1005 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:48 lr: 0.000081 grad: 0.1351 (0.1376) loss: 0.7792 (0.7818) time: 0.1501 data: 0.0632 max mem: 9377 +Train: [43] [3800/6250] eta: 0:06:32 lr: 0.000081 grad: 0.1278 (0.1375) loss: 0.7782 (0.7816) time: 0.1646 data: 0.0757 max mem: 9377 +Train: [43] [3900/6250] eta: 0:06:16 lr: 0.000081 grad: 0.1302 (0.1374) loss: 0.7754 (0.7815) time: 0.1574 data: 0.0724 max mem: 9377 +Train: [43] [4000/6250] eta: 0:06:00 lr: 0.000081 grad: 0.1321 (0.1375) loss: 0.7842 (0.7814) time: 0.1684 data: 0.0831 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:44 lr: 0.000081 grad: 0.1349 (0.1375) loss: 0.7636 (0.7812) time: 0.1853 data: 0.1019 max mem: 9377 +Train: [43] [4200/6250] eta: 0:05:27 lr: 0.000080 grad: 0.1299 (0.1375) loss: 0.7777 (0.7810) time: 0.1657 data: 0.0739 max mem: 9377 +Train: [43] [4300/6250] eta: 0:05:12 lr: 0.000080 grad: 0.1332 (0.1376) loss: 0.7829 (0.7809) time: 0.1768 data: 0.0863 max mem: 9377 +Train: [43] [4400/6250] eta: 0:04:56 lr: 0.000080 grad: 0.1286 (0.1376) loss: 0.7750 (0.7807) time: 0.1479 data: 0.0593 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:40 lr: 0.000080 grad: 0.1442 (0.1377) loss: 0.7808 (0.7805) time: 0.1511 data: 0.0611 max mem: 9377 +Train: [43] [4600/6250] eta: 0:04:24 lr: 0.000080 grad: 0.1335 (0.1378) loss: 0.7693 (0.7804) time: 0.1553 data: 0.0605 max mem: 9377 +Train: [43] [4700/6250] eta: 0:04:08 lr: 0.000080 grad: 0.1416 (0.1377) loss: 0.7758 (0.7803) time: 0.1784 data: 0.0830 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:52 lr: 0.000080 grad: 0.1299 (0.1377) loss: 0.7822 (0.7801) time: 0.1556 data: 0.0677 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:36 lr: 0.000080 grad: 0.1332 (0.1377) loss: 0.7652 (0.7800) time: 0.1771 data: 0.0853 max mem: 9377 +Train: [43] [5000/6250] eta: 0:03:20 lr: 0.000080 grad: 0.1337 (0.1377) loss: 0.7665 (0.7799) time: 0.1671 data: 0.0728 max mem: 9377 +Train: [43] [5100/6250] eta: 0:03:05 lr: 0.000080 grad: 0.1384 (0.1377) loss: 0.7825 (0.7798) time: 0.1842 data: 0.0962 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:49 lr: 0.000080 grad: 0.1359 (0.1377) loss: 0.7653 (0.7797) time: 0.1588 data: 0.0638 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:33 lr: 0.000080 grad: 0.1364 (0.1377) loss: 0.7745 (0.7795) time: 0.1503 data: 0.0509 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:17 lr: 0.000080 grad: 0.1346 (0.1377) loss: 0.7734 (0.7794) time: 0.1249 data: 0.0405 max mem: 9377 +Train: [43] [5500/6250] eta: 0:02:00 lr: 0.000080 grad: 0.1355 (0.1378) loss: 0.7731 (0.7793) time: 0.1388 data: 0.0454 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:44 lr: 0.000080 grad: 0.1379 (0.1378) loss: 0.7725 (0.7792) time: 0.1763 data: 0.0789 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:28 lr: 0.000080 grad: 0.1307 (0.1378) loss: 0.7757 (0.7791) time: 0.1592 data: 0.0610 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:12 lr: 0.000080 grad: 0.1299 (0.1378) loss: 0.7787 (0.7791) time: 0.1470 data: 0.0567 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:56 lr: 0.000080 grad: 0.1300 (0.1377) loss: 0.7722 (0.7791) time: 0.1565 data: 0.0643 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:40 lr: 0.000080 grad: 0.1346 (0.1376) loss: 0.7764 (0.7790) time: 0.1798 data: 0.0884 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:24 lr: 0.000080 grad: 0.1286 (0.1375) loss: 0.7757 (0.7789) time: 0.1477 data: 0.0586 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:08 lr: 0.000080 grad: 0.1400 (0.1375) loss: 0.7683 (0.7789) time: 0.1497 data: 0.0595 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.1360 (0.1374) loss: 0.7692 (0.7789) time: 0.1835 data: 0.1037 max mem: 9377 +Train: [43] Total time: 0:16:52 (0.1619 s / it) +Averaged stats: lr: 0.000080 grad: 0.1360 (0.1374) loss: 0.7692 (0.7789) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:05:36 loss: 0.8485 (0.8485) time: 5.4306 data: 5.3992 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8498 (0.8517) time: 0.1597 data: 0.1337 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:15 (0.2548 s / it) +Averaged stats (hcp-train-subset): loss: 0.8498 (0.8517) +Eval (hcp-val): [43] [ 0/62] eta: 0:05:47 loss: 0.8466 (0.8466) time: 5.6017 data: 5.5641 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8492 (0.8494) time: 0.1280 data: 0.1007 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:15 (0.2486 s / it) +Averaged stats (hcp-val): loss: 0.8492 (0.8494) +Eval (nsd-val): [43] [ 0/62] eta: 0:05:38 loss: 0.8083 (0.8083) time: 5.4578 data: 5.4267 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8216 (0.8230) time: 0.1551 data: 0.1293 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:15 (0.2541 s / it) +Averaged stats (nsd-val): loss: 0.8216 (0.8230) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 11:38:38 lr: 0.000080 grad: 0.1207 (0.1207) loss: 0.8324 (0.8324) time: 6.7069 data: 6.5750 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:25:38 lr: 0.000080 grad: 0.2048 (0.2213) loss: 0.7804 (0.8029) time: 0.1454 data: 0.0005 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:21:35 lr: 0.000080 grad: 0.1372 (0.2062) loss: 0.7980 (0.7935) time: 0.1849 data: 0.0915 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:19:54 lr: 0.000080 grad: 0.1309 (0.1889) loss: 0.7943 (0.7895) time: 0.1463 data: 0.0376 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:18:32 lr: 0.000080 grad: 0.1346 (0.1775) loss: 0.7879 (0.7885) time: 0.1652 data: 0.0746 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:17:45 lr: 0.000080 grad: 0.1366 (0.1695) loss: 0.7828 (0.7880) time: 0.1757 data: 0.0834 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:17:05 lr: 0.000080 grad: 0.1335 (0.1634) loss: 0.7801 (0.7880) time: 0.1875 data: 0.0997 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:16:22 lr: 0.000080 grad: 0.1169 (0.1584) loss: 0.7955 (0.7889) time: 0.1551 data: 0.0602 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:15:50 lr: 0.000080 grad: 0.1322 (0.1549) loss: 0.7889 (0.7892) time: 0.1770 data: 0.0891 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:15:31 lr: 0.000080 grad: 0.1211 (0.1517) loss: 0.7868 (0.7897) time: 0.1738 data: 0.0948 max mem: 9377 +Train: [44] [1000/6250] eta: 0:15:14 lr: 0.000080 grad: 0.1338 (0.1498) loss: 0.7869 (0.7897) time: 0.1648 data: 0.0811 max mem: 9377 +Train: [44] [1100/6250] eta: 0:15:05 lr: 0.000079 grad: 0.1216 (0.1477) loss: 0.7838 (0.7892) time: 0.2039 data: 0.1104 max mem: 9377 +Train: [44] [1200/6250] eta: 0:14:51 lr: 0.000079 grad: 0.1271 (0.1463) loss: 0.7861 (0.7887) time: 0.1520 data: 0.0561 max mem: 9377 +Train: [44] [1300/6250] eta: 0:14:41 lr: 0.000079 grad: 0.1261 (0.1452) loss: 0.7809 (0.7883) time: 0.1949 data: 0.0963 max mem: 9377 +Train: [44] [1400/6250] eta: 0:14:27 lr: 0.000079 grad: 0.1353 (0.1446) loss: 0.7786 (0.7877) time: 0.1756 data: 0.0768 max mem: 9377 +Train: [44] [1500/6250] eta: 0:14:07 lr: 0.000079 grad: 0.1273 (0.1438) loss: 0.7781 (0.7871) time: 0.1638 data: 0.0674 max mem: 9377 +Train: [44] [1600/6250] eta: 0:13:49 lr: 0.000079 grad: 0.1260 (0.1434) loss: 0.7753 (0.7866) time: 0.1733 data: 0.0830 max mem: 9377 +Train: [44] [1700/6250] eta: 0:13:27 lr: 0.000079 grad: 0.1365 (0.1428) loss: 0.7755 (0.7864) time: 0.1688 data: 0.0740 max mem: 9377 +Train: [44] [1800/6250] eta: 0:13:05 lr: 0.000079 grad: 0.1279 (0.1426) loss: 0.7900 (0.7861) time: 0.1633 data: 0.0590 max mem: 9377 +Train: [44] [1900/6250] eta: 0:12:44 lr: 0.000079 grad: 0.1302 (0.1421) loss: 0.7822 (0.7861) time: 0.1661 data: 0.0822 max mem: 9377 +Train: [44] [2000/6250] eta: 0:12:24 lr: 0.000079 grad: 0.1263 (0.1416) loss: 0.7823 (0.7860) time: 0.1579 data: 0.0706 max mem: 9377 +Train: [44] [2100/6250] eta: 0:12:04 lr: 0.000079 grad: 0.1284 (0.1413) loss: 0.7825 (0.7859) time: 0.1607 data: 0.0705 max mem: 9377 +Train: [44] [2200/6250] eta: 0:11:44 lr: 0.000079 grad: 0.1303 (0.1408) loss: 0.7823 (0.7858) time: 0.1595 data: 0.0670 max mem: 9377 +Train: [44] [2300/6250] eta: 0:11:23 lr: 0.000079 grad: 0.1268 (0.1403) loss: 0.7796 (0.7859) time: 0.1145 data: 0.0249 max mem: 9377 +Train: [44] [2400/6250] eta: 0:11:03 lr: 0.000079 grad: 0.1291 (0.1399) loss: 0.7889 (0.7859) time: 0.1480 data: 0.0527 max mem: 9377 +Train: [44] [2500/6250] eta: 0:10:44 lr: 0.000079 grad: 0.1299 (0.1396) loss: 0.7845 (0.7859) time: 0.1541 data: 0.0593 max mem: 9377 +Train: [44] [2600/6250] eta: 0:10:26 lr: 0.000079 grad: 0.1360 (0.1394) loss: 0.7846 (0.7858) time: 0.1733 data: 0.0904 max mem: 9377 +Train: [44] [2700/6250] eta: 0:10:09 lr: 0.000079 grad: 0.1354 (0.1392) loss: 0.7819 (0.7858) time: 0.1621 data: 0.0719 max mem: 9377 +Train: [44] [2800/6250] eta: 0:09:50 lr: 0.000079 grad: 0.1382 (0.1390) loss: 0.7844 (0.7856) time: 0.1486 data: 0.0639 max mem: 9377 +Train: [44] [2900/6250] eta: 0:09:32 lr: 0.000079 grad: 0.1323 (0.1388) loss: 0.7851 (0.7856) time: 0.1776 data: 0.0929 max mem: 9377 +Train: [44] [3000/6250] eta: 0:09:14 lr: 0.000079 grad: 0.1314 (0.1387) loss: 0.7914 (0.7855) time: 0.1545 data: 0.0703 max mem: 9377 +Train: [44] [3100/6250] eta: 0:08:57 lr: 0.000079 grad: 0.1250 (0.1384) loss: 0.7834 (0.7855) time: 0.1610 data: 0.0694 max mem: 9377 +Train: [44] [3200/6250] eta: 0:08:38 lr: 0.000079 grad: 0.1281 (0.1381) loss: 0.7719 (0.7854) time: 0.1547 data: 0.0644 max mem: 9377 +Train: [44] [3300/6250] eta: 0:08:21 lr: 0.000079 grad: 0.1367 (0.1381) loss: 0.7885 (0.7854) time: 0.1470 data: 0.0634 max mem: 9377 +Train: [44] [3400/6250] eta: 0:08:03 lr: 0.000079 grad: 0.1308 (0.1380) loss: 0.7855 (0.7854) time: 0.1625 data: 0.0727 max mem: 9377 +Train: [44] [3500/6250] eta: 0:07:46 lr: 0.000079 grad: 0.1326 (0.1379) loss: 0.7825 (0.7853) time: 0.1547 data: 0.0667 max mem: 9377 +Train: [44] [3600/6250] eta: 0:07:29 lr: 0.000079 grad: 0.1325 (0.1378) loss: 0.7785 (0.7852) time: 0.2024 data: 0.1187 max mem: 9377 +Train: [44] [3700/6250] eta: 0:07:10 lr: 0.000079 grad: 0.1350 (0.1377) loss: 0.7783 (0.7851) time: 0.1715 data: 0.0835 max mem: 9377 +Train: [44] [3800/6250] eta: 0:06:52 lr: 0.000079 grad: 0.1376 (0.1377) loss: 0.7813 (0.7851) time: 0.1491 data: 0.0542 max mem: 9377 +Train: [44] [3900/6250] eta: 0:06:34 lr: 0.000079 grad: 0.1283 (0.1375) loss: 0.7813 (0.7850) time: 0.1494 data: 0.0610 max mem: 9377 +Train: [44] [4000/6250] eta: 0:06:17 lr: 0.000079 grad: 0.1304 (0.1374) loss: 0.7823 (0.7850) time: 0.1680 data: 0.0695 max mem: 9377 +Train: [44] [4100/6250] eta: 0:05:59 lr: 0.000079 grad: 0.1284 (0.1373) loss: 0.7879 (0.7850) time: 0.1456 data: 0.0624 max mem: 9377 +Train: [44] [4200/6250] eta: 0:05:42 lr: 0.000078 grad: 0.1355 (0.1373) loss: 0.7830 (0.7850) time: 0.1519 data: 0.0649 max mem: 9377 +Train: [44] [4300/6250] eta: 0:05:26 lr: 0.000078 grad: 0.1362 (0.1372) loss: 0.7815 (0.7850) time: 0.1564 data: 0.0648 max mem: 9377 +Train: [44] [4400/6250] eta: 0:05:09 lr: 0.000078 grad: 0.1332 (0.1371) loss: 0.7808 (0.7850) time: 0.1566 data: 0.0726 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:52 lr: 0.000078 grad: 0.1259 (0.1370) loss: 0.7924 (0.7851) time: 0.1728 data: 0.0846 max mem: 9377 +Train: [44] [4600/6250] eta: 0:04:35 lr: 0.000078 grad: 0.1231 (0.1370) loss: 0.7883 (0.7851) time: 0.1530 data: 0.0682 max mem: 9377 +Train: [44] [4700/6250] eta: 0:04:18 lr: 0.000078 grad: 0.1300 (0.1370) loss: 0.7890 (0.7851) time: 0.1688 data: 0.0773 max mem: 9377 +Train: [44] [4800/6250] eta: 0:04:01 lr: 0.000078 grad: 0.1351 (0.1369) loss: 0.7872 (0.7851) time: 0.2014 data: 0.1164 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:45 lr: 0.000078 grad: 0.1307 (0.1368) loss: 0.7868 (0.7852) time: 0.1800 data: 0.1005 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:28 lr: 0.000078 grad: 0.1251 (0.1367) loss: 0.7844 (0.7852) time: 0.1818 data: 0.0905 max mem: 9377 +Train: [44] [5100/6250] eta: 0:03:11 lr: 0.000078 grad: 0.1356 (0.1366) loss: 0.7775 (0.7852) time: 0.1702 data: 0.0796 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:54 lr: 0.000078 grad: 0.1391 (0.1366) loss: 0.7702 (0.7851) time: 0.1636 data: 0.0656 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:38 lr: 0.000078 grad: 0.1287 (0.1365) loss: 0.7748 (0.7850) time: 0.1655 data: 0.0723 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:21 lr: 0.000078 grad: 0.1399 (0.1365) loss: 0.7790 (0.7849) time: 0.1327 data: 0.0431 max mem: 9377 +Train: [44] [5500/6250] eta: 0:02:05 lr: 0.000078 grad: 0.1295 (0.1364) loss: 0.7814 (0.7847) time: 0.2092 data: 0.1211 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:48 lr: 0.000078 grad: 0.1294 (0.1365) loss: 0.7767 (0.7846) time: 0.2043 data: 0.0990 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:31 lr: 0.000078 grad: 0.1295 (0.1365) loss: 0.7824 (0.7844) time: 0.1551 data: 0.0584 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:15 lr: 0.000078 grad: 0.1404 (0.1364) loss: 0.7727 (0.7843) time: 0.1660 data: 0.0627 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:58 lr: 0.000078 grad: 0.1306 (0.1364) loss: 0.7830 (0.7842) time: 0.1552 data: 0.0658 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:41 lr: 0.000078 grad: 0.1402 (0.1364) loss: 0.7794 (0.7841) time: 0.1604 data: 0.0607 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:24 lr: 0.000078 grad: 0.1309 (0.1364) loss: 0.7911 (0.7841) time: 0.1504 data: 0.0548 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:08 lr: 0.000078 grad: 0.1269 (0.1364) loss: 0.7790 (0.7840) time: 0.1574 data: 0.0617 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.1366 (0.1364) loss: 0.7721 (0.7840) time: 0.1674 data: 0.0743 max mem: 9377 +Train: [44] Total time: 0:17:23 (0.1670 s / it) +Averaged stats: lr: 0.000078 grad: 0.1366 (0.1364) loss: 0.7721 (0.7840) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:05:39 loss: 0.8451 (0.8451) time: 5.4686 data: 5.4379 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8483 (0.8503) time: 0.1376 data: 0.1124 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (hcp-train-subset): loss: 0.8483 (0.8503) +Making plots (hcp-train-subset): example=37 +Eval (hcp-val): [44] [ 0/62] eta: 0:06:27 loss: 0.8446 (0.8446) time: 6.2451 data: 6.2129 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8456 (0.8483) time: 0.1370 data: 0.1116 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:14 (0.2336 s / it) +Averaged stats (hcp-val): loss: 0.8456 (0.8483) +Making plots (hcp-val): example=5 +Eval (nsd-val): [44] [ 0/62] eta: 0:05:52 loss: 0.8131 (0.8131) time: 5.6869 data: 5.6538 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8221 (0.8225) time: 0.1297 data: 0.1044 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (nsd-val): loss: 0.8221 (0.8225) +Making plots (nsd-val): example=7 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 9:02:28 lr: 0.000078 grad: 0.2486 (0.2486) loss: 0.7926 (0.7926) time: 5.2077 data: 5.0058 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:24:14 lr: 0.000078 grad: 0.1901 (0.2266) loss: 0.7877 (0.7959) time: 0.2021 data: 0.0985 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:21:17 lr: 0.000078 grad: 0.1714 (0.2069) loss: 0.7678 (0.7863) time: 0.1781 data: 0.0779 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:19:41 lr: 0.000078 grad: 0.1564 (0.1968) loss: 0.7749 (0.7820) time: 0.1733 data: 0.0721 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:18:31 lr: 0.000078 grad: 0.1509 (0.1891) loss: 0.7802 (0.7803) time: 0.1698 data: 0.0810 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:17:47 lr: 0.000078 grad: 0.1478 (0.1831) loss: 0.7751 (0.7796) time: 0.1632 data: 0.0767 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:17:07 lr: 0.000078 grad: 0.1367 (0.1773) loss: 0.7811 (0.7787) time: 0.1499 data: 0.0600 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:16:37 lr: 0.000078 grad: 0.1346 (0.1723) loss: 0.7734 (0.7782) time: 0.1588 data: 0.0558 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:16:13 lr: 0.000078 grad: 0.1359 (0.1682) loss: 0.7769 (0.7777) time: 0.1688 data: 0.0789 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:15:47 lr: 0.000078 grad: 0.1390 (0.1651) loss: 0.7733 (0.7771) time: 0.1320 data: 0.0449 max mem: 9377 +Train: [45] [1000/6250] eta: 0:15:24 lr: 0.000078 grad: 0.1348 (0.1628) loss: 0.7725 (0.7766) time: 0.1603 data: 0.0728 max mem: 9377 +Train: [45] [1100/6250] eta: 0:15:00 lr: 0.000077 grad: 0.1350 (0.1606) loss: 0.7708 (0.7764) time: 0.1767 data: 0.0975 max mem: 9377 +Train: [45] [1200/6250] eta: 0:14:34 lr: 0.000077 grad: 0.1327 (0.1586) loss: 0.7842 (0.7762) time: 0.1440 data: 0.0595 max mem: 9377 +Train: [45] [1300/6250] eta: 0:14:15 lr: 0.000077 grad: 0.1355 (0.1570) loss: 0.7707 (0.7758) time: 0.1684 data: 0.0794 max mem: 9377 +Train: [45] [1400/6250] eta: 0:13:59 lr: 0.000077 grad: 0.1374 (0.1556) loss: 0.7713 (0.7755) time: 0.2031 data: 0.1130 max mem: 9377 +Train: [45] [1500/6250] eta: 0:13:36 lr: 0.000077 grad: 0.1238 (0.1541) loss: 0.7859 (0.7755) time: 0.1495 data: 0.0590 max mem: 9377 +Train: [45] [1600/6250] eta: 0:13:15 lr: 0.000077 grad: 0.1246 (0.1527) loss: 0.7891 (0.7756) time: 0.1534 data: 0.0563 max mem: 9377 +Train: [45] [1700/6250] eta: 0:12:56 lr: 0.000077 grad: 0.1330 (0.1516) loss: 0.7776 (0.7757) time: 0.1642 data: 0.0691 max mem: 9377 +Train: [45] [1800/6250] eta: 0:12:34 lr: 0.000077 grad: 0.1323 (0.1506) loss: 0.7750 (0.7759) time: 0.1203 data: 0.0166 max mem: 9377 +Train: [45] [1900/6250] eta: 0:12:14 lr: 0.000077 grad: 0.1309 (0.1499) loss: 0.7776 (0.7759) time: 0.1503 data: 0.0542 max mem: 9377 +Train: [45] [2000/6250] eta: 0:11:55 lr: 0.000077 grad: 0.1319 (0.1494) loss: 0.7783 (0.7760) time: 0.1668 data: 0.0909 max mem: 9377 +Train: [45] [2100/6250] eta: 0:11:37 lr: 0.000077 grad: 0.1333 (0.1487) loss: 0.7818 (0.7761) time: 0.1657 data: 0.0713 max mem: 9377 +Train: [45] [2200/6250] eta: 0:11:17 lr: 0.000077 grad: 0.1364 (0.1484) loss: 0.7714 (0.7761) time: 0.1659 data: 0.0739 max mem: 9377 +Train: [45] [2300/6250] eta: 0:10:58 lr: 0.000077 grad: 0.1296 (0.1479) loss: 0.7781 (0.7761) time: 0.1559 data: 0.0665 max mem: 9377 +Train: [45] [2400/6250] eta: 0:10:40 lr: 0.000077 grad: 0.1369 (0.1476) loss: 0.7737 (0.7759) time: 0.1532 data: 0.0662 max mem: 9377 +Train: [45] [2500/6250] eta: 0:10:22 lr: 0.000077 grad: 0.1469 (0.1475) loss: 0.7635 (0.7757) time: 0.1844 data: 0.0987 max mem: 9377 +Train: [45] [2600/6250] eta: 0:10:04 lr: 0.000077 grad: 0.1379 (0.1473) loss: 0.7664 (0.7754) time: 0.1436 data: 0.0500 max mem: 9377 +Train: [45] [2700/6250] eta: 0:09:46 lr: 0.000077 grad: 0.1416 (0.1471) loss: 0.7686 (0.7751) time: 0.1501 data: 0.0580 max mem: 9377 +Train: [45] [2800/6250] eta: 0:09:28 lr: 0.000077 grad: 0.1417 (0.1471) loss: 0.7654 (0.7749) time: 0.1719 data: 0.0865 max mem: 9377 +Train: [45] [2900/6250] eta: 0:09:10 lr: 0.000077 grad: 0.1438 (0.1470) loss: 0.7653 (0.7747) time: 0.1494 data: 0.0637 max mem: 9377 +Train: [45] [3000/6250] eta: 0:08:53 lr: 0.000077 grad: 0.1426 (0.1469) loss: 0.7704 (0.7744) time: 0.1533 data: 0.0716 max mem: 9377 +Train: [45] [3100/6250] eta: 0:08:36 lr: 0.000077 grad: 0.1474 (0.1469) loss: 0.7606 (0.7741) time: 0.1166 data: 0.0257 max mem: 9377 +Train: [45] [3200/6250] eta: 0:08:19 lr: 0.000077 grad: 0.1415 (0.1467) loss: 0.7649 (0.7738) time: 0.1346 data: 0.0452 max mem: 9377 +Train: [45] [3300/6250] eta: 0:08:01 lr: 0.000077 grad: 0.1322 (0.1467) loss: 0.7697 (0.7737) time: 0.1483 data: 0.0531 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:45 lr: 0.000077 grad: 0.1493 (0.1466) loss: 0.7672 (0.7735) time: 0.2179 data: 0.1300 max mem: 9377 +Train: [45] [3500/6250] eta: 0:07:27 lr: 0.000077 grad: 0.1316 (0.1464) loss: 0.7809 (0.7735) time: 0.1555 data: 0.0665 max mem: 9377 +Train: [45] [3600/6250] eta: 0:07:10 lr: 0.000077 grad: 0.1380 (0.1463) loss: 0.7678 (0.7734) time: 0.1551 data: 0.0688 max mem: 9377 +Train: [45] [3700/6250] eta: 0:06:54 lr: 0.000077 grad: 0.1365 (0.1462) loss: 0.7698 (0.7733) time: 0.1349 data: 0.0458 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:38 lr: 0.000077 grad: 0.1361 (0.1460) loss: 0.7676 (0.7732) time: 0.2315 data: 0.1443 max mem: 9377 +Train: [45] [3900/6250] eta: 0:06:21 lr: 0.000077 grad: 0.1377 (0.1459) loss: 0.7758 (0.7732) time: 0.1428 data: 0.0490 max mem: 9377 +Train: [45] [4000/6250] eta: 0:06:04 lr: 0.000077 grad: 0.1343 (0.1458) loss: 0.7827 (0.7732) time: 0.1415 data: 0.0545 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:47 lr: 0.000077 grad: 0.1383 (0.1457) loss: 0.7710 (0.7731) time: 0.1478 data: 0.0535 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:31 lr: 0.000076 grad: 0.1413 (0.1456) loss: 0.7767 (0.7731) time: 0.1298 data: 0.0400 max mem: 9377 +Train: [45] [4300/6250] eta: 0:05:16 lr: 0.000076 grad: 0.1514 (0.1455) loss: 0.7726 (0.7731) time: 0.1829 data: 0.0930 max mem: 9377 +Train: [45] [4400/6250] eta: 0:05:01 lr: 0.000076 grad: 0.1352 (0.1454) loss: 0.7717 (0.7731) time: 0.1675 data: 0.0864 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:46 lr: 0.000076 grad: 0.1338 (0.1453) loss: 0.7693 (0.7732) time: 0.1734 data: 0.0895 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:30 lr: 0.000076 grad: 0.1379 (0.1452) loss: 0.7764 (0.7732) time: 0.1795 data: 0.0904 max mem: 9377 +Train: [45] [4700/6250] eta: 0:04:15 lr: 0.000076 grad: 0.1317 (0.1450) loss: 0.7757 (0.7733) time: 0.1793 data: 0.0896 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:58 lr: 0.000076 grad: 0.1351 (0.1448) loss: 0.7769 (0.7734) time: 0.1743 data: 0.0876 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:42 lr: 0.000076 grad: 0.1376 (0.1447) loss: 0.7682 (0.7734) time: 0.1734 data: 0.0818 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:25 lr: 0.000076 grad: 0.1386 (0.1446) loss: 0.7646 (0.7734) time: 0.1533 data: 0.0607 max mem: 9377 +Train: [45] [5100/6250] eta: 0:03:09 lr: 0.000076 grad: 0.1376 (0.1445) loss: 0.7693 (0.7735) time: 0.1708 data: 0.0896 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:53 lr: 0.000076 grad: 0.1328 (0.1444) loss: 0.7760 (0.7735) time: 0.1759 data: 0.0856 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:36 lr: 0.000076 grad: 0.1312 (0.1444) loss: 0.7792 (0.7736) time: 0.1594 data: 0.0714 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:19 lr: 0.000076 grad: 0.1360 (0.1443) loss: 0.7813 (0.7736) time: 0.1931 data: 0.0949 max mem: 9377 +Train: [45] [5500/6250] eta: 0:02:03 lr: 0.000076 grad: 0.1364 (0.1442) loss: 0.7741 (0.7736) time: 0.1657 data: 0.0735 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:46 lr: 0.000076 grad: 0.1329 (0.1441) loss: 0.7812 (0.7737) time: 0.1488 data: 0.0509 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:30 lr: 0.000076 grad: 0.1362 (0.1440) loss: 0.7803 (0.7738) time: 0.1122 data: 0.0097 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:13 lr: 0.000076 grad: 0.1304 (0.1439) loss: 0.7825 (0.7738) time: 0.1493 data: 0.0611 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:57 lr: 0.000076 grad: 0.1305 (0.1437) loss: 0.7785 (0.7739) time: 0.1614 data: 0.0682 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:41 lr: 0.000076 grad: 0.1282 (0.1436) loss: 0.7725 (0.7740) time: 0.1414 data: 0.0428 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:24 lr: 0.000076 grad: 0.1297 (0.1434) loss: 0.7783 (0.7740) time: 0.1400 data: 0.0524 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:08 lr: 0.000076 grad: 0.1365 (0.1433) loss: 0.7766 (0.7740) time: 0.1779 data: 0.0869 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.1286 (0.1433) loss: 0.7835 (0.7740) time: 0.1529 data: 0.0590 max mem: 9377 +Train: [45] Total time: 0:17:10 (0.1649 s / it) +Averaged stats: lr: 0.000076 grad: 0.1286 (0.1433) loss: 0.7835 (0.7740) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:04:36 loss: 0.8457 (0.8457) time: 4.4534 data: 4.3722 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8479 (0.8504) time: 0.1289 data: 0.1036 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:14 (0.2374 s / it) +Averaged stats (hcp-train-subset): loss: 0.8479 (0.8504) +Eval (hcp-val): [45] [ 0/62] eta: 0:04:51 loss: 0.8450 (0.8450) time: 4.7021 data: 4.6260 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8464 (0.8490) time: 0.1275 data: 0.1024 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:14 (0.2301 s / it) +Averaged stats (hcp-val): loss: 0.8464 (0.8490) +Eval (nsd-val): [45] [ 0/62] eta: 0:05:53 loss: 0.8152 (0.8152) time: 5.7039 data: 5.6701 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8211 (0.8231) time: 0.1131 data: 0.0877 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:14 (0.2345 s / it) +Averaged stats (nsd-val): loss: 0.8211 (0.8231) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 8:03:17 lr: 0.000076 grad: nan (nan) loss: 0.8211 (0.8211) time: 4.6396 data: 4.4365 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:24:15 lr: 0.000076 grad: 0.1986 (0.2000) loss: 0.7813 (0.8092) time: 0.1853 data: 0.0825 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:20:59 lr: 0.000076 grad: 0.1551 (0.1951) loss: 0.7788 (0.7942) time: 0.1700 data: 0.0754 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:19:31 lr: 0.000076 grad: 0.1342 (0.1795) loss: 0.7659 (0.7918) time: 0.1485 data: 0.0412 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:18:29 lr: 0.000076 grad: 0.1462 (0.1688) loss: 0.7849 (0.7913) time: 0.1572 data: 0.0522 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:17:43 lr: 0.000076 grad: 0.1352 (0.1625) loss: 0.7903 (0.7910) time: 0.1465 data: 0.0378 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:17:11 lr: 0.000076 grad: 0.1345 (0.1582) loss: 0.7881 (0.7908) time: 0.1656 data: 0.0645 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:16:36 lr: 0.000076 grad: 0.1337 (0.1551) loss: 0.7735 (0.7904) time: 0.1608 data: 0.0644 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:16:09 lr: 0.000076 grad: 0.1264 (0.1525) loss: 0.7825 (0.7901) time: 0.1686 data: 0.0802 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:15:39 lr: 0.000076 grad: 0.1324 (0.1505) loss: 0.7874 (0.7897) time: 0.1495 data: 0.0550 max mem: 9377 +Train: [46] [1000/6250] eta: 0:15:21 lr: 0.000076 grad: 0.1304 (0.1485) loss: 0.7787 (0.7890) time: 0.1746 data: 0.0853 max mem: 9377 +Train: [46] [1100/6250] eta: 0:14:59 lr: 0.000075 grad: 0.1304 (0.1475) loss: 0.7805 (0.7883) time: 0.1765 data: 0.0921 max mem: 9377 +Train: [46] [1200/6250] eta: 0:14:39 lr: 0.000075 grad: 0.1325 (0.1466) loss: 0.7831 (0.7877) time: 0.1657 data: 0.0849 max mem: 9377 +Train: [46] [1300/6250] eta: 0:14:21 lr: 0.000075 grad: 0.1325 (0.1454) loss: 0.7708 (0.7871) time: 0.1890 data: 0.0972 max mem: 9377 +Train: [46] [1400/6250] eta: 0:14:04 lr: 0.000075 grad: 0.1223 (0.1445) loss: 0.7852 (0.7867) time: 0.1823 data: 0.0857 max mem: 9377 +Train: [46] [1500/6250] eta: 0:13:44 lr: 0.000075 grad: 0.1234 (0.1436) loss: 0.7775 (0.7862) time: 0.1706 data: 0.0813 max mem: 9377 +Train: [46] [1600/6250] eta: 0:13:26 lr: 0.000075 grad: 0.1277 (0.1430) loss: 0.7660 (0.7857) time: 0.1735 data: 0.0725 max mem: 9377 +Train: [46] [1700/6250] eta: 0:13:06 lr: 0.000075 grad: 0.1366 (0.1425) loss: 0.7677 (0.7853) time: 0.1734 data: 0.0873 max mem: 9377 +Train: [46] [1800/6250] eta: 0:12:47 lr: 0.000075 grad: 0.1347 (0.1420) loss: 0.7731 (0.7849) time: 0.1745 data: 0.0824 max mem: 9377 +Train: [46] [1900/6250] eta: 0:12:27 lr: 0.000075 grad: 0.1299 (0.1416) loss: 0.7773 (0.7846) time: 0.1414 data: 0.0564 max mem: 9377 +Train: [46] [2000/6250] eta: 0:12:05 lr: 0.000075 grad: 0.1306 (0.1413) loss: 0.7763 (0.7842) time: 0.1574 data: 0.0712 max mem: 9377 +Train: [46] [2100/6250] eta: 0:11:45 lr: 0.000075 grad: 0.1327 (0.1410) loss: 0.7782 (0.7839) time: 0.1478 data: 0.0573 max mem: 9377 +Train: [46] [2200/6250] eta: 0:11:25 lr: 0.000075 grad: 0.1361 (0.1410) loss: 0.7798 (0.7835) time: 0.1561 data: 0.0676 max mem: 9377 +Train: [46] [2300/6250] eta: 0:11:07 lr: 0.000075 grad: 0.1328 (0.1408) loss: 0.7923 (0.7834) time: 0.1695 data: 0.0940 max mem: 9377 +Train: [46] [2400/6250] eta: 0:10:50 lr: 0.000075 grad: 0.1296 (0.1405) loss: 0.7860 (0.7833) time: 0.1744 data: 0.0945 max mem: 9377 +Train: [46] [2500/6250] eta: 0:10:31 lr: 0.000075 grad: 0.1393 (0.1406) loss: 0.7789 (0.7833) time: 0.1468 data: 0.0650 max mem: 9377 +Train: [46] [2600/6250] eta: 0:10:14 lr: 0.000075 grad: 0.1308 (0.1405) loss: 0.7886 (0.7833) time: 0.1748 data: 0.0918 max mem: 9377 +Train: [46] [2700/6250] eta: 0:09:57 lr: 0.000075 grad: 0.1349 (0.1405) loss: 0.7814 (0.7831) time: 0.1872 data: 0.1062 max mem: 9377 +Train: [46] [2800/6250] eta: 0:09:39 lr: 0.000075 grad: 0.1396 (0.1404) loss: 0.7742 (0.7830) time: 0.1355 data: 0.0461 max mem: 9377 +Train: [46] [2900/6250] eta: 0:09:21 lr: 0.000075 grad: 0.1398 (0.1404) loss: 0.7696 (0.7827) time: 0.1685 data: 0.0762 max mem: 9377 +Train: [46] [3000/6250] eta: 0:09:04 lr: 0.000075 grad: 0.1256 (0.1403) loss: 0.7795 (0.7826) time: 0.1645 data: 0.0681 max mem: 9377 +Train: [46] [3100/6250] eta: 0:08:46 lr: 0.000075 grad: 0.1322 (0.1401) loss: 0.7843 (0.7824) time: 0.1677 data: 0.0817 max mem: 9377 +Train: [46] [3200/6250] eta: 0:08:28 lr: 0.000075 grad: 0.1457 (0.1402) loss: 0.7791 (0.7822) time: 0.1484 data: 0.0578 max mem: 9377 +Train: [46] [3300/6250] eta: 0:08:11 lr: 0.000075 grad: 0.1357 (0.1403) loss: 0.7678 (0.7821) time: 0.1418 data: 0.0665 max mem: 9377 +Train: [46] [3400/6250] eta: 0:07:53 lr: 0.000075 grad: 0.1393 (0.1402) loss: 0.7742 (0.7819) time: 0.1219 data: 0.0246 max mem: 9377 +Train: [46] [3500/6250] eta: 0:07:36 lr: 0.000075 grad: 0.1388 (0.1402) loss: 0.7730 (0.7819) time: 0.1734 data: 0.0865 max mem: 9377 +Train: [46] [3600/6250] eta: 0:07:19 lr: 0.000075 grad: 0.1418 (0.1402) loss: 0.7699 (0.7817) time: 0.1514 data: 0.0671 max mem: 9377 +Train: [46] [3700/6250] eta: 0:07:02 lr: 0.000075 grad: 0.1283 (0.1402) loss: 0.7761 (0.7816) time: 0.1519 data: 0.0586 max mem: 9377 +Train: [46] [3800/6250] eta: 0:06:45 lr: 0.000075 grad: 0.1368 (0.1401) loss: 0.7780 (0.7814) time: 0.1723 data: 0.0803 max mem: 9377 +Train: [46] [3900/6250] eta: 0:06:28 lr: 0.000075 grad: 0.1321 (0.1400) loss: 0.7743 (0.7813) time: 0.1330 data: 0.0473 max mem: 9377 +Train: [46] [4000/6250] eta: 0:06:11 lr: 0.000075 grad: 0.1343 (0.1401) loss: 0.7768 (0.7812) time: 0.1761 data: 0.0813 max mem: 9377 +Train: [46] [4100/6250] eta: 0:05:55 lr: 0.000075 grad: 0.1367 (0.1400) loss: 0.7755 (0.7811) time: 0.1674 data: 0.0818 max mem: 9377 +Train: [46] [4200/6250] eta: 0:05:38 lr: 0.000074 grad: 0.1322 (0.1402) loss: 0.7753 (0.7809) time: 0.1780 data: 0.0945 max mem: 9377 +Train: [46] [4300/6250] eta: 0:05:22 lr: 0.000074 grad: 0.1351 (0.1403) loss: 0.7777 (0.7807) time: 0.1888 data: 0.1078 max mem: 9377 +Train: [46] [4400/6250] eta: 0:05:05 lr: 0.000074 grad: 0.1384 (0.1403) loss: 0.7632 (0.7806) time: 0.1555 data: 0.0695 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:48 lr: 0.000074 grad: 0.1286 (0.1403) loss: 0.7857 (0.7804) time: 0.1655 data: 0.0789 max mem: 9377 +Train: [46] [4600/6250] eta: 0:04:32 lr: 0.000074 grad: 0.1318 (0.1402) loss: 0.7810 (0.7803) time: 0.1531 data: 0.0747 max mem: 9377 +Train: [46] [4700/6250] eta: 0:04:15 lr: 0.000074 grad: 0.1428 (0.1402) loss: 0.7791 (0.7802) time: 0.1549 data: 0.0682 max mem: 9377 +Train: [46] [4800/6250] eta: 0:03:59 lr: 0.000074 grad: 0.1377 (0.1403) loss: 0.7711 (0.7801) time: 0.1596 data: 0.0729 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:42 lr: 0.000074 grad: 0.1423 (0.1404) loss: 0.7724 (0.7800) time: 0.1565 data: 0.0579 max mem: 9377 +Train: [46] [5000/6250] eta: 0:03:25 lr: 0.000074 grad: 0.1348 (0.1404) loss: 0.7708 (0.7798) time: 0.1681 data: 0.0769 max mem: 9377 +Train: [46] [5100/6250] eta: 0:03:09 lr: 0.000074 grad: 0.1392 (0.1404) loss: 0.7807 (0.7797) time: 0.1918 data: 0.0956 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:53 lr: 0.000074 grad: 0.1398 (0.1405) loss: 0.7688 (0.7796) time: 0.1781 data: 0.0882 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:37 lr: 0.000074 grad: 0.1396 (0.1406) loss: 0.7738 (0.7794) time: 0.1754 data: 0.0864 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:21 lr: 0.000074 grad: 0.1402 (0.1407) loss: 0.7684 (0.7793) time: 0.1784 data: 0.0877 max mem: 9377 +Train: [46] [5500/6250] eta: 0:02:04 lr: 0.000074 grad: 0.1356 (0.1407) loss: 0.7680 (0.7792) time: 0.1790 data: 0.0740 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:48 lr: 0.000074 grad: 0.1390 (0.1407) loss: 0.7670 (0.7791) time: 0.1623 data: 0.0646 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:31 lr: 0.000074 grad: 0.1463 (0.1408) loss: 0.7721 (0.7789) time: 0.1654 data: 0.0693 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:14 lr: 0.000074 grad: 0.1402 (0.1408) loss: 0.7735 (0.7788) time: 0.1671 data: 0.0792 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:58 lr: 0.000074 grad: 0.1408 (0.1408) loss: 0.7737 (0.7788) time: 0.1547 data: 0.0628 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:41 lr: 0.000074 grad: 0.1341 (0.1408) loss: 0.7800 (0.7787) time: 0.1605 data: 0.0743 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.1423 (0.1409) loss: 0.7716 (0.7786) time: 0.1762 data: 0.0914 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:08 lr: 0.000074 grad: 0.1391 (0.1410) loss: 0.7789 (0.7785) time: 0.1638 data: 0.0765 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.1481 (0.1410) loss: 0.7745 (0.7785) time: 0.1720 data: 0.0850 max mem: 9377 +Train: [46] Total time: 0:17:22 (0.1669 s / it) +Averaged stats: lr: 0.000074 grad: 0.1481 (0.1410) loss: 0.7745 (0.7785) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:03:43 loss: 0.8488 (0.8488) time: 3.6070 data: 3.5489 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8519 (0.8519) time: 0.1190 data: 0.0923 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (hcp-train-subset): loss: 0.8519 (0.8519) +Eval (hcp-val): [46] [ 0/62] eta: 0:05:45 loss: 0.8487 (0.8487) time: 5.5754 data: 5.5456 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8503 (0.8493) time: 0.1325 data: 0.1058 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-val): loss: 0.8503 (0.8493) +Eval (nsd-val): [46] [ 0/62] eta: 0:06:20 loss: 0.8162 (0.8162) time: 6.1428 data: 6.1077 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8242 (0.8252) time: 0.1245 data: 0.0977 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:14 (0.2328 s / it) +Averaged stats (nsd-val): loss: 0.8242 (0.8252) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 11:56:13 lr: 0.000074 grad: 0.1036 (0.1036) loss: 0.8445 (0.8445) time: 6.8758 data: 6.7776 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:22:53 lr: 0.000074 grad: 0.1817 (0.2275) loss: 0.7859 (0.7947) time: 0.1572 data: 0.0399 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:20:16 lr: 0.000074 grad: 0.1776 (0.2070) loss: 0.7647 (0.7855) time: 0.2171 data: 0.1169 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:18:36 lr: 0.000074 grad: 0.1648 (0.1970) loss: 0.7734 (0.7822) time: 0.1774 data: 0.0794 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:17:46 lr: 0.000074 grad: 0.1737 (0.1905) loss: 0.7561 (0.7783) time: 0.1700 data: 0.0685 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:17:15 lr: 0.000074 grad: 0.1474 (0.1822) loss: 0.7623 (0.7766) time: 0.1646 data: 0.0706 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:16:48 lr: 0.000074 grad: 0.1486 (0.1768) loss: 0.7633 (0.7755) time: 0.1570 data: 0.0643 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:16:15 lr: 0.000074 grad: 0.1525 (0.1731) loss: 0.7753 (0.7752) time: 0.1789 data: 0.0842 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:15:45 lr: 0.000074 grad: 0.1415 (0.1696) loss: 0.7773 (0.7749) time: 0.1631 data: 0.0745 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:15:24 lr: 0.000074 grad: 0.1492 (0.1668) loss: 0.7656 (0.7746) time: 0.1635 data: 0.0639 max mem: 9377 +Train: [47] [1000/6250] eta: 0:15:05 lr: 0.000073 grad: 0.1412 (0.1645) loss: 0.7782 (0.7745) time: 0.1864 data: 0.0986 max mem: 9377 +Train: [47] [1100/6250] eta: 0:14:44 lr: 0.000073 grad: 0.1394 (0.1624) loss: 0.7662 (0.7742) time: 0.1671 data: 0.0725 max mem: 9377 +Train: [47] [1200/6250] eta: 0:14:25 lr: 0.000073 grad: 0.1359 (0.1606) loss: 0.7704 (0.7739) time: 0.1392 data: 0.0510 max mem: 9377 +Train: [47] [1300/6250] eta: 0:14:08 lr: 0.000073 grad: 0.1408 (0.1592) loss: 0.7638 (0.7732) time: 0.1613 data: 0.0694 max mem: 9377 +Train: [47] [1400/6250] eta: 0:13:51 lr: 0.000073 grad: 0.1409 (0.1580) loss: 0.7655 (0.7728) time: 0.1806 data: 0.0861 max mem: 9377 +Train: [47] [1500/6250] eta: 0:13:34 lr: 0.000073 grad: 0.1389 (0.1573) loss: 0.7617 (0.7722) time: 0.1695 data: 0.0873 max mem: 9377 +Train: [47] [1600/6250] eta: 0:13:12 lr: 0.000073 grad: 0.1378 (0.1563) loss: 0.7581 (0.7718) time: 0.1510 data: 0.0535 max mem: 9377 +Train: [47] [1700/6250] eta: 0:12:50 lr: 0.000073 grad: 0.1365 (0.1556) loss: 0.7678 (0.7714) time: 0.1532 data: 0.0651 max mem: 9377 +Train: [47] [1800/6250] eta: 0:12:29 lr: 0.000073 grad: 0.1376 (0.1548) loss: 0.7679 (0.7712) time: 0.1648 data: 0.0777 max mem: 9377 +Train: [47] [1900/6250] eta: 0:12:09 lr: 0.000073 grad: 0.1398 (0.1540) loss: 0.7719 (0.7712) time: 0.1665 data: 0.0744 max mem: 9377 +Train: [47] [2000/6250] eta: 0:11:48 lr: 0.000073 grad: 0.1341 (0.1534) loss: 0.7671 (0.7712) time: 0.1427 data: 0.0596 max mem: 9377 +Train: [47] [2100/6250] eta: 0:11:29 lr: 0.000073 grad: 0.1428 (0.1529) loss: 0.7727 (0.7712) time: 0.1804 data: 0.0945 max mem: 9377 +Train: [47] [2200/6250] eta: 0:11:11 lr: 0.000073 grad: 0.1342 (0.1523) loss: 0.7719 (0.7711) time: 0.1600 data: 0.0667 max mem: 9377 +Train: [47] [2300/6250] eta: 0:10:55 lr: 0.000073 grad: 0.1329 (0.1518) loss: 0.7760 (0.7712) time: 0.1602 data: 0.0747 max mem: 9377 +Train: [47] [2400/6250] eta: 0:10:38 lr: 0.000073 grad: 0.1334 (0.1512) loss: 0.7855 (0.7713) time: 0.1474 data: 0.0547 max mem: 9377 +Train: [47] [2500/6250] eta: 0:10:21 lr: 0.000073 grad: 0.1380 (0.1508) loss: 0.7728 (0.7715) time: 0.1586 data: 0.0694 max mem: 9377 +Train: [47] [2600/6250] eta: 0:10:02 lr: 0.000073 grad: 0.1292 (0.1503) loss: 0.7812 (0.7718) time: 0.1601 data: 0.0602 max mem: 9377 +Train: [47] [2700/6250] eta: 0:09:44 lr: 0.000073 grad: 0.1339 (0.1499) loss: 0.7892 (0.7720) time: 0.1495 data: 0.0676 max mem: 9377 +Train: [47] [2800/6250] eta: 0:09:27 lr: 0.000073 grad: 0.1352 (0.1495) loss: 0.7905 (0.7723) time: 0.1471 data: 0.0622 max mem: 9377 +Train: [47] [2900/6250] eta: 0:09:10 lr: 0.000073 grad: 0.1382 (0.1492) loss: 0.7751 (0.7723) time: 0.1482 data: 0.0618 max mem: 9377 +Train: [47] [3000/6250] eta: 0:08:54 lr: 0.000073 grad: 0.1302 (0.1490) loss: 0.7871 (0.7726) time: 0.1880 data: 0.1022 max mem: 9377 +Train: [47] [3100/6250] eta: 0:08:37 lr: 0.000073 grad: 0.1298 (0.1485) loss: 0.7777 (0.7728) time: 0.1692 data: 0.0844 max mem: 9377 +Train: [47] [3200/6250] eta: 0:08:21 lr: 0.000073 grad: 0.1447 (0.1483) loss: 0.7786 (0.7729) time: 0.1688 data: 0.0855 max mem: 9377 +Train: [47] [3300/6250] eta: 0:08:05 lr: 0.000073 grad: 0.1340 (0.1481) loss: 0.7768 (0.7730) time: 0.1683 data: 0.0787 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:48 lr: 0.000073 grad: 0.1356 (0.1478) loss: 0.7718 (0.7731) time: 0.1253 data: 0.0337 max mem: 9377 +Train: [47] [3500/6250] eta: 0:07:31 lr: 0.000073 grad: 0.1379 (0.1476) loss: 0.7802 (0.7731) time: 0.1856 data: 0.1020 max mem: 9377 +Train: [47] [3600/6250] eta: 0:07:15 lr: 0.000073 grad: 0.1347 (0.1473) loss: 0.7723 (0.7733) time: 0.1590 data: 0.0677 max mem: 9377 +Train: [47] [3700/6250] eta: 0:06:58 lr: 0.000073 grad: 0.1347 (0.1471) loss: 0.7723 (0.7732) time: 0.1598 data: 0.0682 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:41 lr: 0.000073 grad: 0.1393 (0.1469) loss: 0.7681 (0.7732) time: 0.1275 data: 0.0438 max mem: 9377 +Train: [47] [3900/6250] eta: 0:06:24 lr: 0.000073 grad: 0.1337 (0.1466) loss: 0.7746 (0.7732) time: 0.1461 data: 0.0535 max mem: 9377 +Train: [47] [4000/6250] eta: 0:06:07 lr: 0.000073 grad: 0.1383 (0.1465) loss: 0.7756 (0.7733) time: 0.1531 data: 0.0641 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:51 lr: 0.000072 grad: 0.1366 (0.1463) loss: 0.7813 (0.7734) time: 0.1604 data: 0.0687 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:35 lr: 0.000072 grad: 0.1357 (0.1462) loss: 0.7710 (0.7733) time: 0.1496 data: 0.0701 max mem: 9377 +Train: [47] [4300/6250] eta: 0:05:19 lr: 0.000072 grad: 0.1376 (0.1461) loss: 0.7768 (0.7733) time: 0.1542 data: 0.0695 max mem: 9377 +Train: [47] [4400/6250] eta: 0:05:02 lr: 0.000072 grad: 0.1326 (0.1461) loss: 0.7796 (0.7733) time: 0.1467 data: 0.0611 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:46 lr: 0.000072 grad: 0.1390 (0.1461) loss: 0.7727 (0.7732) time: 0.1924 data: 0.1090 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:30 lr: 0.000072 grad: 0.1378 (0.1460) loss: 0.7724 (0.7732) time: 0.1822 data: 0.0874 max mem: 9377 +Train: [47] [4700/6250] eta: 0:04:14 lr: 0.000072 grad: 0.1320 (0.1459) loss: 0.7724 (0.7731) time: 0.1857 data: 0.0962 max mem: 9377 +Train: [47] [4800/6250] eta: 0:03:58 lr: 0.000072 grad: 0.1427 (0.1458) loss: 0.7637 (0.7731) time: 0.1848 data: 0.0945 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:42 lr: 0.000072 grad: 0.1410 (0.1458) loss: 0.7741 (0.7732) time: 0.1498 data: 0.0585 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:25 lr: 0.000072 grad: 0.1382 (0.1458) loss: 0.7759 (0.7732) time: 0.1437 data: 0.0603 max mem: 9377 +Train: [47] [5100/6250] eta: 0:03:09 lr: 0.000072 grad: 0.1431 (0.1459) loss: 0.7705 (0.7732) time: 0.1426 data: 0.0569 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:53 lr: 0.000072 grad: 0.1431 (0.1458) loss: 0.7733 (0.7732) time: 0.1786 data: 0.1008 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:36 lr: 0.000072 grad: 0.1425 (0.1458) loss: 0.7735 (0.7732) time: 0.1792 data: 0.1024 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:20 lr: 0.000072 grad: 0.1369 (0.1458) loss: 0.7793 (0.7732) time: 0.1709 data: 0.0695 max mem: 9377 +Train: [47] [5500/6250] eta: 0:02:04 lr: 0.000072 grad: 0.1399 (0.1458) loss: 0.7744 (0.7732) time: 0.1691 data: 0.0788 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:47 lr: 0.000072 grad: 0.1498 (0.1458) loss: 0.7638 (0.7731) time: 0.1787 data: 0.0879 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:30 lr: 0.000072 grad: 0.1403 (0.1459) loss: 0.7733 (0.7730) time: 0.1819 data: 0.0867 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:14 lr: 0.000072 grad: 0.1526 (0.1459) loss: 0.7612 (0.7729) time: 0.1975 data: 0.1154 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:57 lr: 0.000072 grad: 0.1405 (0.1459) loss: 0.7715 (0.7728) time: 0.1471 data: 0.0645 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:41 lr: 0.000072 grad: 0.1437 (0.1459) loss: 0.7735 (0.7727) time: 0.1481 data: 0.0590 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:24 lr: 0.000072 grad: 0.1501 (0.1459) loss: 0.7642 (0.7726) time: 0.1536 data: 0.0653 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:08 lr: 0.000072 grad: 0.1315 (0.1460) loss: 0.7695 (0.7725) time: 0.1690 data: 0.0846 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.1439 (0.1460) loss: 0.7637 (0.7725) time: 0.1355 data: 0.0375 max mem: 9377 +Train: [47] Total time: 0:17:14 (0.1656 s / it) +Averaged stats: lr: 0.000072 grad: 0.1439 (0.1460) loss: 0.7637 (0.7725) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:05:42 loss: 0.8460 (0.8460) time: 5.5167 data: 5.4835 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8515 (0.8507) time: 0.1214 data: 0.0961 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (hcp-train-subset): loss: 0.8515 (0.8507) +Eval (hcp-val): [47] [ 0/62] eta: 0:04:02 loss: 0.8467 (0.8467) time: 3.9074 data: 3.8267 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8482 (0.8487) time: 0.1286 data: 0.1033 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:14 (0.2312 s / it) +Averaged stats (hcp-val): loss: 0.8482 (0.8487) +Eval (nsd-val): [47] [ 0/62] eta: 0:03:50 loss: 0.8148 (0.8148) time: 3.7219 data: 3.6030 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8233 (0.8248) time: 0.1459 data: 0.1205 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (nsd-val): loss: 0.8233 (0.8248) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 10:35:51 lr: 0.000072 grad: 0.3112 (0.3112) loss: 0.6232 (0.6232) time: 6.1042 data: 5.9909 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:22:58 lr: 0.000072 grad: 0.2403 (0.2441) loss: 0.7671 (0.7971) time: 0.1729 data: 0.0745 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:19:54 lr: 0.000072 grad: 0.1851 (0.2255) loss: 0.7784 (0.7864) time: 0.1701 data: 0.0696 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:18:52 lr: 0.000072 grad: 0.1716 (0.2105) loss: 0.7618 (0.7814) time: 0.1595 data: 0.0467 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:17:56 lr: 0.000072 grad: 0.1760 (0.2013) loss: 0.7532 (0.7765) time: 0.1066 data: 0.0010 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:17:20 lr: 0.000072 grad: 0.1437 (0.1929) loss: 0.7675 (0.7744) time: 0.1577 data: 0.0548 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:16:48 lr: 0.000072 grad: 0.1475 (0.1855) loss: 0.7708 (0.7732) time: 0.1717 data: 0.0734 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:16:16 lr: 0.000072 grad: 0.1496 (0.1806) loss: 0.7770 (0.7722) time: 0.1768 data: 0.0840 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:15:45 lr: 0.000072 grad: 0.1440 (0.1766) loss: 0.7578 (0.7712) time: 0.1534 data: 0.0577 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:15:21 lr: 0.000071 grad: 0.1446 (0.1741) loss: 0.7710 (0.7700) time: 0.1528 data: 0.0616 max mem: 9377 +Train: [48] [1000/6250] eta: 0:15:00 lr: 0.000071 grad: 0.1436 (0.1718) loss: 0.7672 (0.7692) time: 0.2000 data: 0.1205 max mem: 9377 +Train: [48] [1100/6250] eta: 0:14:39 lr: 0.000071 grad: 0.1429 (0.1694) loss: 0.7588 (0.7688) time: 0.1659 data: 0.0839 max mem: 9377 +Train: [48] [1200/6250] eta: 0:14:19 lr: 0.000071 grad: 0.1494 (0.1676) loss: 0.7554 (0.7684) time: 0.1706 data: 0.0853 max mem: 9377 +Train: [48] [1300/6250] eta: 0:13:59 lr: 0.000071 grad: 0.1424 (0.1663) loss: 0.7565 (0.7677) time: 0.1502 data: 0.0688 max mem: 9377 +Train: [48] [1400/6250] eta: 0:13:40 lr: 0.000071 grad: 0.1451 (0.1651) loss: 0.7575 (0.7672) time: 0.1681 data: 0.0665 max mem: 9377 +Train: [48] [1500/6250] eta: 0:13:22 lr: 0.000071 grad: 0.1471 (0.1640) loss: 0.7621 (0.7669) time: 0.1624 data: 0.0624 max mem: 9377 +Train: [48] [1600/6250] eta: 0:13:03 lr: 0.000071 grad: 0.1436 (0.1629) loss: 0.7549 (0.7665) time: 0.1661 data: 0.0809 max mem: 9377 +Train: [48] [1700/6250] eta: 0:12:41 lr: 0.000071 grad: 0.1364 (0.1618) loss: 0.7537 (0.7663) time: 0.1487 data: 0.0606 max mem: 9377 +Train: [48] [1800/6250] eta: 0:12:21 lr: 0.000071 grad: 0.1424 (0.1610) loss: 0.7601 (0.7661) time: 0.1578 data: 0.0569 max mem: 9377 +Train: [48] [1900/6250] eta: 0:12:00 lr: 0.000071 grad: 0.1429 (0.1601) loss: 0.7599 (0.7662) time: 0.1250 data: 0.0222 max mem: 9377 +Train: [48] [2000/6250] eta: 0:11:41 lr: 0.000071 grad: 0.1419 (0.1594) loss: 0.7573 (0.7660) time: 0.1535 data: 0.0607 max mem: 9377 +Train: [48] [2100/6250] eta: 0:11:22 lr: 0.000071 grad: 0.1431 (0.1587) loss: 0.7563 (0.7659) time: 0.1807 data: 0.0870 max mem: 9377 +Train: [48] [2200/6250] eta: 0:11:04 lr: 0.000071 grad: 0.1409 (0.1580) loss: 0.7551 (0.7658) time: 0.1730 data: 0.0718 max mem: 9377 +Train: [48] [2300/6250] eta: 0:10:47 lr: 0.000071 grad: 0.1414 (0.1575) loss: 0.7735 (0.7658) time: 0.1692 data: 0.0839 max mem: 9377 +Train: [48] [2400/6250] eta: 0:10:29 lr: 0.000071 grad: 0.1393 (0.1569) loss: 0.7641 (0.7657) time: 0.1665 data: 0.0821 max mem: 9377 +Train: [48] [2500/6250] eta: 0:10:12 lr: 0.000071 grad: 0.1377 (0.1564) loss: 0.7610 (0.7659) time: 0.1471 data: 0.0623 max mem: 9377 +Train: [48] [2600/6250] eta: 0:09:55 lr: 0.000071 grad: 0.1448 (0.1560) loss: 0.7709 (0.7659) time: 0.1623 data: 0.0802 max mem: 9377 +Train: [48] [2700/6250] eta: 0:09:38 lr: 0.000071 grad: 0.1481 (0.1562) loss: 0.7722 (0.7661) time: 0.1447 data: 0.0471 max mem: 9377 +Train: [48] [2800/6250] eta: 0:09:20 lr: 0.000071 grad: 0.1371 (0.1559) loss: 0.7722 (0.7662) time: 0.1646 data: 0.0775 max mem: 9377 +Train: [48] [2900/6250] eta: 0:09:03 lr: 0.000071 grad: 0.1400 (0.1555) loss: 0.7645 (0.7664) time: 0.1580 data: 0.0740 max mem: 9377 +Train: [48] [3000/6250] eta: 0:08:46 lr: 0.000071 grad: 0.1459 (0.1553) loss: 0.7642 (0.7665) time: 0.1503 data: 0.0575 max mem: 9377 +Train: [48] [3100/6250] eta: 0:08:29 lr: 0.000071 grad: 0.1402 (0.1548) loss: 0.7772 (0.7667) time: 0.1778 data: 0.0903 max mem: 9377 +Train: [48] [3200/6250] eta: 0:08:12 lr: 0.000071 grad: 0.1495 (0.1546) loss: 0.7769 (0.7669) time: 0.1486 data: 0.0611 max mem: 9377 +Train: [48] [3300/6250] eta: 0:07:56 lr: 0.000071 grad: 0.1372 (0.1542) loss: 0.7755 (0.7670) time: 0.1952 data: 0.1030 max mem: 9377 +Train: [48] [3400/6250] eta: 0:07:38 lr: 0.000071 grad: 0.1465 (0.1539) loss: 0.7739 (0.7672) time: 0.1477 data: 0.0574 max mem: 9377 +Train: [48] [3500/6250] eta: 0:07:22 lr: 0.000071 grad: 0.1390 (0.1535) loss: 0.7773 (0.7674) time: 0.1603 data: 0.0704 max mem: 9377 +Train: [48] [3600/6250] eta: 0:07:05 lr: 0.000071 grad: 0.1346 (0.1532) loss: 0.7747 (0.7675) time: 0.1595 data: 0.0756 max mem: 9377 +Train: [48] [3700/6250] eta: 0:06:49 lr: 0.000071 grad: 0.1476 (0.1529) loss: 0.7723 (0.7677) time: 0.1846 data: 0.0885 max mem: 9377 +Train: [48] [3800/6250] eta: 0:06:33 lr: 0.000071 grad: 0.1385 (0.1527) loss: 0.7779 (0.7679) time: 0.1786 data: 0.0949 max mem: 9377 +Train: [48] [3900/6250] eta: 0:06:17 lr: 0.000070 grad: 0.1436 (0.1525) loss: 0.7721 (0.7680) time: 0.1521 data: 0.0558 max mem: 9377 +Train: [48] [4000/6250] eta: 0:06:00 lr: 0.000070 grad: 0.1498 (0.1524) loss: 0.7765 (0.7682) time: 0.1649 data: 0.0717 max mem: 9377 +Train: [48] [4100/6250] eta: 0:05:46 lr: 0.000070 grad: 0.1384 (0.1523) loss: 0.7757 (0.7683) time: 0.1997 data: 0.1184 max mem: 9377 +Train: [48] [4200/6250] eta: 0:05:30 lr: 0.000070 grad: 0.1440 (0.1521) loss: 0.7713 (0.7684) time: 0.1880 data: 0.1020 max mem: 9377 +Train: [48] [4300/6250] eta: 0:05:14 lr: 0.000070 grad: 0.1490 (0.1520) loss: 0.7707 (0.7685) time: 0.1828 data: 0.0952 max mem: 9377 +Train: [48] [4400/6250] eta: 0:04:58 lr: 0.000070 grad: 0.1495 (0.1520) loss: 0.7674 (0.7686) time: 0.1774 data: 0.0853 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:43 lr: 0.000070 grad: 0.1347 (0.1519) loss: 0.7757 (0.7687) time: 0.1536 data: 0.0579 max mem: 9377 +Train: [48] [4600/6250] eta: 0:04:27 lr: 0.000070 grad: 0.1412 (0.1519) loss: 0.7721 (0.7687) time: 0.1778 data: 0.0812 max mem: 9377 +Train: [48] [4700/6250] eta: 0:04:11 lr: 0.000070 grad: 0.1406 (0.1519) loss: 0.7691 (0.7688) time: 0.1827 data: 0.0925 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:55 lr: 0.000070 grad: 0.1521 (0.1518) loss: 0.7730 (0.7688) time: 0.1695 data: 0.0825 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:39 lr: 0.000070 grad: 0.1442 (0.1518) loss: 0.7702 (0.7688) time: 0.1597 data: 0.0639 max mem: 9377 +Train: [48] [5000/6250] eta: 0:03:23 lr: 0.000070 grad: 0.1434 (0.1518) loss: 0.7762 (0.7689) time: 0.2214 data: 0.1428 max mem: 9377 +Train: [48] [5100/6250] eta: 0:03:06 lr: 0.000070 grad: 0.1466 (0.1518) loss: 0.7666 (0.7688) time: 0.1534 data: 0.0681 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:50 lr: 0.000070 grad: 0.1466 (0.1517) loss: 0.7704 (0.7687) time: 0.1436 data: 0.0585 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:34 lr: 0.000070 grad: 0.1443 (0.1517) loss: 0.7701 (0.7687) time: 0.1535 data: 0.0667 max mem: 9377 +Train: [48] [5400/6250] eta: 0:02:18 lr: 0.000070 grad: 0.1507 (0.1518) loss: 0.7672 (0.7686) time: 0.1705 data: 0.0763 max mem: 9377 +Train: [48] [5500/6250] eta: 0:02:02 lr: 0.000070 grad: 0.1477 (0.1517) loss: 0.7660 (0.7686) time: 0.1496 data: 0.0533 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:45 lr: 0.000070 grad: 0.1340 (0.1516) loss: 0.7698 (0.7686) time: 0.1572 data: 0.0670 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:29 lr: 0.000070 grad: 0.1473 (0.1516) loss: 0.7633 (0.7686) time: 0.1495 data: 0.0483 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:13 lr: 0.000070 grad: 0.1461 (0.1515) loss: 0.7571 (0.7686) time: 0.1556 data: 0.0716 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:56 lr: 0.000070 grad: 0.1428 (0.1516) loss: 0.7666 (0.7686) time: 0.1413 data: 0.0567 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:40 lr: 0.000070 grad: 0.1365 (0.1515) loss: 0.7517 (0.7686) time: 0.1635 data: 0.0770 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:24 lr: 0.000070 grad: 0.1377 (0.1515) loss: 0.7690 (0.7685) time: 0.1515 data: 0.0640 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:08 lr: 0.000070 grad: 0.1454 (0.1515) loss: 0.7644 (0.7685) time: 0.1740 data: 0.0870 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1397 (0.1514) loss: 0.7727 (0.7685) time: 0.1678 data: 0.0845 max mem: 9377 +Train: [48] Total time: 0:16:59 (0.1632 s / it) +Averaged stats: lr: 0.000070 grad: 0.1397 (0.1514) loss: 0.7727 (0.7685) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:05:52 loss: 0.8457 (0.8457) time: 5.6789 data: 5.6490 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8504 (0.8515) time: 0.1462 data: 0.1206 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (hcp-train-subset): loss: 0.8504 (0.8515) +Eval (hcp-val): [48] [ 0/62] eta: 0:04:55 loss: 0.8473 (0.8473) time: 4.7665 data: 4.7268 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8514 (0.8498) time: 0.1211 data: 0.0958 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-val): loss: 0.8514 (0.8498) +Eval (nsd-val): [48] [ 0/62] eta: 0:04:25 loss: 0.8146 (0.8146) time: 4.2862 data: 4.2225 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8272 (0.8287) time: 0.1405 data: 0.1147 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (nsd-val): loss: 0.8272 (0.8287) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 14:29:07 lr: 0.000070 grad: 0.2103 (0.2103) loss: 0.8189 (0.8189) time: 8.3436 data: 8.2383 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:23:27 lr: 0.000070 grad: 0.1808 (0.2361) loss: 0.7745 (0.7798) time: 0.1593 data: 0.0460 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:19:57 lr: 0.000070 grad: 0.1832 (0.2155) loss: 0.7575 (0.7780) time: 0.1679 data: 0.0697 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:18:23 lr: 0.000070 grad: 0.1500 (0.2004) loss: 0.7735 (0.7769) time: 0.1617 data: 0.0637 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:17:20 lr: 0.000070 grad: 0.1378 (0.1882) loss: 0.7861 (0.7780) time: 0.1583 data: 0.0577 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:16:33 lr: 0.000070 grad: 0.1416 (0.1803) loss: 0.7852 (0.7794) time: 0.1405 data: 0.0432 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:15:56 lr: 0.000070 grad: 0.1356 (0.1748) loss: 0.7871 (0.7803) time: 0.1466 data: 0.0485 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:15:34 lr: 0.000069 grad: 0.1435 (0.1700) loss: 0.7806 (0.7811) time: 0.1836 data: 0.0824 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:15:04 lr: 0.000069 grad: 0.1365 (0.1662) loss: 0.7877 (0.7817) time: 0.1553 data: 0.0622 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:14:46 lr: 0.000069 grad: 0.1410 (0.1641) loss: 0.7798 (0.7814) time: 0.1906 data: 0.1063 max mem: 9377 +Train: [49] [1000/6250] eta: 0:14:29 lr: 0.000069 grad: 0.1422 (0.1622) loss: 0.7752 (0.7813) time: 0.1665 data: 0.0806 max mem: 9377 +Train: [49] [1100/6250] eta: 0:14:17 lr: 0.000069 grad: 0.1385 (0.1605) loss: 0.7758 (0.7805) time: 0.1749 data: 0.0857 max mem: 9377 +Train: [49] [1200/6250] eta: 0:13:57 lr: 0.000069 grad: 0.1350 (0.1586) loss: 0.7747 (0.7799) time: 0.1568 data: 0.0720 max mem: 9377 +Train: [49] [1300/6250] eta: 0:13:38 lr: 0.000069 grad: 0.1384 (0.1574) loss: 0.7742 (0.7794) time: 0.1466 data: 0.0613 max mem: 9377 +Train: [49] [1400/6250] eta: 0:13:21 lr: 0.000069 grad: 0.1456 (0.1563) loss: 0.7618 (0.7788) time: 0.1754 data: 0.0828 max mem: 9377 +Train: [49] [1500/6250] eta: 0:13:04 lr: 0.000069 grad: 0.1373 (0.1551) loss: 0.7726 (0.7783) time: 0.1383 data: 0.0331 max mem: 9377 +Train: [49] [1600/6250] eta: 0:12:49 lr: 0.000069 grad: 0.1392 (0.1544) loss: 0.7664 (0.7778) time: 0.1664 data: 0.0616 max mem: 9377 +Train: [49] [1700/6250] eta: 0:12:37 lr: 0.000069 grad: 0.1384 (0.1538) loss: 0.7709 (0.7772) time: 0.1955 data: 0.0997 max mem: 9377 +Train: [49] [1800/6250] eta: 0:12:18 lr: 0.000069 grad: 0.1347 (0.1530) loss: 0.7737 (0.7769) time: 0.1748 data: 0.0751 max mem: 9377 +Train: [49] [1900/6250] eta: 0:11:58 lr: 0.000069 grad: 0.1363 (0.1527) loss: 0.7723 (0.7767) time: 0.1507 data: 0.0597 max mem: 9377 +Train: [49] [2000/6250] eta: 0:11:39 lr: 0.000069 grad: 0.1425 (0.1525) loss: 0.7676 (0.7764) time: 0.1445 data: 0.0500 max mem: 9377 +Train: [49] [2100/6250] eta: 0:11:21 lr: 0.000069 grad: 0.1333 (0.1520) loss: 0.7792 (0.7763) time: 0.1763 data: 0.0977 max mem: 9377 +Train: [49] [2200/6250] eta: 0:11:06 lr: 0.000069 grad: 0.1407 (0.1519) loss: 0.7758 (0.7761) time: 0.1539 data: 0.0621 max mem: 9377 +Train: [49] [2300/6250] eta: 0:10:51 lr: 0.000069 grad: 0.1317 (0.1514) loss: 0.7759 (0.7761) time: 0.1570 data: 0.0686 max mem: 9377 +Train: [49] [2400/6250] eta: 0:10:34 lr: 0.000069 grad: 0.1360 (0.1511) loss: 0.7840 (0.7760) time: 0.1449 data: 0.0455 max mem: 9377 +Train: [49] [2500/6250] eta: 0:10:17 lr: 0.000069 grad: 0.1354 (0.1506) loss: 0.7821 (0.7761) time: 0.1512 data: 0.0643 max mem: 9377 +Train: [49] [2600/6250] eta: 0:10:00 lr: 0.000069 grad: 0.1350 (0.1501) loss: 0.7778 (0.7762) time: 0.1688 data: 0.0799 max mem: 9377 +Train: [49] [2700/6250] eta: 0:09:44 lr: 0.000069 grad: 0.1517 (0.1499) loss: 0.7770 (0.7760) time: 0.1833 data: 0.0987 max mem: 9377 +Train: [49] [2800/6250] eta: 0:09:26 lr: 0.000069 grad: 0.1337 (0.1496) loss: 0.7791 (0.7760) time: 0.1571 data: 0.0696 max mem: 9377 +Train: [49] [2900/6250] eta: 0:09:09 lr: 0.000069 grad: 0.1332 (0.1491) loss: 0.7784 (0.7761) time: 0.1570 data: 0.0763 max mem: 9377 +Train: [49] [3000/6250] eta: 0:08:52 lr: 0.000069 grad: 0.1384 (0.1489) loss: 0.7815 (0.7760) time: 0.1590 data: 0.0670 max mem: 9377 +Train: [49] [3100/6250] eta: 0:08:36 lr: 0.000069 grad: 0.1388 (0.1487) loss: 0.7702 (0.7759) time: 0.1770 data: 0.0867 max mem: 9377 +Train: [49] [3200/6250] eta: 0:08:20 lr: 0.000069 grad: 0.1381 (0.1486) loss: 0.7755 (0.7756) time: 0.2096 data: 0.1187 max mem: 9377 +Train: [49] [3300/6250] eta: 0:08:02 lr: 0.000069 grad: 0.1519 (0.1487) loss: 0.7604 (0.7753) time: 0.1781 data: 0.0912 max mem: 9377 +Train: [49] [3400/6250] eta: 0:07:45 lr: 0.000069 grad: 0.1364 (0.1486) loss: 0.7645 (0.7750) time: 0.1489 data: 0.0581 max mem: 9377 +Train: [49] [3500/6250] eta: 0:07:28 lr: 0.000069 grad: 0.1434 (0.1486) loss: 0.7604 (0.7748) time: 0.1533 data: 0.0625 max mem: 9377 +Train: [49] [3600/6250] eta: 0:07:12 lr: 0.000069 grad: 0.1486 (0.1488) loss: 0.7569 (0.7745) time: 0.1614 data: 0.0575 max mem: 9377 +Train: [49] [3700/6250] eta: 0:06:55 lr: 0.000069 grad: 0.1439 (0.1488) loss: 0.7634 (0.7741) time: 0.1585 data: 0.0717 max mem: 9377 +Train: [49] [3800/6250] eta: 0:06:38 lr: 0.000068 grad: 0.1401 (0.1487) loss: 0.7780 (0.7738) time: 0.1267 data: 0.0343 max mem: 9377 +Train: [49] [3900/6250] eta: 0:06:21 lr: 0.000068 grad: 0.1413 (0.1488) loss: 0.7795 (0.7737) time: 0.1518 data: 0.0524 max mem: 9377 +Train: [49] [4000/6250] eta: 0:06:04 lr: 0.000068 grad: 0.1438 (0.1487) loss: 0.7588 (0.7734) time: 0.1531 data: 0.0535 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:49 lr: 0.000068 grad: 0.1460 (0.1487) loss: 0.7679 (0.7733) time: 0.1832 data: 0.0920 max mem: 9377 +Train: [49] [4200/6250] eta: 0:05:33 lr: 0.000068 grad: 0.1453 (0.1487) loss: 0.7683 (0.7730) time: 0.1885 data: 0.1011 max mem: 9377 +Train: [49] [4300/6250] eta: 0:05:17 lr: 0.000068 grad: 0.1448 (0.1487) loss: 0.7613 (0.7729) time: 0.2037 data: 0.1192 max mem: 9377 +Train: [49] [4400/6250] eta: 0:05:00 lr: 0.000068 grad: 0.1531 (0.1488) loss: 0.7649 (0.7727) time: 0.1631 data: 0.0830 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:44 lr: 0.000068 grad: 0.1449 (0.1488) loss: 0.7569 (0.7725) time: 0.1785 data: 0.0930 max mem: 9377 +Train: [49] [4600/6250] eta: 0:04:28 lr: 0.000068 grad: 0.1428 (0.1489) loss: 0.7754 (0.7724) time: 0.1734 data: 0.0841 max mem: 9377 +Train: [49] [4700/6250] eta: 0:04:12 lr: 0.000068 grad: 0.1497 (0.1489) loss: 0.7541 (0.7723) time: 0.1681 data: 0.0766 max mem: 9377 +Train: [49] [4800/6250] eta: 0:03:55 lr: 0.000068 grad: 0.1582 (0.1491) loss: 0.7580 (0.7721) time: 0.1688 data: 0.0725 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:39 lr: 0.000068 grad: 0.1419 (0.1491) loss: 0.7568 (0.7720) time: 0.1545 data: 0.0636 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:23 lr: 0.000068 grad: 0.1461 (0.1491) loss: 0.7606 (0.7719) time: 0.1466 data: 0.0520 max mem: 9377 +Train: [49] [5100/6250] eta: 0:03:07 lr: 0.000068 grad: 0.1502 (0.1492) loss: 0.7528 (0.7717) time: 0.1809 data: 0.1016 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:51 lr: 0.000068 grad: 0.1500 (0.1492) loss: 0.7624 (0.7715) time: 0.1854 data: 0.0941 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:35 lr: 0.000068 grad: 0.1469 (0.1493) loss: 0.7628 (0.7713) time: 0.1732 data: 0.0758 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:18 lr: 0.000068 grad: 0.1476 (0.1493) loss: 0.7668 (0.7712) time: 0.1622 data: 0.0799 max mem: 9377 +Train: [49] [5500/6250] eta: 0:02:02 lr: 0.000068 grad: 0.1509 (0.1494) loss: 0.7584 (0.7711) time: 0.1643 data: 0.0744 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:46 lr: 0.000068 grad: 0.1444 (0.1494) loss: 0.7650 (0.7710) time: 0.1629 data: 0.0671 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:29 lr: 0.000068 grad: 0.1423 (0.1494) loss: 0.7717 (0.7709) time: 0.1574 data: 0.0606 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:13 lr: 0.000068 grad: 0.1381 (0.1494) loss: 0.7653 (0.7708) time: 0.1693 data: 0.0740 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:57 lr: 0.000068 grad: 0.1643 (0.1495) loss: 0.7666 (0.7707) time: 0.1312 data: 0.0428 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:40 lr: 0.000068 grad: 0.1466 (0.1495) loss: 0.7649 (0.7707) time: 0.1726 data: 0.0886 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:24 lr: 0.000068 grad: 0.1502 (0.1495) loss: 0.7744 (0.7706) time: 0.1564 data: 0.0647 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:08 lr: 0.000068 grad: 0.1522 (0.1495) loss: 0.7499 (0.7706) time: 0.1577 data: 0.0676 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.1481 (0.1495) loss: 0.7681 (0.7705) time: 0.1542 data: 0.0662 max mem: 9377 +Train: [49] Total time: 0:17:04 (0.1639 s / it) +Averaged stats: lr: 0.000068 grad: 0.1481 (0.1495) loss: 0.7681 (0.7705) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:04:57 loss: 0.8490 (0.8490) time: 4.7991 data: 4.7592 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8537 (0.8524) time: 0.1319 data: 0.1065 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:14 (0.2321 s / it) +Averaged stats (hcp-train-subset): loss: 0.8537 (0.8524) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [49] [ 0/62] eta: 0:05:36 loss: 0.8450 (0.8450) time: 5.4327 data: 5.4009 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8481 (0.8498) time: 0.1321 data: 0.1064 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (hcp-val): loss: 0.8481 (0.8498) +Making plots (hcp-val): example=14 +Eval (nsd-val): [49] [ 0/62] eta: 0:04:51 loss: 0.8166 (0.8166) time: 4.6972 data: 4.6305 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8282 (0.8278) time: 0.1200 data: 0.0947 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:14 (0.2306 s / it) +Averaged stats (nsd-val): loss: 0.8282 (0.8278) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 7:40:24 lr: 0.000068 grad: 0.3795 (0.3795) loss: 0.7827 (0.7827) time: 4.4199 data: 4.1456 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:22:37 lr: 0.000068 grad: 0.1860 (0.2514) loss: 0.7766 (0.7909) time: 0.1790 data: 0.0727 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:19:34 lr: 0.000068 grad: 0.1845 (0.2289) loss: 0.7610 (0.7828) time: 0.1852 data: 0.0911 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:17:49 lr: 0.000068 grad: 0.1776 (0.2126) loss: 0.7664 (0.7778) time: 0.1513 data: 0.0551 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:16:54 lr: 0.000068 grad: 0.1488 (0.2013) loss: 0.7763 (0.7770) time: 0.1466 data: 0.0512 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:16:14 lr: 0.000067 grad: 0.1540 (0.1923) loss: 0.7800 (0.7772) time: 0.1728 data: 0.0785 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:15:41 lr: 0.000067 grad: 0.1495 (0.1844) loss: 0.7773 (0.7777) time: 0.1488 data: 0.0582 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:15:10 lr: 0.000067 grad: 0.1451 (0.1793) loss: 0.7894 (0.7780) time: 0.1549 data: 0.0562 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:14:48 lr: 0.000067 grad: 0.1404 (0.1754) loss: 0.7898 (0.7782) time: 0.1712 data: 0.0797 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:14:25 lr: 0.000067 grad: 0.1386 (0.1728) loss: 0.7781 (0.7784) time: 0.1367 data: 0.0388 max mem: 9377 +Train: [50] [1000/6250] eta: 0:14:17 lr: 0.000067 grad: 0.1376 (0.1699) loss: 0.7846 (0.7784) time: 0.2009 data: 0.1276 max mem: 9377 +Train: [50] [1100/6250] eta: 0:13:56 lr: 0.000067 grad: 0.1425 (0.1673) loss: 0.7749 (0.7784) time: 0.1443 data: 0.0693 max mem: 9377 +Train: [50] [1200/6250] eta: 0:13:38 lr: 0.000067 grad: 0.1399 (0.1651) loss: 0.7691 (0.7783) time: 0.1580 data: 0.0824 max mem: 9377 +Train: [50] [1300/6250] eta: 0:13:22 lr: 0.000067 grad: 0.1353 (0.1633) loss: 0.7754 (0.7780) time: 0.1636 data: 0.0839 max mem: 9377 +Train: [50] [1400/6250] eta: 0:13:05 lr: 0.000067 grad: 0.1366 (0.1615) loss: 0.7629 (0.7778) time: 0.1706 data: 0.0742 max mem: 9377 +Train: [50] [1500/6250] eta: 0:12:49 lr: 0.000067 grad: 0.1369 (0.1601) loss: 0.7687 (0.7775) time: 0.1628 data: 0.0708 max mem: 9377 +Train: [50] [1600/6250] eta: 0:12:32 lr: 0.000067 grad: 0.1379 (0.1589) loss: 0.7724 (0.7771) time: 0.1603 data: 0.0718 max mem: 9377 +Train: [50] [1700/6250] eta: 0:12:16 lr: 0.000067 grad: 0.1368 (0.1579) loss: 0.7687 (0.7768) time: 0.1686 data: 0.0806 max mem: 9377 +Train: [50] [1800/6250] eta: 0:11:57 lr: 0.000067 grad: 0.1442 (0.1573) loss: 0.7702 (0.7764) time: 0.1528 data: 0.0677 max mem: 9377 +Train: [50] [1900/6250] eta: 0:11:38 lr: 0.000067 grad: 0.1566 (0.1568) loss: 0.7655 (0.7759) time: 0.1347 data: 0.0462 max mem: 9377 +Train: [50] [2000/6250] eta: 0:11:23 lr: 0.000067 grad: 0.1369 (0.1561) loss: 0.7806 (0.7755) time: 0.1743 data: 0.0769 max mem: 9377 +Train: [50] [2100/6250] eta: 0:11:06 lr: 0.000067 grad: 0.1526 (0.1556) loss: 0.7720 (0.7752) time: 0.1623 data: 0.0745 max mem: 9377 +Train: [50] [2200/6250] eta: 0:10:47 lr: 0.000067 grad: 0.1449 (0.1549) loss: 0.7647 (0.7751) time: 0.1396 data: 0.0527 max mem: 9377 +Train: [50] [2300/6250] eta: 0:10:30 lr: 0.000067 grad: 0.1355 (0.1542) loss: 0.7661 (0.7751) time: 0.1474 data: 0.0526 max mem: 9377 +Train: [50] [2400/6250] eta: 0:10:12 lr: 0.000067 grad: 0.1421 (0.1538) loss: 0.7621 (0.7751) time: 0.1370 data: 0.0552 max mem: 9377 +Train: [50] [2500/6250] eta: 0:09:56 lr: 0.000067 grad: 0.1423 (0.1534) loss: 0.7713 (0.7749) time: 0.1772 data: 0.0910 max mem: 9377 +Train: [50] [2600/6250] eta: 0:09:39 lr: 0.000067 grad: 0.1377 (0.1530) loss: 0.7731 (0.7748) time: 0.1564 data: 0.0791 max mem: 9377 +Train: [50] [2700/6250] eta: 0:09:23 lr: 0.000067 grad: 0.1388 (0.1530) loss: 0.7796 (0.7748) time: 0.1457 data: 0.0594 max mem: 9377 +Train: [50] [2800/6250] eta: 0:09:07 lr: 0.000067 grad: 0.1423 (0.1529) loss: 0.7613 (0.7746) time: 0.1449 data: 0.0625 max mem: 9377 +Train: [50] [2900/6250] eta: 0:08:51 lr: 0.000067 grad: 0.1513 (0.1528) loss: 0.7756 (0.7746) time: 0.1693 data: 0.0793 max mem: 9377 +Train: [50] [3000/6250] eta: 0:08:34 lr: 0.000067 grad: 0.1388 (0.1524) loss: 0.7767 (0.7745) time: 0.1471 data: 0.0570 max mem: 9377 +Train: [50] [3100/6250] eta: 0:08:17 lr: 0.000067 grad: 0.1432 (0.1523) loss: 0.7637 (0.7743) time: 0.1579 data: 0.0845 max mem: 9377 +Train: [50] [3200/6250] eta: 0:08:02 lr: 0.000067 grad: 0.1449 (0.1522) loss: 0.7643 (0.7741) time: 0.1681 data: 0.0724 max mem: 9377 +Train: [50] [3300/6250] eta: 0:07:45 lr: 0.000067 grad: 0.1465 (0.1521) loss: 0.7699 (0.7740) time: 0.1524 data: 0.0624 max mem: 9377 +Train: [50] [3400/6250] eta: 0:07:29 lr: 0.000067 grad: 0.1405 (0.1520) loss: 0.7690 (0.7738) time: 0.1512 data: 0.0633 max mem: 9377 +Train: [50] [3500/6250] eta: 0:07:13 lr: 0.000067 grad: 0.1529 (0.1519) loss: 0.7659 (0.7736) time: 0.1610 data: 0.0800 max mem: 9377 +Train: [50] [3600/6250] eta: 0:06:57 lr: 0.000066 grad: 0.1548 (0.1519) loss: 0.7674 (0.7735) time: 0.1583 data: 0.0790 max mem: 9377 +Train: [50] [3700/6250] eta: 0:06:41 lr: 0.000066 grad: 0.1468 (0.1519) loss: 0.7665 (0.7732) time: 0.1336 data: 0.0414 max mem: 9377 +Train: [50] [3800/6250] eta: 0:06:25 lr: 0.000066 grad: 0.1429 (0.1519) loss: 0.7591 (0.7730) time: 0.1387 data: 0.0517 max mem: 9377 +Train: [50] [3900/6250] eta: 0:06:09 lr: 0.000066 grad: 0.1411 (0.1518) loss: 0.7678 (0.7729) time: 0.1477 data: 0.0604 max mem: 9377 +Train: [50] [4000/6250] eta: 0:05:53 lr: 0.000066 grad: 0.1433 (0.1516) loss: 0.7625 (0.7727) time: 0.1560 data: 0.0729 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:38 lr: 0.000066 grad: 0.1338 (0.1515) loss: 0.7700 (0.7726) time: 0.1420 data: 0.0548 max mem: 9377 +Train: [50] [4200/6250] eta: 0:05:22 lr: 0.000066 grad: 0.1444 (0.1515) loss: 0.7662 (0.7725) time: 0.1467 data: 0.0648 max mem: 9377 +Train: [50] [4300/6250] eta: 0:05:07 lr: 0.000066 grad: 0.1379 (0.1514) loss: 0.7769 (0.7725) time: 0.1619 data: 0.0790 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:51 lr: 0.000066 grad: 0.1480 (0.1513) loss: 0.7539 (0.7723) time: 0.1621 data: 0.0725 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:36 lr: 0.000066 grad: 0.1437 (0.1513) loss: 0.7558 (0.7722) time: 0.2063 data: 0.1242 max mem: 9377 +Train: [50] [4600/6250] eta: 0:04:21 lr: 0.000066 grad: 0.1416 (0.1512) loss: 0.7715 (0.7722) time: 0.1588 data: 0.0771 max mem: 9377 +Train: [50] [4700/6250] eta: 0:04:05 lr: 0.000066 grad: 0.1427 (0.1511) loss: 0.7689 (0.7719) time: 0.1589 data: 0.0675 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:49 lr: 0.000066 grad: 0.1387 (0.1510) loss: 0.7565 (0.7718) time: 0.1433 data: 0.0564 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:33 lr: 0.000066 grad: 0.1451 (0.1510) loss: 0.7608 (0.7716) time: 0.1739 data: 0.0887 max mem: 9377 +Train: [50] [5000/6250] eta: 0:03:17 lr: 0.000066 grad: 0.1516 (0.1511) loss: 0.7541 (0.7714) time: 0.1483 data: 0.0629 max mem: 9377 +Train: [50] [5100/6250] eta: 0:03:02 lr: 0.000066 grad: 0.1585 (0.1511) loss: 0.7608 (0.7712) time: 0.1949 data: 0.1082 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:46 lr: 0.000066 grad: 0.1413 (0.1510) loss: 0.7603 (0.7710) time: 0.1742 data: 0.0922 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:30 lr: 0.000066 grad: 0.1455 (0.1509) loss: 0.7632 (0.7709) time: 0.1565 data: 0.0741 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:15 lr: 0.000066 grad: 0.1479 (0.1509) loss: 0.7636 (0.7708) time: 0.1478 data: 0.0501 max mem: 9377 +Train: [50] [5500/6250] eta: 0:01:59 lr: 0.000066 grad: 0.1433 (0.1508) loss: 0.7582 (0.7707) time: 0.1617 data: 0.0747 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:43 lr: 0.000066 grad: 0.1451 (0.1508) loss: 0.7641 (0.7707) time: 0.1548 data: 0.0681 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:27 lr: 0.000066 grad: 0.1414 (0.1509) loss: 0.7702 (0.7705) time: 0.1310 data: 0.0414 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:11 lr: 0.000066 grad: 0.1451 (0.1508) loss: 0.7719 (0.7705) time: 0.1632 data: 0.0795 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:55 lr: 0.000066 grad: 0.1388 (0.1507) loss: 0.7679 (0.7705) time: 0.1629 data: 0.0767 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:39 lr: 0.000066 grad: 0.1496 (0.1506) loss: 0.7727 (0.7704) time: 0.1634 data: 0.0694 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:23 lr: 0.000066 grad: 0.1462 (0.1505) loss: 0.7742 (0.7704) time: 0.1317 data: 0.0538 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:07 lr: 0.000066 grad: 0.1423 (0.1505) loss: 0.7610 (0.7704) time: 0.1617 data: 0.0711 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.1455 (0.1505) loss: 0.7687 (0.7704) time: 0.1886 data: 0.1065 max mem: 9377 +Train: [50] Total time: 0:16:37 (0.1595 s / it) +Averaged stats: lr: 0.000066 grad: 0.1455 (0.1505) loss: 0.7687 (0.7704) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:03:39 loss: 0.8477 (0.8477) time: 3.5338 data: 3.4698 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8516 (0.8533) time: 0.1371 data: 0.1120 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (hcp-train-subset): loss: 0.8516 (0.8533) +Eval (hcp-val): [50] [ 0/62] eta: 0:05:19 loss: 0.8490 (0.8490) time: 5.1483 data: 5.1172 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8508 (0.8510) time: 0.1311 data: 0.1059 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (hcp-val): loss: 0.8508 (0.8510) +Eval (nsd-val): [50] [ 0/62] eta: 0:05:06 loss: 0.8095 (0.8095) time: 4.9412 data: 4.9097 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8236 (0.8245) time: 0.1332 data: 0.1058 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (nsd-val): loss: 0.8236 (0.8245) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 12:15:16 lr: 0.000066 grad: 0.1372 (0.1372) loss: 0.8439 (0.8439) time: 7.0587 data: 6.9487 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:24:02 lr: 0.000066 grad: 0.1657 (0.2319) loss: 0.8026 (0.7999) time: 0.1765 data: 0.0715 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:21:01 lr: 0.000066 grad: 0.1646 (0.2047) loss: 0.7887 (0.7945) time: 0.1645 data: 0.0638 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:19:07 lr: 0.000065 grad: 0.1448 (0.1876) loss: 0.7857 (0.7930) time: 0.1733 data: 0.0766 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:17:51 lr: 0.000065 grad: 0.1408 (0.1782) loss: 0.7928 (0.7916) time: 0.1420 data: 0.0493 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:17:23 lr: 0.000065 grad: 0.1447 (0.1707) loss: 0.7932 (0.7909) time: 0.1859 data: 0.0879 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:16:53 lr: 0.000065 grad: 0.1484 (0.1668) loss: 0.7786 (0.7897) time: 0.1895 data: 0.1022 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:16:24 lr: 0.000065 grad: 0.1449 (0.1640) loss: 0.7763 (0.7880) time: 0.1572 data: 0.0542 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:15:58 lr: 0.000065 grad: 0.1440 (0.1619) loss: 0.7711 (0.7859) time: 0.1810 data: 0.0894 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:15:31 lr: 0.000065 grad: 0.1357 (0.1604) loss: 0.7707 (0.7840) time: 0.1843 data: 0.1041 max mem: 9377 +Train: [51] [1000/6250] eta: 0:15:09 lr: 0.000065 grad: 0.1531 (0.1600) loss: 0.7652 (0.7824) time: 0.1722 data: 0.0869 max mem: 9377 +Train: [51] [1100/6250] eta: 0:15:02 lr: 0.000065 grad: 0.1426 (0.1590) loss: 0.7639 (0.7808) time: 0.2051 data: 0.1258 max mem: 9377 +Train: [51] [1200/6250] eta: 0:14:44 lr: 0.000065 grad: 0.1454 (0.1583) loss: 0.7667 (0.7793) time: 0.1960 data: 0.1081 max mem: 9377 +Train: [51] [1300/6250] eta: 0:14:31 lr: 0.000065 grad: 0.1466 (0.1574) loss: 0.7640 (0.7782) time: 0.1473 data: 0.0541 max mem: 9377 +Train: [51] [1400/6250] eta: 0:14:17 lr: 0.000065 grad: 0.1507 (0.1567) loss: 0.7610 (0.7769) time: 0.1949 data: 0.1121 max mem: 9377 +Train: [51] [1500/6250] eta: 0:14:05 lr: 0.000065 grad: 0.1429 (0.1561) loss: 0.7652 (0.7760) time: 0.1859 data: 0.0852 max mem: 9377 +Train: [51] [1600/6250] eta: 0:13:49 lr: 0.000065 grad: 0.1407 (0.1555) loss: 0.7614 (0.7754) time: 0.1808 data: 0.0907 max mem: 9377 +Train: [51] [1700/6250] eta: 0:13:32 lr: 0.000065 grad: 0.1451 (0.1554) loss: 0.7702 (0.7745) time: 0.1975 data: 0.1077 max mem: 9377 +Train: [51] [1800/6250] eta: 0:13:10 lr: 0.000065 grad: 0.1553 (0.1551) loss: 0.7664 (0.7738) time: 0.1497 data: 0.0586 max mem: 9377 +Train: [51] [1900/6250] eta: 0:12:47 lr: 0.000065 grad: 0.1379 (0.1544) loss: 0.7680 (0.7735) time: 0.1442 data: 0.0484 max mem: 9377 +Train: [51] [2000/6250] eta: 0:12:26 lr: 0.000065 grad: 0.1451 (0.1541) loss: 0.7622 (0.7732) time: 0.1617 data: 0.0779 max mem: 9377 +Train: [51] [2100/6250] eta: 0:12:08 lr: 0.000065 grad: 0.1363 (0.1535) loss: 0.7706 (0.7732) time: 0.1922 data: 0.1081 max mem: 9377 +Train: [51] [2200/6250] eta: 0:11:47 lr: 0.000065 grad: 0.1474 (0.1531) loss: 0.7661 (0.7728) time: 0.1577 data: 0.0535 max mem: 9377 +Train: [51] [2300/6250] eta: 0:11:28 lr: 0.000065 grad: 0.1486 (0.1527) loss: 0.7713 (0.7727) time: 0.1303 data: 0.0312 max mem: 9377 +Train: [51] [2400/6250] eta: 0:11:09 lr: 0.000065 grad: 0.1419 (0.1523) loss: 0.7692 (0.7725) time: 0.1443 data: 0.0642 max mem: 9377 +Train: [51] [2500/6250] eta: 0:10:50 lr: 0.000065 grad: 0.1463 (0.1521) loss: 0.7715 (0.7723) time: 0.1647 data: 0.0762 max mem: 9377 +Train: [51] [2600/6250] eta: 0:10:30 lr: 0.000065 grad: 0.1386 (0.1518) loss: 0.7790 (0.7722) time: 0.1682 data: 0.0727 max mem: 9377 +Train: [51] [2700/6250] eta: 0:10:11 lr: 0.000065 grad: 0.1415 (0.1516) loss: 0.7684 (0.7722) time: 0.1429 data: 0.0508 max mem: 9377 +Train: [51] [2800/6250] eta: 0:09:52 lr: 0.000065 grad: 0.1446 (0.1514) loss: 0.7763 (0.7722) time: 0.1706 data: 0.0806 max mem: 9377 +Train: [51] [2900/6250] eta: 0:09:34 lr: 0.000065 grad: 0.1407 (0.1511) loss: 0.7817 (0.7723) time: 0.1788 data: 0.0894 max mem: 9377 +Train: [51] [3000/6250] eta: 0:09:15 lr: 0.000065 grad: 0.1391 (0.1507) loss: 0.7745 (0.7724) time: 0.1745 data: 0.0812 max mem: 9377 +Train: [51] [3100/6250] eta: 0:08:56 lr: 0.000065 grad: 0.1384 (0.1503) loss: 0.7757 (0.7727) time: 0.1541 data: 0.0680 max mem: 9377 +Train: [51] [3200/6250] eta: 0:08:38 lr: 0.000065 grad: 0.1398 (0.1500) loss: 0.7745 (0.7729) time: 0.1708 data: 0.0774 max mem: 9377 +Train: [51] [3300/6250] eta: 0:08:21 lr: 0.000065 grad: 0.1382 (0.1497) loss: 0.7846 (0.7732) time: 0.1784 data: 0.0944 max mem: 9377 +Train: [51] [3400/6250] eta: 0:08:03 lr: 0.000064 grad: 0.1376 (0.1496) loss: 0.7736 (0.7733) time: 0.1453 data: 0.0611 max mem: 9377 +Train: [51] [3500/6250] eta: 0:07:45 lr: 0.000064 grad: 0.1366 (0.1494) loss: 0.7724 (0.7734) time: 0.1528 data: 0.0677 max mem: 9377 +Train: [51] [3600/6250] eta: 0:07:28 lr: 0.000064 grad: 0.1452 (0.1492) loss: 0.7747 (0.7734) time: 0.1584 data: 0.0613 max mem: 9377 +Train: [51] [3700/6250] eta: 0:07:10 lr: 0.000064 grad: 0.1366 (0.1490) loss: 0.7814 (0.7735) time: 0.1373 data: 0.0614 max mem: 9377 +Train: [51] [3800/6250] eta: 0:06:53 lr: 0.000064 grad: 0.1426 (0.1489) loss: 0.7684 (0.7735) time: 0.1487 data: 0.0574 max mem: 9377 +Train: [51] [3900/6250] eta: 0:06:36 lr: 0.000064 grad: 0.1484 (0.1488) loss: 0.7706 (0.7735) time: 0.1556 data: 0.0688 max mem: 9377 +Train: [51] [4000/6250] eta: 0:06:20 lr: 0.000064 grad: 0.1463 (0.1488) loss: 0.7718 (0.7734) time: 0.1736 data: 0.0866 max mem: 9377 +Train: [51] [4100/6250] eta: 0:06:03 lr: 0.000064 grad: 0.1492 (0.1487) loss: 0.7660 (0.7734) time: 0.1870 data: 0.1020 max mem: 9377 +Train: [51] [4200/6250] eta: 0:05:46 lr: 0.000064 grad: 0.1521 (0.1487) loss: 0.7705 (0.7733) time: 0.1710 data: 0.0753 max mem: 9377 +Train: [51] [4300/6250] eta: 0:05:29 lr: 0.000064 grad: 0.1474 (0.1487) loss: 0.7664 (0.7732) time: 0.1981 data: 0.1151 max mem: 9377 +Train: [51] [4400/6250] eta: 0:05:12 lr: 0.000064 grad: 0.1392 (0.1487) loss: 0.7684 (0.7731) time: 0.1827 data: 0.0896 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:55 lr: 0.000064 grad: 0.1471 (0.1487) loss: 0.7579 (0.7729) time: 0.1773 data: 0.0889 max mem: 9377 +Train: [51] [4600/6250] eta: 0:04:38 lr: 0.000064 grad: 0.1425 (0.1488) loss: 0.7699 (0.7728) time: 0.1691 data: 0.0799 max mem: 9377 +Train: [51] [4700/6250] eta: 0:04:21 lr: 0.000064 grad: 0.1568 (0.1490) loss: 0.7658 (0.7727) time: 0.1646 data: 0.0727 max mem: 9377 +Train: [51] [4800/6250] eta: 0:04:03 lr: 0.000064 grad: 0.1478 (0.1491) loss: 0.7649 (0.7725) time: 0.1495 data: 0.0651 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:46 lr: 0.000064 grad: 0.1475 (0.1491) loss: 0.7620 (0.7723) time: 0.1393 data: 0.0395 max mem: 9377 +Train: [51] [5000/6250] eta: 0:03:29 lr: 0.000064 grad: 0.1469 (0.1492) loss: 0.7685 (0.7721) time: 0.1999 data: 0.1137 max mem: 9377 +Train: [51] [5100/6250] eta: 0:03:12 lr: 0.000064 grad: 0.1477 (0.1492) loss: 0.7627 (0.7720) time: 0.1600 data: 0.0741 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:55 lr: 0.000064 grad: 0.1466 (0.1492) loss: 0.7649 (0.7718) time: 0.1554 data: 0.0568 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:38 lr: 0.000064 grad: 0.1437 (0.1492) loss: 0.7655 (0.7718) time: 0.1412 data: 0.0437 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:22 lr: 0.000064 grad: 0.1513 (0.1493) loss: 0.7649 (0.7717) time: 0.1611 data: 0.0620 max mem: 9377 +Train: [51] [5500/6250] eta: 0:02:05 lr: 0.000064 grad: 0.1461 (0.1494) loss: 0.7639 (0.7715) time: 0.1419 data: 0.0581 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:48 lr: 0.000064 grad: 0.1580 (0.1496) loss: 0.7626 (0.7714) time: 0.1548 data: 0.0632 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:31 lr: 0.000064 grad: 0.1538 (0.1497) loss: 0.7600 (0.7712) time: 0.1473 data: 0.0520 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:14 lr: 0.000064 grad: 0.1566 (0.1498) loss: 0.7566 (0.7710) time: 0.1432 data: 0.0457 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:58 lr: 0.000064 grad: 0.1487 (0.1500) loss: 0.7679 (0.7709) time: 0.1613 data: 0.0778 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:41 lr: 0.000064 grad: 0.1460 (0.1500) loss: 0.7515 (0.7707) time: 0.1572 data: 0.0686 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:24 lr: 0.000064 grad: 0.1605 (0.1502) loss: 0.7539 (0.7705) time: 0.1600 data: 0.0791 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:08 lr: 0.000064 grad: 0.1539 (0.1503) loss: 0.7553 (0.7703) time: 0.1538 data: 0.0734 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1554 (0.1503) loss: 0.7597 (0.7703) time: 0.1634 data: 0.0749 max mem: 9377 +Train: [51] Total time: 0:17:19 (0.1663 s / it) +Averaged stats: lr: 0.000064 grad: 0.1554 (0.1503) loss: 0.7597 (0.7703) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:04:31 loss: 0.8485 (0.8485) time: 4.3839 data: 4.3076 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8510 (0.8541) time: 0.1329 data: 0.1078 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (hcp-train-subset): loss: 0.8510 (0.8541) +Eval (hcp-val): [51] [ 0/62] eta: 0:03:56 loss: 0.8488 (0.8488) time: 3.8171 data: 3.7190 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8485 (0.8509) time: 0.1283 data: 0.1031 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:14 (0.2348 s / it) +Averaged stats (hcp-val): loss: 0.8485 (0.8509) +Eval (nsd-val): [51] [ 0/62] eta: 0:05:40 loss: 0.8110 (0.8110) time: 5.4856 data: 5.4536 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8241 (0.8248) time: 0.1355 data: 0.1087 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:14 (0.2303 s / it) +Averaged stats (nsd-val): loss: 0.8241 (0.8248) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 11:10:12 lr: 0.000064 grad: 0.2611 (0.2611) loss: 0.8206 (0.8206) time: 6.4340 data: 6.3349 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:22:15 lr: 0.000063 grad: 0.2328 (0.2246) loss: 0.7687 (0.8087) time: 0.1603 data: 0.0629 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:19:33 lr: 0.000063 grad: 0.1844 (0.2169) loss: 0.7836 (0.7957) time: 0.1722 data: 0.0675 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:18:44 lr: 0.000063 grad: 0.1446 (0.1998) loss: 0.7930 (0.7914) time: 0.1801 data: 0.0818 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:17:49 lr: 0.000063 grad: 0.1573 (0.1900) loss: 0.7874 (0.7892) time: 0.1818 data: 0.0963 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:17:06 lr: 0.000063 grad: 0.1587 (0.1856) loss: 0.7820 (0.7866) time: 0.1636 data: 0.0709 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:16:39 lr: 0.000063 grad: 0.1619 (0.1816) loss: 0.7758 (0.7843) time: 0.1839 data: 0.0977 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:16:11 lr: 0.000063 grad: 0.1560 (0.1778) loss: 0.7712 (0.7827) time: 0.1765 data: 0.0834 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:15:40 lr: 0.000063 grad: 0.1521 (0.1748) loss: 0.7632 (0.7811) time: 0.1296 data: 0.0320 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:15:16 lr: 0.000063 grad: 0.1557 (0.1732) loss: 0.7698 (0.7790) time: 0.1518 data: 0.0633 max mem: 9377 +Train: [52] [1000/6250] eta: 0:14:49 lr: 0.000063 grad: 0.1466 (0.1712) loss: 0.7647 (0.7777) time: 0.1370 data: 0.0477 max mem: 9377 +Train: [52] [1100/6250] eta: 0:14:32 lr: 0.000063 grad: 0.1400 (0.1692) loss: 0.7707 (0.7767) time: 0.1624 data: 0.0690 max mem: 9377 +Train: [52] [1200/6250] eta: 0:14:12 lr: 0.000063 grad: 0.1471 (0.1676) loss: 0.7611 (0.7758) time: 0.1692 data: 0.0742 max mem: 9377 +Train: [52] [1300/6250] eta: 0:13:53 lr: 0.000063 grad: 0.1422 (0.1663) loss: 0.7783 (0.7749) time: 0.1480 data: 0.0718 max mem: 9377 +Train: [52] [1400/6250] eta: 0:13:34 lr: 0.000063 grad: 0.1484 (0.1649) loss: 0.7601 (0.7741) time: 0.1742 data: 0.0829 max mem: 9377 +Train: [52] [1500/6250] eta: 0:13:19 lr: 0.000063 grad: 0.1507 (0.1638) loss: 0.7679 (0.7735) time: 0.1539 data: 0.0589 max mem: 9377 +Train: [52] [1600/6250] eta: 0:13:01 lr: 0.000063 grad: 0.1441 (0.1628) loss: 0.7674 (0.7729) time: 0.1512 data: 0.0658 max mem: 9377 +Train: [52] [1700/6250] eta: 0:12:44 lr: 0.000063 grad: 0.1447 (0.1621) loss: 0.7664 (0.7725) time: 0.1802 data: 0.0912 max mem: 9377 +Train: [52] [1800/6250] eta: 0:12:26 lr: 0.000063 grad: 0.1459 (0.1614) loss: 0.7568 (0.7722) time: 0.1540 data: 0.0504 max mem: 9377 +Train: [52] [1900/6250] eta: 0:12:06 lr: 0.000063 grad: 0.1490 (0.1608) loss: 0.7680 (0.7719) time: 0.1364 data: 0.0564 max mem: 9377 +Train: [52] [2000/6250] eta: 0:11:48 lr: 0.000063 grad: 0.1500 (0.1603) loss: 0.7653 (0.7716) time: 0.1580 data: 0.0554 max mem: 9377 +Train: [52] [2100/6250] eta: 0:11:29 lr: 0.000063 grad: 0.1585 (0.1600) loss: 0.7605 (0.7712) time: 0.1788 data: 0.0837 max mem: 9377 +Train: [52] [2200/6250] eta: 0:11:09 lr: 0.000063 grad: 0.1510 (0.1595) loss: 0.7626 (0.7709) time: 0.1656 data: 0.0772 max mem: 9377 +Train: [52] [2300/6250] eta: 0:10:51 lr: 0.000063 grad: 0.1401 (0.1590) loss: 0.7564 (0.7706) time: 0.1428 data: 0.0545 max mem: 9377 +Train: [52] [2400/6250] eta: 0:10:33 lr: 0.000063 grad: 0.1563 (0.1589) loss: 0.7582 (0.7702) time: 0.1444 data: 0.0553 max mem: 9377 +Train: [52] [2500/6250] eta: 0:10:15 lr: 0.000063 grad: 0.1429 (0.1587) loss: 0.7581 (0.7699) time: 0.1508 data: 0.0694 max mem: 9377 +Train: [52] [2600/6250] eta: 0:09:57 lr: 0.000063 grad: 0.1481 (0.1583) loss: 0.7583 (0.7697) time: 0.1590 data: 0.0723 max mem: 9377 +Train: [52] [2700/6250] eta: 0:09:41 lr: 0.000063 grad: 0.1450 (0.1580) loss: 0.7640 (0.7694) time: 0.1372 data: 0.0498 max mem: 9377 +Train: [52] [2800/6250] eta: 0:09:23 lr: 0.000063 grad: 0.1450 (0.1578) loss: 0.7695 (0.7692) time: 0.1616 data: 0.0745 max mem: 9377 +Train: [52] [2900/6250] eta: 0:09:06 lr: 0.000063 grad: 0.1482 (0.1576) loss: 0.7619 (0.7691) time: 0.1579 data: 0.0684 max mem: 9377 +Train: [52] [3000/6250] eta: 0:08:50 lr: 0.000063 grad: 0.1484 (0.1575) loss: 0.7587 (0.7690) time: 0.1581 data: 0.0738 max mem: 9377 +Train: [52] [3100/6250] eta: 0:08:33 lr: 0.000063 grad: 0.1553 (0.1572) loss: 0.7559 (0.7689) time: 0.1534 data: 0.0717 max mem: 9377 +Train: [52] [3200/6250] eta: 0:08:16 lr: 0.000062 grad: 0.1470 (0.1572) loss: 0.7720 (0.7688) time: 0.1394 data: 0.0560 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:59 lr: 0.000062 grad: 0.1491 (0.1571) loss: 0.7697 (0.7686) time: 0.1546 data: 0.0575 max mem: 9377 +Train: [52] [3400/6250] eta: 0:07:43 lr: 0.000062 grad: 0.1439 (0.1570) loss: 0.7542 (0.7683) time: 0.1650 data: 0.0842 max mem: 9377 +Train: [52] [3500/6250] eta: 0:07:26 lr: 0.000062 grad: 0.1636 (0.1574) loss: 0.7611 (0.7682) time: 0.1421 data: 0.0583 max mem: 9377 +Train: [52] [3600/6250] eta: 0:07:10 lr: 0.000062 grad: 0.1421 (0.1574) loss: 0.7775 (0.7680) time: 0.1545 data: 0.0563 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:53 lr: 0.000062 grad: 0.1498 (0.1573) loss: 0.7539 (0.7678) time: 0.1456 data: 0.0514 max mem: 9377 +Train: [52] [3800/6250] eta: 0:06:36 lr: 0.000062 grad: 0.1452 (0.1574) loss: 0.7637 (0.7676) time: 0.1508 data: 0.0608 max mem: 9377 +Train: [52] [3900/6250] eta: 0:06:19 lr: 0.000062 grad: 0.1528 (0.1575) loss: 0.7622 (0.7674) time: 0.1526 data: 0.0681 max mem: 9377 +Train: [52] [4000/6250] eta: 0:06:04 lr: 0.000062 grad: 0.1570 (0.1575) loss: 0.7455 (0.7672) time: 0.1776 data: 0.0947 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:48 lr: 0.000062 grad: 0.1490 (0.1575) loss: 0.7684 (0.7669) time: 0.1471 data: 0.0623 max mem: 9377 +Train: [52] [4200/6250] eta: 0:05:31 lr: 0.000062 grad: 0.1565 (0.1574) loss: 0.7577 (0.7668) time: 0.1538 data: 0.0717 max mem: 9377 +Train: [52] [4300/6250] eta: 0:05:15 lr: 0.000062 grad: 0.1642 (0.1575) loss: 0.7585 (0.7666) time: 0.1645 data: 0.0711 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:59 lr: 0.000062 grad: 0.1619 (0.1576) loss: 0.7510 (0.7663) time: 0.1723 data: 0.0825 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:43 lr: 0.000062 grad: 0.1581 (0.1576) loss: 0.7583 (0.7661) time: 0.1813 data: 0.0937 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:27 lr: 0.000062 grad: 0.1605 (0.1577) loss: 0.7562 (0.7660) time: 0.1393 data: 0.0493 max mem: 9377 +Train: [52] [4700/6250] eta: 0:04:11 lr: 0.000062 grad: 0.1515 (0.1578) loss: 0.7644 (0.7660) time: 0.1594 data: 0.0702 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:54 lr: 0.000062 grad: 0.1496 (0.1577) loss: 0.7616 (0.7659) time: 0.1622 data: 0.0734 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:38 lr: 0.000062 grad: 0.1486 (0.1576) loss: 0.7669 (0.7659) time: 0.1452 data: 0.0556 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:22 lr: 0.000062 grad: 0.1430 (0.1575) loss: 0.7608 (0.7659) time: 0.1536 data: 0.0647 max mem: 9377 +Train: [52] [5100/6250] eta: 0:03:05 lr: 0.000062 grad: 0.1476 (0.1573) loss: 0.7662 (0.7658) time: 0.1564 data: 0.0756 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:49 lr: 0.000062 grad: 0.1556 (0.1573) loss: 0.7561 (0.7657) time: 0.1518 data: 0.0630 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:33 lr: 0.000062 grad: 0.1501 (0.1572) loss: 0.7624 (0.7657) time: 0.1606 data: 0.0750 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:17 lr: 0.000062 grad: 0.1552 (0.1572) loss: 0.7510 (0.7655) time: 0.1476 data: 0.0561 max mem: 9377 +Train: [52] [5500/6250] eta: 0:02:01 lr: 0.000062 grad: 0.1414 (0.1571) loss: 0.7714 (0.7654) time: 0.1644 data: 0.0805 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:44 lr: 0.000062 grad: 0.1537 (0.1570) loss: 0.7621 (0.7653) time: 0.1842 data: 0.0980 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:28 lr: 0.000062 grad: 0.1455 (0.1569) loss: 0.7655 (0.7653) time: 0.1444 data: 0.0518 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:12 lr: 0.000062 grad: 0.1432 (0.1568) loss: 0.7664 (0.7653) time: 0.1422 data: 0.0447 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:56 lr: 0.000062 grad: 0.1504 (0.1568) loss: 0.7708 (0.7653) time: 0.1315 data: 0.0314 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:40 lr: 0.000062 grad: 0.1421 (0.1566) loss: 0.7747 (0.7654) time: 0.1894 data: 0.1062 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:24 lr: 0.000062 grad: 0.1524 (0.1565) loss: 0.7616 (0.7654) time: 0.1344 data: 0.0449 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:08 lr: 0.000061 grad: 0.1448 (0.1564) loss: 0.7649 (0.7655) time: 0.1560 data: 0.0647 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1543 (0.1564) loss: 0.7568 (0.7654) time: 0.1858 data: 0.1003 max mem: 9377 +Train: [52] Total time: 0:16:47 (0.1613 s / it) +Averaged stats: lr: 0.000061 grad: 0.1543 (0.1564) loss: 0.7568 (0.7654) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:05:21 loss: 0.8458 (0.8458) time: 5.1934 data: 5.1273 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8499 (0.8529) time: 0.1564 data: 0.1313 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:15 (0.2508 s / it) +Averaged stats (hcp-train-subset): loss: 0.8499 (0.8529) +Eval (hcp-val): [52] [ 0/62] eta: 0:03:54 loss: 0.8448 (0.8448) time: 3.7762 data: 3.6932 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8493 (0.8505) time: 0.1443 data: 0.1171 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:16 (0.2600 s / it) +Averaged stats (hcp-val): loss: 0.8493 (0.8505) +Eval (nsd-val): [52] [ 0/62] eta: 0:06:04 loss: 0.8122 (0.8122) time: 5.8813 data: 5.8501 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8250 (0.8252) time: 0.1542 data: 0.1287 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:15 (0.2497 s / it) +Averaged stats (nsd-val): loss: 0.8250 (0.8252) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 11:32:32 lr: 0.000061 grad: 0.1835 (0.1835) loss: 0.8033 (0.8033) time: 6.6485 data: 6.4908 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:24:15 lr: 0.000061 grad: 0.2171 (0.2311) loss: 0.7834 (0.8029) time: 0.1904 data: 0.0823 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:21:08 lr: 0.000061 grad: 0.1652 (0.2111) loss: 0.7812 (0.7935) time: 0.1861 data: 0.0861 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:19:45 lr: 0.000061 grad: 0.1586 (0.1989) loss: 0.7831 (0.7883) time: 0.1850 data: 0.0892 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:18:55 lr: 0.000061 grad: 0.1868 (0.1933) loss: 0.7624 (0.7845) time: 0.1681 data: 0.0750 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:18:09 lr: 0.000061 grad: 0.1512 (0.1909) loss: 0.7736 (0.7814) time: 0.1750 data: 0.0865 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:17:24 lr: 0.000061 grad: 0.1405 (0.1855) loss: 0.7782 (0.7798) time: 0.1431 data: 0.0452 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:16:48 lr: 0.000061 grad: 0.1594 (0.1817) loss: 0.7662 (0.7777) time: 0.1557 data: 0.0588 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:16:15 lr: 0.000061 grad: 0.1496 (0.1784) loss: 0.7752 (0.7767) time: 0.1734 data: 0.0693 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:15:51 lr: 0.000061 grad: 0.1524 (0.1760) loss: 0.7691 (0.7759) time: 0.1860 data: 0.0964 max mem: 9377 +Train: [53] [1000/6250] eta: 0:15:20 lr: 0.000061 grad: 0.1419 (0.1735) loss: 0.7703 (0.7758) time: 0.1545 data: 0.0688 max mem: 9377 +Train: [53] [1100/6250] eta: 0:15:00 lr: 0.000061 grad: 0.1430 (0.1715) loss: 0.7725 (0.7751) time: 0.1574 data: 0.0655 max mem: 9377 +Train: [53] [1200/6250] eta: 0:14:36 lr: 0.000061 grad: 0.1435 (0.1696) loss: 0.7670 (0.7748) time: 0.1721 data: 0.0838 max mem: 9377 +Train: [53] [1300/6250] eta: 0:14:14 lr: 0.000061 grad: 0.1471 (0.1680) loss: 0.7710 (0.7746) time: 0.1705 data: 0.0836 max mem: 9377 +Train: [53] [1400/6250] eta: 0:13:54 lr: 0.000061 grad: 0.1451 (0.1669) loss: 0.7763 (0.7744) time: 0.1747 data: 0.0802 max mem: 9377 +Train: [53] [1500/6250] eta: 0:13:36 lr: 0.000061 grad: 0.1390 (0.1657) loss: 0.7770 (0.7742) time: 0.1569 data: 0.0715 max mem: 9377 +Train: [53] [1600/6250] eta: 0:13:20 lr: 0.000061 grad: 0.1443 (0.1648) loss: 0.7739 (0.7739) time: 0.1921 data: 0.1007 max mem: 9377 +Train: [53] [1700/6250] eta: 0:13:01 lr: 0.000061 grad: 0.1472 (0.1638) loss: 0.7755 (0.7740) time: 0.1535 data: 0.0655 max mem: 9377 +Train: [53] [1800/6250] eta: 0:12:41 lr: 0.000061 grad: 0.1455 (0.1629) loss: 0.7771 (0.7740) time: 0.1804 data: 0.0909 max mem: 9377 +Train: [53] [1900/6250] eta: 0:12:22 lr: 0.000061 grad: 0.1429 (0.1620) loss: 0.7700 (0.7739) time: 0.1888 data: 0.0893 max mem: 9377 +Train: [53] [2000/6250] eta: 0:12:03 lr: 0.000061 grad: 0.1435 (0.1611) loss: 0.7836 (0.7739) time: 0.1577 data: 0.0764 max mem: 9377 +Train: [53] [2100/6250] eta: 0:11:43 lr: 0.000061 grad: 0.1468 (0.1605) loss: 0.7691 (0.7736) time: 0.1610 data: 0.0677 max mem: 9377 +Train: [53] [2200/6250] eta: 0:11:23 lr: 0.000061 grad: 0.1446 (0.1599) loss: 0.7715 (0.7734) time: 0.1572 data: 0.0605 max mem: 9377 +Train: [53] [2300/6250] eta: 0:11:02 lr: 0.000061 grad: 0.1435 (0.1593) loss: 0.7804 (0.7734) time: 0.1487 data: 0.0584 max mem: 9377 +Train: [53] [2400/6250] eta: 0:10:44 lr: 0.000061 grad: 0.1459 (0.1587) loss: 0.7736 (0.7735) time: 0.1491 data: 0.0618 max mem: 9377 +Train: [53] [2500/6250] eta: 0:10:25 lr: 0.000061 grad: 0.1431 (0.1583) loss: 0.7725 (0.7735) time: 0.1564 data: 0.0683 max mem: 9377 +Train: [53] [2600/6250] eta: 0:10:06 lr: 0.000061 grad: 0.1352 (0.1577) loss: 0.7841 (0.7736) time: 0.1521 data: 0.0635 max mem: 9377 +Train: [53] [2700/6250] eta: 0:09:48 lr: 0.000061 grad: 0.1426 (0.1572) loss: 0.7701 (0.7736) time: 0.1542 data: 0.0712 max mem: 9377 +Train: [53] [2800/6250] eta: 0:09:31 lr: 0.000061 grad: 0.1426 (0.1567) loss: 0.7762 (0.7737) time: 0.2069 data: 0.1216 max mem: 9377 +Train: [53] [2900/6250] eta: 0:09:12 lr: 0.000061 grad: 0.1425 (0.1564) loss: 0.7731 (0.7738) time: 0.1506 data: 0.0626 max mem: 9377 +Train: [53] [3000/6250] eta: 0:08:54 lr: 0.000060 grad: 0.1414 (0.1561) loss: 0.7839 (0.7739) time: 0.1398 data: 0.0532 max mem: 9377 +Train: [53] [3100/6250] eta: 0:08:37 lr: 0.000060 grad: 0.1468 (0.1558) loss: 0.7694 (0.7740) time: 0.1495 data: 0.0692 max mem: 9377 +Train: [53] [3200/6250] eta: 0:08:19 lr: 0.000060 grad: 0.1439 (0.1556) loss: 0.7740 (0.7741) time: 0.1669 data: 0.0795 max mem: 9377 +Train: [53] [3300/6250] eta: 0:08:02 lr: 0.000060 grad: 0.1440 (0.1554) loss: 0.7644 (0.7741) time: 0.1570 data: 0.0650 max mem: 9377 +Train: [53] [3400/6250] eta: 0:07:46 lr: 0.000060 grad: 0.1429 (0.1553) loss: 0.7833 (0.7741) time: 0.1130 data: 0.0192 max mem: 9377 +Train: [53] [3500/6250] eta: 0:07:28 lr: 0.000060 grad: 0.1449 (0.1551) loss: 0.7697 (0.7740) time: 0.1559 data: 0.0643 max mem: 9377 +Train: [53] [3600/6250] eta: 0:07:11 lr: 0.000060 grad: 0.1421 (0.1550) loss: 0.7744 (0.7738) time: 0.1404 data: 0.0560 max mem: 9377 +Train: [53] [3700/6250] eta: 0:06:54 lr: 0.000060 grad: 0.1464 (0.1548) loss: 0.7683 (0.7737) time: 0.1524 data: 0.0661 max mem: 9377 +Train: [53] [3800/6250] eta: 0:06:37 lr: 0.000060 grad: 0.1385 (0.1546) loss: 0.7647 (0.7737) time: 0.1613 data: 0.0795 max mem: 9377 +Train: [53] [3900/6250] eta: 0:06:22 lr: 0.000060 grad: 0.1506 (0.1544) loss: 0.7624 (0.7735) time: 0.2051 data: 0.1233 max mem: 9377 +Train: [53] [4000/6250] eta: 0:06:06 lr: 0.000060 grad: 0.1463 (0.1543) loss: 0.7700 (0.7735) time: 0.3075 data: 0.2335 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:50 lr: 0.000060 grad: 0.1505 (0.1542) loss: 0.7719 (0.7734) time: 0.1716 data: 0.0872 max mem: 9377 +Train: [53] [4200/6250] eta: 0:05:33 lr: 0.000060 grad: 0.1522 (0.1541) loss: 0.7587 (0.7732) time: 0.1523 data: 0.0693 max mem: 9377 +Train: [53] [4300/6250] eta: 0:05:17 lr: 0.000060 grad: 0.1396 (0.1541) loss: 0.7688 (0.7730) time: 0.1540 data: 0.0664 max mem: 9377 +Train: [53] [4400/6250] eta: 0:05:00 lr: 0.000060 grad: 0.1418 (0.1539) loss: 0.7691 (0.7729) time: 0.1689 data: 0.0829 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:44 lr: 0.000060 grad: 0.1569 (0.1540) loss: 0.7638 (0.7727) time: 0.1555 data: 0.0552 max mem: 9377 +Train: [53] [4600/6250] eta: 0:04:28 lr: 0.000060 grad: 0.1448 (0.1542) loss: 0.7689 (0.7725) time: 0.1731 data: 0.0888 max mem: 9377 +Train: [53] [4700/6250] eta: 0:04:11 lr: 0.000060 grad: 0.1484 (0.1541) loss: 0.7724 (0.7724) time: 0.1511 data: 0.0579 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:55 lr: 0.000060 grad: 0.1440 (0.1540) loss: 0.7639 (0.7724) time: 0.1558 data: 0.0665 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:38 lr: 0.000060 grad: 0.1455 (0.1540) loss: 0.7621 (0.7722) time: 0.1299 data: 0.0414 max mem: 9377 +Train: [53] [5000/6250] eta: 0:03:22 lr: 0.000060 grad: 0.1433 (0.1539) loss: 0.7679 (0.7722) time: 0.1800 data: 0.0853 max mem: 9377 +Train: [53] [5100/6250] eta: 0:03:06 lr: 0.000060 grad: 0.1477 (0.1538) loss: 0.7686 (0.7722) time: 0.1668 data: 0.0745 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:50 lr: 0.000060 grad: 0.1552 (0.1538) loss: 0.7578 (0.7721) time: 0.1581 data: 0.0677 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:33 lr: 0.000060 grad: 0.1413 (0.1538) loss: 0.7675 (0.7721) time: 0.1515 data: 0.0514 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:17 lr: 0.000060 grad: 0.1493 (0.1537) loss: 0.7682 (0.7721) time: 0.1678 data: 0.0689 max mem: 9377 +Train: [53] [5500/6250] eta: 0:02:01 lr: 0.000060 grad: 0.1485 (0.1537) loss: 0.7623 (0.7720) time: 0.1517 data: 0.0535 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:45 lr: 0.000060 grad: 0.1539 (0.1538) loss: 0.7746 (0.7720) time: 0.1729 data: 0.0868 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:29 lr: 0.000060 grad: 0.1420 (0.1539) loss: 0.7731 (0.7720) time: 0.1467 data: 0.0483 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:12 lr: 0.000060 grad: 0.1440 (0.1538) loss: 0.7751 (0.7720) time: 0.1622 data: 0.0759 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:56 lr: 0.000060 grad: 0.1398 (0.1537) loss: 0.7764 (0.7720) time: 0.1531 data: 0.0602 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:40 lr: 0.000059 grad: 0.1457 (0.1538) loss: 0.7685 (0.7721) time: 0.1632 data: 0.0776 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:24 lr: 0.000059 grad: 0.1446 (0.1537) loss: 0.7738 (0.7721) time: 0.1459 data: 0.0574 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:08 lr: 0.000059 grad: 0.1477 (0.1537) loss: 0.7726 (0.7721) time: 0.1497 data: 0.0653 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1474 (0.1536) loss: 0.7737 (0.7722) time: 0.1370 data: 0.0397 max mem: 9377 +Train: [53] Total time: 0:16:54 (0.1622 s / it) +Averaged stats: lr: 0.000059 grad: 0.1474 (0.1536) loss: 0.7737 (0.7722) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:05:02 loss: 0.8457 (0.8457) time: 4.8778 data: 4.8417 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8521 (0.8532) time: 0.1386 data: 0.1095 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-train-subset): loss: 0.8521 (0.8532) +Eval (hcp-val): [53] [ 0/62] eta: 0:03:37 loss: 0.8495 (0.8495) time: 3.5063 data: 3.4261 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8504 (0.8517) time: 0.1448 data: 0.1195 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:14 (0.2339 s / it) +Averaged stats (hcp-val): loss: 0.8504 (0.8517) +Eval (nsd-val): [53] [ 0/62] eta: 0:04:26 loss: 0.8193 (0.8193) time: 4.2926 data: 4.2328 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8234 (0.8256) time: 0.0785 data: 0.0529 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:15 (0.2428 s / it) +Averaged stats (nsd-val): loss: 0.8234 (0.8256) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 10:22:29 lr: 0.000059 grad: 0.3823 (0.3823) loss: 0.8489 (0.8489) time: 5.9759 data: 5.8445 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:22:35 lr: 0.000059 grad: 0.1859 (0.2531) loss: 0.7929 (0.7959) time: 0.1769 data: 0.0718 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:19:40 lr: 0.000059 grad: 0.1828 (0.2205) loss: 0.7690 (0.7863) time: 0.1821 data: 0.0896 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:18:36 lr: 0.000059 grad: 0.1626 (0.2096) loss: 0.7668 (0.7808) time: 0.1520 data: 0.0510 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:17:45 lr: 0.000059 grad: 0.1664 (0.2007) loss: 0.7805 (0.7788) time: 0.1572 data: 0.0548 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:17:06 lr: 0.000059 grad: 0.1544 (0.1934) loss: 0.7656 (0.7770) time: 0.1533 data: 0.0584 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:16:34 lr: 0.000059 grad: 0.1679 (0.1877) loss: 0.7602 (0.7764) time: 0.1500 data: 0.0634 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:16:02 lr: 0.000059 grad: 0.1570 (0.1836) loss: 0.7727 (0.7756) time: 0.1717 data: 0.0738 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:15:37 lr: 0.000059 grad: 0.1577 (0.1804) loss: 0.7654 (0.7747) time: 0.1562 data: 0.0646 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:15:11 lr: 0.000059 grad: 0.1463 (0.1779) loss: 0.7657 (0.7743) time: 0.1458 data: 0.0533 max mem: 9377 +Train: [54] [1000/6250] eta: 0:14:47 lr: 0.000059 grad: 0.1411 (0.1750) loss: 0.7747 (0.7742) time: 0.1516 data: 0.0572 max mem: 9377 +Train: [54] [1100/6250] eta: 0:14:27 lr: 0.000059 grad: 0.1536 (0.1726) loss: 0.7654 (0.7738) time: 0.1675 data: 0.0897 max mem: 9377 +Train: [54] [1200/6250] eta: 0:14:10 lr: 0.000059 grad: 0.1395 (0.1705) loss: 0.7658 (0.7736) time: 0.1661 data: 0.0834 max mem: 9377 +Train: [54] [1300/6250] eta: 0:13:52 lr: 0.000059 grad: 0.1518 (0.1692) loss: 0.7525 (0.7730) time: 0.1801 data: 0.0898 max mem: 9377 +Train: [54] [1400/6250] eta: 0:13:36 lr: 0.000059 grad: 0.1402 (0.1679) loss: 0.7681 (0.7725) time: 0.1939 data: 0.1136 max mem: 9377 +Train: [54] [1500/6250] eta: 0:13:21 lr: 0.000059 grad: 0.1438 (0.1668) loss: 0.7644 (0.7719) time: 0.1786 data: 0.0981 max mem: 9377 +Train: [54] [1600/6250] eta: 0:13:05 lr: 0.000059 grad: 0.1436 (0.1658) loss: 0.7704 (0.7713) time: 0.1785 data: 0.0908 max mem: 9377 +Train: [54] [1700/6250] eta: 0:12:49 lr: 0.000059 grad: 0.1522 (0.1649) loss: 0.7599 (0.7708) time: 0.1768 data: 0.0780 max mem: 9377 +Train: [54] [1800/6250] eta: 0:12:31 lr: 0.000059 grad: 0.1482 (0.1642) loss: 0.7616 (0.7702) time: 0.1769 data: 0.0875 max mem: 9377 +Train: [54] [1900/6250] eta: 0:12:12 lr: 0.000059 grad: 0.1513 (0.1637) loss: 0.7583 (0.7695) time: 0.1641 data: 0.0731 max mem: 9377 +Train: [54] [2000/6250] eta: 0:11:54 lr: 0.000059 grad: 0.1522 (0.1635) loss: 0.7576 (0.7689) time: 0.1719 data: 0.0827 max mem: 9377 +Train: [54] [2100/6250] eta: 0:11:35 lr: 0.000059 grad: 0.1539 (0.1630) loss: 0.7564 (0.7684) time: 0.1699 data: 0.0875 max mem: 9377 +Train: [54] [2200/6250] eta: 0:11:18 lr: 0.000059 grad: 0.1452 (0.1625) loss: 0.7610 (0.7680) time: 0.1756 data: 0.0892 max mem: 9377 +Train: [54] [2300/6250] eta: 0:11:00 lr: 0.000059 grad: 0.1510 (0.1622) loss: 0.7610 (0.7678) time: 0.1904 data: 0.1038 max mem: 9377 +Train: [54] [2400/6250] eta: 0:10:43 lr: 0.000059 grad: 0.1450 (0.1618) loss: 0.7728 (0.7677) time: 0.1581 data: 0.0769 max mem: 9377 +Train: [54] [2500/6250] eta: 0:10:27 lr: 0.000059 grad: 0.1558 (0.1615) loss: 0.7616 (0.7674) time: 0.1631 data: 0.0846 max mem: 9377 +Train: [54] [2600/6250] eta: 0:10:12 lr: 0.000059 grad: 0.1420 (0.1611) loss: 0.7741 (0.7673) time: 0.2518 data: 0.1681 max mem: 9377 +Train: [54] [2700/6250] eta: 0:09:53 lr: 0.000059 grad: 0.1486 (0.1606) loss: 0.7596 (0.7673) time: 0.1786 data: 0.0950 max mem: 9377 +Train: [54] [2800/6250] eta: 0:09:36 lr: 0.000058 grad: 0.1496 (0.1603) loss: 0.7598 (0.7671) time: 0.1505 data: 0.0709 max mem: 9377 +Train: [54] [2900/6250] eta: 0:09:17 lr: 0.000058 grad: 0.1610 (0.1602) loss: 0.7480 (0.7670) time: 0.1582 data: 0.0716 max mem: 9377 +Train: [54] [3000/6250] eta: 0:09:00 lr: 0.000058 grad: 0.1452 (0.1600) loss: 0.7678 (0.7668) time: 0.1742 data: 0.0965 max mem: 9377 +Train: [54] [3100/6250] eta: 0:08:43 lr: 0.000058 grad: 0.1503 (0.1598) loss: 0.7596 (0.7667) time: 0.1702 data: 0.0800 max mem: 9377 +Train: [54] [3200/6250] eta: 0:08:27 lr: 0.000058 grad: 0.1471 (0.1594) loss: 0.7655 (0.7668) time: 0.1880 data: 0.0985 max mem: 9377 +Train: [54] [3300/6250] eta: 0:08:09 lr: 0.000058 grad: 0.1437 (0.1591) loss: 0.7644 (0.7668) time: 0.1468 data: 0.0614 max mem: 9377 +Train: [54] [3400/6250] eta: 0:07:52 lr: 0.000058 grad: 0.1492 (0.1588) loss: 0.7596 (0.7668) time: 0.1673 data: 0.0839 max mem: 9377 +Train: [54] [3500/6250] eta: 0:07:34 lr: 0.000058 grad: 0.1511 (0.1585) loss: 0.7713 (0.7668) time: 0.1622 data: 0.0759 max mem: 9377 +Train: [54] [3600/6250] eta: 0:07:17 lr: 0.000058 grad: 0.1532 (0.1585) loss: 0.7699 (0.7668) time: 0.1626 data: 0.0770 max mem: 9377 +Train: [54] [3700/6250] eta: 0:06:59 lr: 0.000058 grad: 0.1484 (0.1583) loss: 0.7627 (0.7667) time: 0.1592 data: 0.0777 max mem: 9377 +Train: [54] [3800/6250] eta: 0:06:43 lr: 0.000058 grad: 0.1602 (0.1583) loss: 0.7564 (0.7667) time: 0.1645 data: 0.0753 max mem: 9377 +Train: [54] [3900/6250] eta: 0:06:26 lr: 0.000058 grad: 0.1496 (0.1583) loss: 0.7699 (0.7667) time: 0.1132 data: 0.0006 max mem: 9377 +Train: [54] [4000/6250] eta: 0:06:10 lr: 0.000058 grad: 0.1472 (0.1581) loss: 0.7605 (0.7668) time: 0.1594 data: 0.0678 max mem: 9377 +Train: [54] [4100/6250] eta: 0:05:53 lr: 0.000058 grad: 0.1458 (0.1579) loss: 0.7590 (0.7668) time: 0.1518 data: 0.0616 max mem: 9377 +Train: [54] [4200/6250] eta: 0:05:37 lr: 0.000058 grad: 0.1445 (0.1576) loss: 0.7647 (0.7669) time: 0.1687 data: 0.0888 max mem: 9377 +Train: [54] [4300/6250] eta: 0:05:20 lr: 0.000058 grad: 0.1431 (0.1576) loss: 0.7674 (0.7669) time: 0.1400 data: 0.0542 max mem: 9377 +Train: [54] [4400/6250] eta: 0:05:03 lr: 0.000058 grad: 0.1562 (0.1574) loss: 0.7576 (0.7670) time: 0.1546 data: 0.0628 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:47 lr: 0.000058 grad: 0.1486 (0.1573) loss: 0.7568 (0.7671) time: 0.1603 data: 0.0754 max mem: 9377 +Train: [54] [4600/6250] eta: 0:04:30 lr: 0.000058 grad: 0.1439 (0.1571) loss: 0.7670 (0.7671) time: 0.1463 data: 0.0620 max mem: 9377 +Train: [54] [4700/6250] eta: 0:04:14 lr: 0.000058 grad: 0.1481 (0.1569) loss: 0.7681 (0.7672) time: 0.1852 data: 0.0913 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:58 lr: 0.000058 grad: 0.1486 (0.1568) loss: 0.7720 (0.7673) time: 0.1704 data: 0.0804 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:42 lr: 0.000058 grad: 0.1572 (0.1569) loss: 0.7683 (0.7673) time: 0.1683 data: 0.0692 max mem: 9377 +Train: [54] [5000/6250] eta: 0:03:26 lr: 0.000058 grad: 0.1457 (0.1568) loss: 0.7687 (0.7673) time: 0.1909 data: 0.0922 max mem: 9377 +Train: [54] [5100/6250] eta: 0:03:10 lr: 0.000058 grad: 0.1516 (0.1568) loss: 0.7663 (0.7673) time: 0.1690 data: 0.0752 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:54 lr: 0.000058 grad: 0.1434 (0.1567) loss: 0.7668 (0.7673) time: 0.1739 data: 0.0818 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:37 lr: 0.000058 grad: 0.1522 (0.1567) loss: 0.7588 (0.7673) time: 0.1743 data: 0.0893 max mem: 9377 +Train: [54] [5400/6250] eta: 0:02:21 lr: 0.000058 grad: 0.1533 (0.1567) loss: 0.7739 (0.7673) time: 0.1943 data: 0.1010 max mem: 9377 +Train: [54] [5500/6250] eta: 0:02:04 lr: 0.000058 grad: 0.1573 (0.1567) loss: 0.7575 (0.7672) time: 0.1932 data: 0.1027 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:48 lr: 0.000058 grad: 0.1564 (0.1567) loss: 0.7668 (0.7671) time: 0.1645 data: 0.0656 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:31 lr: 0.000058 grad: 0.1553 (0.1568) loss: 0.7610 (0.7670) time: 0.1444 data: 0.0511 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:14 lr: 0.000057 grad: 0.1548 (0.1567) loss: 0.7634 (0.7670) time: 0.1460 data: 0.0434 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:58 lr: 0.000057 grad: 0.1635 (0.1568) loss: 0.7430 (0.7668) time: 0.1408 data: 0.0453 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:41 lr: 0.000057 grad: 0.1482 (0.1568) loss: 0.7541 (0.7667) time: 0.1532 data: 0.0613 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:24 lr: 0.000057 grad: 0.1519 (0.1568) loss: 0.7683 (0.7667) time: 0.1675 data: 0.0729 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:08 lr: 0.000057 grad: 0.1699 (0.1569) loss: 0.7538 (0.7666) time: 0.1659 data: 0.0692 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1539 (0.1569) loss: 0.7582 (0.7666) time: 0.1498 data: 0.0579 max mem: 9377 +Train: [54] Total time: 0:17:20 (0.1665 s / it) +Averaged stats: lr: 0.000057 grad: 0.1539 (0.1569) loss: 0.7582 (0.7666) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:05:01 loss: 0.8470 (0.8470) time: 4.8678 data: 4.8092 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8540 (0.8532) time: 0.1408 data: 0.1156 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:14 (0.2408 s / it) +Averaged stats (hcp-train-subset): loss: 0.8540 (0.8532) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [54] [ 0/62] eta: 0:04:35 loss: 0.8517 (0.8517) time: 4.4435 data: 4.3850 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8485 (0.8513) time: 0.1223 data: 0.0970 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (hcp-val): loss: 0.8485 (0.8513) +Making plots (hcp-val): example=7 +Eval (nsd-val): [54] [ 0/62] eta: 0:06:24 loss: 0.8139 (0.8139) time: 6.2046 data: 6.1725 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8227 (0.8246) time: 0.1438 data: 0.1169 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:15 (0.2425 s / it) +Averaged stats (nsd-val): loss: 0.8227 (0.8246) +Making plots (nsd-val): example=45 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 13:06:00 lr: 0.000057 grad: 0.2300 (0.2300) loss: 0.8371 (0.8371) time: 7.5457 data: 7.4386 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:24:04 lr: 0.000057 grad: 0.2056 (0.2158) loss: 0.7754 (0.7954) time: 0.2012 data: 0.0800 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:20:35 lr: 0.000057 grad: 0.2036 (0.2255) loss: 0.7575 (0.7772) time: 0.1803 data: 0.0751 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:19:13 lr: 0.000057 grad: 0.2153 (0.2240) loss: 0.7569 (0.7707) time: 0.1755 data: 0.0805 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:18:23 lr: 0.000057 grad: 0.1778 (0.2203) loss: 0.7509 (0.7677) time: 0.1648 data: 0.0741 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:17:33 lr: 0.000057 grad: 0.1791 (0.2119) loss: 0.7695 (0.7673) time: 0.1691 data: 0.0740 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:16:53 lr: 0.000057 grad: 0.1546 (0.2049) loss: 0.7687 (0.7670) time: 0.1448 data: 0.0353 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:16:20 lr: 0.000057 grad: 0.1570 (0.1995) loss: 0.7634 (0.7665) time: 0.1476 data: 0.0486 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:15:55 lr: 0.000057 grad: 0.1557 (0.1949) loss: 0.7676 (0.7658) time: 0.1680 data: 0.0704 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:15:30 lr: 0.000057 grad: 0.1615 (0.1909) loss: 0.7668 (0.7656) time: 0.1681 data: 0.0805 max mem: 9377 +Train: [55] [1000/6250] eta: 0:15:00 lr: 0.000057 grad: 0.1588 (0.1879) loss: 0.7550 (0.7655) time: 0.1569 data: 0.0687 max mem: 9377 +Train: [55] [1100/6250] eta: 0:14:47 lr: 0.000057 grad: 0.1516 (0.1854) loss: 0.7633 (0.7650) time: 0.1588 data: 0.0830 max mem: 9377 +Train: [55] [1200/6250] eta: 0:14:25 lr: 0.000057 grad: 0.1483 (0.1829) loss: 0.7643 (0.7648) time: 0.1729 data: 0.0907 max mem: 9377 +Train: [55] [1300/6250] eta: 0:14:05 lr: 0.000057 grad: 0.1495 (0.1808) loss: 0.7657 (0.7645) time: 0.1744 data: 0.0884 max mem: 9377 +Train: [55] [1400/6250] eta: 0:13:43 lr: 0.000057 grad: 0.1559 (0.1792) loss: 0.7547 (0.7641) time: 0.1549 data: 0.0721 max mem: 9377 +Train: [55] [1500/6250] eta: 0:13:23 lr: 0.000057 grad: 0.1596 (0.1776) loss: 0.7555 (0.7638) time: 0.1728 data: 0.0886 max mem: 9377 +Train: [55] [1600/6250] eta: 0:13:04 lr: 0.000057 grad: 0.1501 (0.1763) loss: 0.7576 (0.7636) time: 0.1547 data: 0.0672 max mem: 9377 +Train: [55] [1700/6250] eta: 0:12:45 lr: 0.000057 grad: 0.1541 (0.1751) loss: 0.7612 (0.7634) time: 0.1671 data: 0.0758 max mem: 9377 +Train: [55] [1800/6250] eta: 0:12:24 lr: 0.000057 grad: 0.1542 (0.1739) loss: 0.7600 (0.7634) time: 0.1460 data: 0.0556 max mem: 9377 +Train: [55] [1900/6250] eta: 0:12:04 lr: 0.000057 grad: 0.1530 (0.1729) loss: 0.7580 (0.7632) time: 0.1375 data: 0.0418 max mem: 9377 +Train: [55] [2000/6250] eta: 0:11:44 lr: 0.000057 grad: 0.1448 (0.1720) loss: 0.7694 (0.7632) time: 0.1457 data: 0.0520 max mem: 9377 +Train: [55] [2100/6250] eta: 0:11:25 lr: 0.000057 grad: 0.1569 (0.1713) loss: 0.7589 (0.7630) time: 0.1487 data: 0.0523 max mem: 9377 +Train: [55] [2200/6250] eta: 0:11:09 lr: 0.000057 grad: 0.1519 (0.1704) loss: 0.7670 (0.7630) time: 0.1577 data: 0.0494 max mem: 9377 +Train: [55] [2300/6250] eta: 0:10:52 lr: 0.000057 grad: 0.1566 (0.1700) loss: 0.7607 (0.7627) time: 0.1568 data: 0.0602 max mem: 9377 +Train: [55] [2400/6250] eta: 0:10:35 lr: 0.000057 grad: 0.1596 (0.1697) loss: 0.7491 (0.7625) time: 0.1780 data: 0.0940 max mem: 9377 +Train: [55] [2500/6250] eta: 0:10:15 lr: 0.000057 grad: 0.1527 (0.1692) loss: 0.7592 (0.7622) time: 0.1354 data: 0.0398 max mem: 9377 +Train: [55] [2600/6250] eta: 0:09:57 lr: 0.000056 grad: 0.1579 (0.1686) loss: 0.7597 (0.7623) time: 0.1441 data: 0.0578 max mem: 9377 +Train: [55] [2700/6250] eta: 0:09:38 lr: 0.000056 grad: 0.1608 (0.1683) loss: 0.7566 (0.7621) time: 0.1590 data: 0.0736 max mem: 9377 +Train: [55] [2800/6250] eta: 0:09:22 lr: 0.000056 grad: 0.1577 (0.1679) loss: 0.7575 (0.7621) time: 0.1900 data: 0.0965 max mem: 9377 +Train: [55] [2900/6250] eta: 0:09:05 lr: 0.000056 grad: 0.1607 (0.1676) loss: 0.7617 (0.7620) time: 0.1250 data: 0.0136 max mem: 9377 +Train: [55] [3000/6250] eta: 0:08:48 lr: 0.000056 grad: 0.1573 (0.1675) loss: 0.7496 (0.7617) time: 0.1562 data: 0.0693 max mem: 9377 +Train: [55] [3100/6250] eta: 0:08:31 lr: 0.000056 grad: 0.1602 (0.1673) loss: 0.7421 (0.7615) time: 0.1794 data: 0.0919 max mem: 9377 +Train: [55] [3200/6250] eta: 0:08:13 lr: 0.000056 grad: 0.1495 (0.1672) loss: 0.7523 (0.7612) time: 0.1451 data: 0.0504 max mem: 9377 +Train: [55] [3300/6250] eta: 0:07:55 lr: 0.000056 grad: 0.1542 (0.1669) loss: 0.7647 (0.7611) time: 0.1521 data: 0.0529 max mem: 9377 +Train: [55] [3400/6250] eta: 0:07:39 lr: 0.000056 grad: 0.1608 (0.1667) loss: 0.7566 (0.7609) time: 0.1343 data: 0.0390 max mem: 9377 +Train: [55] [3500/6250] eta: 0:07:22 lr: 0.000056 grad: 0.1634 (0.1666) loss: 0.7580 (0.7607) time: 0.1393 data: 0.0519 max mem: 9377 +Train: [55] [3600/6250] eta: 0:07:06 lr: 0.000056 grad: 0.1570 (0.1664) loss: 0.7686 (0.7606) time: 0.1402 data: 0.0464 max mem: 9377 +Train: [55] [3700/6250] eta: 0:06:49 lr: 0.000056 grad: 0.1581 (0.1663) loss: 0.7602 (0.7605) time: 0.1563 data: 0.0579 max mem: 9377 +Train: [55] [3800/6250] eta: 0:06:34 lr: 0.000056 grad: 0.1586 (0.1662) loss: 0.7608 (0.7605) time: 0.1942 data: 0.1052 max mem: 9377 +Train: [55] [3900/6250] eta: 0:06:18 lr: 0.000056 grad: 0.1617 (0.1661) loss: 0.7536 (0.7603) time: 0.1723 data: 0.0863 max mem: 9377 +Train: [55] [4000/6250] eta: 0:06:05 lr: 0.000056 grad: 0.1651 (0.1662) loss: 0.7507 (0.7602) time: 0.1622 data: 0.0691 max mem: 9377 +Train: [55] [4100/6250] eta: 0:05:50 lr: 0.000056 grad: 0.1527 (0.1661) loss: 0.7609 (0.7601) time: 0.2262 data: 0.1430 max mem: 9377 +Train: [55] [4200/6250] eta: 0:05:35 lr: 0.000056 grad: 0.1532 (0.1660) loss: 0.7631 (0.7601) time: 0.1895 data: 0.1025 max mem: 9377 +Train: [55] [4300/6250] eta: 0:05:19 lr: 0.000056 grad: 0.1627 (0.1659) loss: 0.7521 (0.7600) time: 0.2145 data: 0.1283 max mem: 9377 +Train: [55] [4400/6250] eta: 0:05:03 lr: 0.000056 grad: 0.1507 (0.1657) loss: 0.7641 (0.7600) time: 0.1944 data: 0.0949 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:48 lr: 0.000056 grad: 0.1539 (0.1655) loss: 0.7602 (0.7601) time: 0.2068 data: 0.1146 max mem: 9377 +Train: [55] [4600/6250] eta: 0:04:32 lr: 0.000056 grad: 0.1661 (0.1655) loss: 0.7492 (0.7600) time: 0.1984 data: 0.1042 max mem: 9377 +Train: [55] [4700/6250] eta: 0:04:16 lr: 0.000056 grad: 0.1595 (0.1655) loss: 0.7500 (0.7600) time: 0.1887 data: 0.1045 max mem: 9377 +Train: [55] [4800/6250] eta: 0:03:59 lr: 0.000056 grad: 0.1541 (0.1653) loss: 0.7659 (0.7600) time: 0.1650 data: 0.0699 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:43 lr: 0.000056 grad: 0.1529 (0.1651) loss: 0.7588 (0.7600) time: 0.1747 data: 0.0827 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:27 lr: 0.000056 grad: 0.1550 (0.1649) loss: 0.7613 (0.7600) time: 0.1696 data: 0.0698 max mem: 9377 +Train: [55] [5100/6250] eta: 0:03:10 lr: 0.000056 grad: 0.1586 (0.1648) loss: 0.7541 (0.7599) time: 0.1633 data: 0.0788 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:54 lr: 0.000056 grad: 0.1470 (0.1646) loss: 0.7629 (0.7600) time: 0.1657 data: 0.0731 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:37 lr: 0.000056 grad: 0.1680 (0.1645) loss: 0.7538 (0.7600) time: 0.1561 data: 0.0691 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:21 lr: 0.000056 grad: 0.1550 (0.1645) loss: 0.7607 (0.7600) time: 0.1629 data: 0.0726 max mem: 9377 +Train: [55] [5500/6250] eta: 0:02:04 lr: 0.000056 grad: 0.1562 (0.1644) loss: 0.7521 (0.7599) time: 0.1599 data: 0.0581 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:48 lr: 0.000055 grad: 0.1487 (0.1643) loss: 0.7686 (0.7599) time: 0.1613 data: 0.0698 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:31 lr: 0.000055 grad: 0.1557 (0.1641) loss: 0.7580 (0.7600) time: 0.1599 data: 0.0711 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:14 lr: 0.000055 grad: 0.1505 (0.1641) loss: 0.7624 (0.7600) time: 0.1641 data: 0.0757 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:58 lr: 0.000055 grad: 0.1494 (0.1639) loss: 0.7622 (0.7601) time: 0.1398 data: 0.0423 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:41 lr: 0.000055 grad: 0.1560 (0.1638) loss: 0.7734 (0.7602) time: 0.1226 data: 0.0308 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:24 lr: 0.000055 grad: 0.1550 (0.1638) loss: 0.7732 (0.7602) time: 0.1645 data: 0.0826 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:08 lr: 0.000055 grad: 0.1523 (0.1637) loss: 0.7600 (0.7602) time: 0.1654 data: 0.0859 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.1448 (0.1636) loss: 0.7628 (0.7602) time: 0.1384 data: 0.0443 max mem: 9377 +Train: [55] Total time: 0:17:20 (0.1664 s / it) +Averaged stats: lr: 0.000055 grad: 0.1448 (0.1636) loss: 0.7628 (0.7602) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:05:28 loss: 0.8476 (0.8476) time: 5.2917 data: 5.2609 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8534 (0.8527) time: 0.1461 data: 0.1176 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (hcp-train-subset): loss: 0.8534 (0.8527) +Eval (hcp-val): [55] [ 0/62] eta: 0:05:38 loss: 0.8481 (0.8481) time: 5.4544 data: 5.4227 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8506 (0.8513) time: 0.1262 data: 0.1011 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-val): loss: 0.8506 (0.8513) +Eval (nsd-val): [55] [ 0/62] eta: 0:03:52 loss: 0.8104 (0.8104) time: 3.7460 data: 3.6784 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8257 (0.8259) time: 0.1168 data: 0.0913 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (nsd-val): loss: 0.8257 (0.8259) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 10:43:03 lr: 0.000055 grad: 0.1355 (0.1355) loss: 0.7857 (0.7857) time: 6.1733 data: 6.0456 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:24:23 lr: 0.000055 grad: 0.2403 (0.3170) loss: 0.7724 (0.7662) time: 0.1812 data: 0.0857 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:20:27 lr: 0.000055 grad: 0.1760 (0.2603) loss: 0.7843 (0.7681) time: 0.1802 data: 0.0848 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:19:12 lr: 0.000055 grad: 0.1752 (0.2347) loss: 0.7815 (0.7702) time: 0.1994 data: 0.1088 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:18:05 lr: 0.000055 grad: 0.1726 (0.2235) loss: 0.7761 (0.7697) time: 0.1735 data: 0.0814 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:17:18 lr: 0.000055 grad: 0.1738 (0.2130) loss: 0.7689 (0.7692) time: 0.1728 data: 0.0741 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:16:29 lr: 0.000055 grad: 0.1674 (0.2049) loss: 0.7762 (0.7688) time: 0.1569 data: 0.0558 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:15:58 lr: 0.000055 grad: 0.1551 (0.1988) loss: 0.7707 (0.7688) time: 0.1445 data: 0.0505 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:15:34 lr: 0.000055 grad: 0.1522 (0.1937) loss: 0.7693 (0.7687) time: 0.1887 data: 0.0958 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:15:09 lr: 0.000055 grad: 0.1431 (0.1891) loss: 0.7605 (0.7693) time: 0.1745 data: 0.0887 max mem: 9377 +Train: [56] [1000/6250] eta: 0:14:42 lr: 0.000055 grad: 0.1592 (0.1858) loss: 0.7592 (0.7694) time: 0.1501 data: 0.0583 max mem: 9377 +Train: [56] [1100/6250] eta: 0:14:19 lr: 0.000055 grad: 0.1449 (0.1834) loss: 0.7678 (0.7692) time: 0.1624 data: 0.0701 max mem: 9377 +Train: [56] [1200/6250] eta: 0:14:00 lr: 0.000055 grad: 0.1508 (0.1812) loss: 0.7601 (0.7688) time: 0.1638 data: 0.0794 max mem: 9377 +Train: [56] [1300/6250] eta: 0:13:43 lr: 0.000055 grad: 0.1536 (0.1797) loss: 0.7621 (0.7681) time: 0.1784 data: 0.1052 max mem: 9377 +Train: [56] [1400/6250] eta: 0:13:33 lr: 0.000055 grad: 0.1433 (0.1779) loss: 0.7683 (0.7677) time: 0.1982 data: 0.1013 max mem: 9377 +Train: [56] [1500/6250] eta: 0:13:16 lr: 0.000055 grad: 0.1496 (0.1762) loss: 0.7603 (0.7673) time: 0.1640 data: 0.0806 max mem: 9377 +Train: [56] [1600/6250] eta: 0:13:06 lr: 0.000055 grad: 0.1599 (0.1752) loss: 0.7535 (0.7668) time: 0.1891 data: 0.1077 max mem: 9377 +Train: [56] [1700/6250] eta: 0:12:54 lr: 0.000055 grad: 0.1713 (0.1741) loss: 0.7454 (0.7663) time: 0.1832 data: 0.0933 max mem: 9377 +Train: [56] [1800/6250] eta: 0:12:36 lr: 0.000055 grad: 0.1534 (0.1731) loss: 0.7660 (0.7660) time: 0.1484 data: 0.0575 max mem: 9377 +Train: [56] [1900/6250] eta: 0:12:16 lr: 0.000055 grad: 0.1473 (0.1724) loss: 0.7673 (0.7660) time: 0.1850 data: 0.0957 max mem: 9377 +Train: [56] [2000/6250] eta: 0:11:56 lr: 0.000055 grad: 0.1500 (0.1714) loss: 0.7608 (0.7657) time: 0.1383 data: 0.0516 max mem: 9377 +Train: [56] [2100/6250] eta: 0:11:39 lr: 0.000055 grad: 0.1546 (0.1706) loss: 0.7624 (0.7658) time: 0.1619 data: 0.0769 max mem: 9377 +Train: [56] [2200/6250] eta: 0:11:25 lr: 0.000055 grad: 0.1532 (0.1701) loss: 0.7673 (0.7658) time: 0.1707 data: 0.0872 max mem: 9377 +Train: [56] [2300/6250] eta: 0:11:09 lr: 0.000055 grad: 0.1477 (0.1693) loss: 0.7675 (0.7659) time: 0.1867 data: 0.1059 max mem: 9377 +Train: [56] [2400/6250] eta: 0:10:51 lr: 0.000054 grad: 0.1569 (0.1686) loss: 0.7641 (0.7660) time: 0.1704 data: 0.0883 max mem: 9377 +Train: [56] [2500/6250] eta: 0:10:36 lr: 0.000054 grad: 0.1493 (0.1681) loss: 0.7631 (0.7659) time: 0.1813 data: 0.0997 max mem: 9377 +Train: [56] [2600/6250] eta: 0:10:18 lr: 0.000054 grad: 0.1566 (0.1675) loss: 0.7650 (0.7659) time: 0.1512 data: 0.0698 max mem: 9377 +Train: [56] [2700/6250] eta: 0:10:01 lr: 0.000054 grad: 0.1458 (0.1670) loss: 0.7671 (0.7658) time: 0.1873 data: 0.0986 max mem: 9377 +Train: [56] [2800/6250] eta: 0:09:42 lr: 0.000054 grad: 0.1571 (0.1667) loss: 0.7544 (0.7656) time: 0.1484 data: 0.0629 max mem: 9377 +Train: [56] [2900/6250] eta: 0:09:25 lr: 0.000054 grad: 0.1568 (0.1664) loss: 0.7644 (0.7654) time: 0.1647 data: 0.0764 max mem: 9377 +Train: [56] [3000/6250] eta: 0:09:07 lr: 0.000054 grad: 0.1639 (0.1663) loss: 0.7621 (0.7654) time: 0.0986 data: 0.0003 max mem: 9377 +Train: [56] [3100/6250] eta: 0:08:51 lr: 0.000054 grad: 0.1529 (0.1660) loss: 0.7616 (0.7653) time: 0.1874 data: 0.1026 max mem: 9377 +Train: [56] [3200/6250] eta: 0:08:33 lr: 0.000054 grad: 0.1443 (0.1657) loss: 0.7652 (0.7651) time: 0.1615 data: 0.0700 max mem: 9377 +Train: [56] [3300/6250] eta: 0:08:15 lr: 0.000054 grad: 0.1555 (0.1654) loss: 0.7637 (0.7650) time: 0.1588 data: 0.0694 max mem: 9377 +Train: [56] [3400/6250] eta: 0:07:57 lr: 0.000054 grad: 0.1501 (0.1653) loss: 0.7522 (0.7648) time: 0.1703 data: 0.0818 max mem: 9377 +Train: [56] [3500/6250] eta: 0:07:40 lr: 0.000054 grad: 0.1553 (0.1653) loss: 0.7493 (0.7646) time: 0.1654 data: 0.0745 max mem: 9377 +Train: [56] [3600/6250] eta: 0:07:23 lr: 0.000054 grad: 0.1501 (0.1650) loss: 0.7652 (0.7645) time: 0.1584 data: 0.0765 max mem: 9377 +Train: [56] [3700/6250] eta: 0:07:05 lr: 0.000054 grad: 0.1557 (0.1651) loss: 0.7575 (0.7644) time: 0.1503 data: 0.0682 max mem: 9377 +Train: [56] [3800/6250] eta: 0:06:49 lr: 0.000054 grad: 0.1551 (0.1650) loss: 0.7605 (0.7643) time: 0.1667 data: 0.0750 max mem: 9377 +Train: [56] [3900/6250] eta: 0:06:33 lr: 0.000054 grad: 0.1584 (0.1648) loss: 0.7500 (0.7642) time: 0.1988 data: 0.1244 max mem: 9377 +Train: [56] [4000/6250] eta: 0:06:15 lr: 0.000054 grad: 0.1560 (0.1647) loss: 0.7640 (0.7642) time: 0.1400 data: 0.0533 max mem: 9377 +Train: [56] [4100/6250] eta: 0:05:58 lr: 0.000054 grad: 0.1585 (0.1646) loss: 0.7492 (0.7640) time: 0.1534 data: 0.0685 max mem: 9377 +Train: [56] [4200/6250] eta: 0:05:41 lr: 0.000054 grad: 0.1600 (0.1645) loss: 0.7615 (0.7639) time: 0.1396 data: 0.0572 max mem: 9377 +Train: [56] [4300/6250] eta: 0:05:25 lr: 0.000054 grad: 0.1623 (0.1644) loss: 0.7592 (0.7638) time: 0.1826 data: 0.1002 max mem: 9377 +Train: [56] [4400/6250] eta: 0:05:08 lr: 0.000054 grad: 0.1603 (0.1643) loss: 0.7534 (0.7636) time: 0.1772 data: 0.0932 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:52 lr: 0.000054 grad: 0.1563 (0.1642) loss: 0.7548 (0.7635) time: 0.1803 data: 0.0909 max mem: 9377 +Train: [56] [4600/6250] eta: 0:04:35 lr: 0.000054 grad: 0.1533 (0.1641) loss: 0.7536 (0.7635) time: 0.1535 data: 0.0594 max mem: 9377 +Train: [56] [4700/6250] eta: 0:04:18 lr: 0.000054 grad: 0.1583 (0.1642) loss: 0.7602 (0.7634) time: 0.1697 data: 0.0809 max mem: 9377 +Train: [56] [4800/6250] eta: 0:04:01 lr: 0.000054 grad: 0.1554 (0.1642) loss: 0.7561 (0.7632) time: 0.1487 data: 0.0550 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:44 lr: 0.000054 grad: 0.1604 (0.1642) loss: 0.7538 (0.7631) time: 0.1495 data: 0.0593 max mem: 9377 +Train: [56] [5000/6250] eta: 0:03:27 lr: 0.000054 grad: 0.1594 (0.1641) loss: 0.7528 (0.7630) time: 0.1292 data: 0.0456 max mem: 9377 +Train: [56] [5100/6250] eta: 0:03:10 lr: 0.000054 grad: 0.1603 (0.1641) loss: 0.7501 (0.7628) time: 0.1412 data: 0.0502 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:54 lr: 0.000054 grad: 0.1514 (0.1639) loss: 0.7740 (0.7628) time: 0.1742 data: 0.0909 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:37 lr: 0.000054 grad: 0.1502 (0.1638) loss: 0.7746 (0.7628) time: 0.1637 data: 0.0843 max mem: 9377 +Train: [56] [5400/6250] eta: 0:02:21 lr: 0.000054 grad: 0.1594 (0.1637) loss: 0.7657 (0.7627) time: 0.1870 data: 0.0962 max mem: 9377 +Train: [56] [5500/6250] eta: 0:02:04 lr: 0.000053 grad: 0.1565 (0.1637) loss: 0.7541 (0.7626) time: 0.1588 data: 0.0617 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:47 lr: 0.000053 grad: 0.1653 (0.1637) loss: 0.7499 (0.7626) time: 0.1368 data: 0.0447 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:31 lr: 0.000053 grad: 0.1591 (0.1637) loss: 0.7609 (0.7625) time: 0.1674 data: 0.0765 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:14 lr: 0.000053 grad: 0.1541 (0.1636) loss: 0.7688 (0.7625) time: 0.1742 data: 0.0883 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:57 lr: 0.000053 grad: 0.1537 (0.1635) loss: 0.7654 (0.7624) time: 0.1596 data: 0.0735 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:41 lr: 0.000053 grad: 0.1587 (0.1636) loss: 0.7415 (0.7623) time: 0.1567 data: 0.0573 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:24 lr: 0.000053 grad: 0.1596 (0.1635) loss: 0.7421 (0.7623) time: 0.1519 data: 0.0630 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:08 lr: 0.000053 grad: 0.1613 (0.1635) loss: 0.7626 (0.7622) time: 0.1682 data: 0.0868 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1548 (0.1634) loss: 0.7573 (0.7622) time: 0.1509 data: 0.0655 max mem: 9377 +Train: [56] Total time: 0:17:17 (0.1660 s / it) +Averaged stats: lr: 0.000053 grad: 0.1548 (0.1634) loss: 0.7573 (0.7622) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:06:06 loss: 0.8535 (0.8535) time: 5.9178 data: 5.8866 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8546 (0.8552) time: 0.1137 data: 0.0870 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:15 (0.2425 s / it) +Averaged stats (hcp-train-subset): loss: 0.8546 (0.8552) +Eval (hcp-val): [56] [ 0/62] eta: 0:05:42 loss: 0.8490 (0.8490) time: 5.5232 data: 5.4844 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8521 (0.8539) time: 0.1303 data: 0.1053 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (hcp-val): loss: 0.8521 (0.8539) +Eval (nsd-val): [56] [ 0/62] eta: 0:06:18 loss: 0.8172 (0.8172) time: 6.1015 data: 6.0702 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8262 (0.8280) time: 0.1295 data: 0.1041 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (nsd-val): loss: 0.8262 (0.8280) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 11:19:57 lr: 0.000053 grad: 0.1591 (0.1591) loss: 0.8735 (0.8735) time: 6.5275 data: 6.3936 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:22:44 lr: 0.000053 grad: 0.2375 (0.2521) loss: 0.7715 (0.7868) time: 0.1708 data: 0.0733 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:19:18 lr: 0.000053 grad: 0.2136 (0.2564) loss: 0.7728 (0.7736) time: 0.1468 data: 0.0444 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:18:13 lr: 0.000053 grad: 0.1859 (0.2417) loss: 0.7546 (0.7709) time: 0.1626 data: 0.0721 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:17:33 lr: 0.000053 grad: 0.1741 (0.2276) loss: 0.7613 (0.7697) time: 0.1993 data: 0.1074 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:16:40 lr: 0.000053 grad: 0.1696 (0.2167) loss: 0.7602 (0.7684) time: 0.1500 data: 0.0581 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:16:03 lr: 0.000053 grad: 0.1699 (0.2092) loss: 0.7625 (0.7678) time: 0.1432 data: 0.0469 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:15:32 lr: 0.000053 grad: 0.1639 (0.2037) loss: 0.7680 (0.7679) time: 0.1479 data: 0.0437 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:15:11 lr: 0.000053 grad: 0.1574 (0.1988) loss: 0.7576 (0.7677) time: 0.1579 data: 0.0635 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:14:47 lr: 0.000053 grad: 0.1611 (0.1952) loss: 0.7620 (0.7668) time: 0.1533 data: 0.0593 max mem: 9377 +Train: [57] [1000/6250] eta: 0:14:25 lr: 0.000053 grad: 0.1540 (0.1917) loss: 0.7777 (0.7669) time: 0.1655 data: 0.0772 max mem: 9377 +Train: [57] [1100/6250] eta: 0:14:01 lr: 0.000053 grad: 0.1496 (0.1891) loss: 0.7640 (0.7666) time: 0.1563 data: 0.0749 max mem: 9377 +Train: [57] [1200/6250] eta: 0:13:46 lr: 0.000053 grad: 0.1528 (0.1868) loss: 0.7655 (0.7667) time: 0.1641 data: 0.0857 max mem: 9377 +Train: [57] [1300/6250] eta: 0:13:31 lr: 0.000053 grad: 0.1509 (0.1844) loss: 0.7743 (0.7670) time: 0.1531 data: 0.0611 max mem: 9377 +Train: [57] [1400/6250] eta: 0:13:13 lr: 0.000053 grad: 0.1547 (0.1828) loss: 0.7734 (0.7673) time: 0.1599 data: 0.0672 max mem: 9377 +Train: [57] [1500/6250] eta: 0:12:57 lr: 0.000053 grad: 0.1582 (0.1809) loss: 0.7579 (0.7673) time: 0.1530 data: 0.0645 max mem: 9377 +Train: [57] [1600/6250] eta: 0:12:42 lr: 0.000053 grad: 0.1641 (0.1794) loss: 0.7512 (0.7671) time: 0.1280 data: 0.0406 max mem: 9377 +Train: [57] [1700/6250] eta: 0:12:25 lr: 0.000053 grad: 0.1456 (0.1782) loss: 0.7729 (0.7671) time: 0.1649 data: 0.0754 max mem: 9377 +Train: [57] [1800/6250] eta: 0:12:05 lr: 0.000053 grad: 0.1505 (0.1767) loss: 0.7677 (0.7673) time: 0.1368 data: 0.0437 max mem: 9377 +Train: [57] [1900/6250] eta: 0:11:48 lr: 0.000053 grad: 0.1535 (0.1753) loss: 0.7678 (0.7675) time: 0.1534 data: 0.0577 max mem: 9377 +Train: [57] [2000/6250] eta: 0:11:30 lr: 0.000053 grad: 0.1511 (0.1743) loss: 0.7663 (0.7674) time: 0.1550 data: 0.0640 max mem: 9377 +Train: [57] [2100/6250] eta: 0:11:10 lr: 0.000053 grad: 0.1449 (0.1733) loss: 0.7722 (0.7675) time: 0.1498 data: 0.0614 max mem: 9377 +Train: [57] [2200/6250] eta: 0:10:52 lr: 0.000053 grad: 0.1465 (0.1725) loss: 0.7790 (0.7677) time: 0.1424 data: 0.0551 max mem: 9377 +Train: [57] [2300/6250] eta: 0:10:34 lr: 0.000052 grad: 0.1441 (0.1715) loss: 0.7809 (0.7679) time: 0.1539 data: 0.0695 max mem: 9377 +Train: [57] [2400/6250] eta: 0:10:16 lr: 0.000052 grad: 0.1553 (0.1706) loss: 0.7704 (0.7682) time: 0.1569 data: 0.0701 max mem: 9377 +Train: [57] [2500/6250] eta: 0:09:59 lr: 0.000052 grad: 0.1498 (0.1699) loss: 0.7730 (0.7684) time: 0.1492 data: 0.0654 max mem: 9377 +Train: [57] [2600/6250] eta: 0:09:43 lr: 0.000052 grad: 0.1478 (0.1693) loss: 0.7716 (0.7685) time: 0.1543 data: 0.0600 max mem: 9377 +Train: [57] [2700/6250] eta: 0:09:27 lr: 0.000052 grad: 0.1434 (0.1686) loss: 0.7675 (0.7685) time: 0.1511 data: 0.0648 max mem: 9377 +Train: [57] [2800/6250] eta: 0:09:11 lr: 0.000052 grad: 0.1568 (0.1681) loss: 0.7603 (0.7685) time: 0.1638 data: 0.0751 max mem: 9377 +Train: [57] [2900/6250] eta: 0:08:55 lr: 0.000052 grad: 0.1573 (0.1677) loss: 0.7775 (0.7686) time: 0.1603 data: 0.0728 max mem: 9377 +Train: [57] [3000/6250] eta: 0:08:39 lr: 0.000052 grad: 0.1488 (0.1674) loss: 0.7690 (0.7686) time: 0.1495 data: 0.0662 max mem: 9377 +Train: [57] [3100/6250] eta: 0:08:22 lr: 0.000052 grad: 0.1446 (0.1670) loss: 0.7758 (0.7687) time: 0.1557 data: 0.0719 max mem: 9377 +Train: [57] [3200/6250] eta: 0:08:06 lr: 0.000052 grad: 0.1603 (0.1665) loss: 0.7588 (0.7687) time: 0.1496 data: 0.0575 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:49 lr: 0.000052 grad: 0.1596 (0.1663) loss: 0.7612 (0.7686) time: 0.1736 data: 0.0857 max mem: 9377 +Train: [57] [3400/6250] eta: 0:07:33 lr: 0.000052 grad: 0.1542 (0.1661) loss: 0.7695 (0.7685) time: 0.1411 data: 0.0533 max mem: 9377 +Train: [57] [3500/6250] eta: 0:07:17 lr: 0.000052 grad: 0.1510 (0.1658) loss: 0.7674 (0.7685) time: 0.1679 data: 0.0854 max mem: 9377 +Train: [57] [3600/6250] eta: 0:07:00 lr: 0.000052 grad: 0.1602 (0.1657) loss: 0.7723 (0.7685) time: 0.1578 data: 0.0718 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:45 lr: 0.000052 grad: 0.1554 (0.1656) loss: 0.7515 (0.7684) time: 0.1540 data: 0.0663 max mem: 9377 +Train: [57] [3800/6250] eta: 0:06:28 lr: 0.000052 grad: 0.1632 (0.1654) loss: 0.7628 (0.7683) time: 0.1355 data: 0.0406 max mem: 9377 +Train: [57] [3900/6250] eta: 0:06:14 lr: 0.000052 grad: 0.1652 (0.1655) loss: 0.7592 (0.7681) time: 0.1673 data: 0.0741 max mem: 9377 +Train: [57] [4000/6250] eta: 0:05:58 lr: 0.000052 grad: 0.1613 (0.1655) loss: 0.7645 (0.7680) time: 0.1642 data: 0.0829 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:42 lr: 0.000052 grad: 0.1598 (0.1654) loss: 0.7714 (0.7680) time: 0.1515 data: 0.0528 max mem: 9377 +Train: [57] [4200/6250] eta: 0:05:26 lr: 0.000052 grad: 0.1651 (0.1653) loss: 0.7617 (0.7679) time: 0.1880 data: 0.0955 max mem: 9377 +Train: [57] [4300/6250] eta: 0:05:11 lr: 0.000052 grad: 0.1511 (0.1652) loss: 0.7754 (0.7679) time: 0.1626 data: 0.0695 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:55 lr: 0.000052 grad: 0.1630 (0.1652) loss: 0.7616 (0.7678) time: 0.1625 data: 0.0657 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:40 lr: 0.000052 grad: 0.1557 (0.1651) loss: 0.7681 (0.7677) time: 0.1679 data: 0.0815 max mem: 9377 +Train: [57] [4600/6250] eta: 0:04:24 lr: 0.000052 grad: 0.1521 (0.1649) loss: 0.7653 (0.7677) time: 0.1569 data: 0.0637 max mem: 9377 +Train: [57] [4700/6250] eta: 0:04:08 lr: 0.000052 grad: 0.1620 (0.1649) loss: 0.7650 (0.7676) time: 0.1449 data: 0.0462 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:52 lr: 0.000052 grad: 0.1574 (0.1647) loss: 0.7572 (0.7675) time: 0.1530 data: 0.0538 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:36 lr: 0.000052 grad: 0.1502 (0.1646) loss: 0.7680 (0.7675) time: 0.1961 data: 0.1064 max mem: 9377 +Train: [57] [5000/6250] eta: 0:03:20 lr: 0.000052 grad: 0.1602 (0.1646) loss: 0.7625 (0.7674) time: 0.1669 data: 0.0777 max mem: 9377 +Train: [57] [5100/6250] eta: 0:03:04 lr: 0.000052 grad: 0.1618 (0.1646) loss: 0.7540 (0.7673) time: 0.1484 data: 0.0536 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:48 lr: 0.000052 grad: 0.1527 (0.1645) loss: 0.7602 (0.7673) time: 0.1657 data: 0.0739 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:32 lr: 0.000052 grad: 0.1590 (0.1644) loss: 0.7638 (0.7672) time: 0.1670 data: 0.0752 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:16 lr: 0.000051 grad: 0.1566 (0.1644) loss: 0.7657 (0.7672) time: 0.1614 data: 0.0746 max mem: 9377 +Train: [57] [5500/6250] eta: 0:02:00 lr: 0.000051 grad: 0.1588 (0.1643) loss: 0.7659 (0.7672) time: 0.1381 data: 0.0425 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:44 lr: 0.000051 grad: 0.1617 (0.1642) loss: 0.7565 (0.7671) time: 0.1420 data: 0.0455 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:28 lr: 0.000051 grad: 0.1638 (0.1642) loss: 0.7494 (0.7670) time: 0.1488 data: 0.0580 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:11 lr: 0.000051 grad: 0.1563 (0.1642) loss: 0.7552 (0.7669) time: 0.1366 data: 0.0488 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:55 lr: 0.000051 grad: 0.1587 (0.1641) loss: 0.7524 (0.7668) time: 0.1606 data: 0.0759 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:39 lr: 0.000051 grad: 0.1567 (0.1640) loss: 0.7628 (0.7668) time: 0.1777 data: 0.0947 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:23 lr: 0.000051 grad: 0.1585 (0.1639) loss: 0.7660 (0.7668) time: 0.1430 data: 0.0659 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1546 (0.1638) loss: 0.7623 (0.7668) time: 0.1393 data: 0.0581 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1592 (0.1638) loss: 0.7670 (0.7668) time: 0.1594 data: 0.0711 max mem: 9377 +Train: [57] Total time: 0:16:41 (0.1603 s / it) +Averaged stats: lr: 0.000051 grad: 0.1592 (0.1638) loss: 0.7670 (0.7668) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:03:45 loss: 0.8479 (0.8479) time: 3.6405 data: 3.5774 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8526 (0.8523) time: 0.1322 data: 0.1069 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (hcp-train-subset): loss: 0.8526 (0.8523) +Eval (hcp-val): [57] [ 0/62] eta: 0:05:14 loss: 0.8472 (0.8472) time: 5.0706 data: 5.0086 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8505 (0.8511) time: 0.1442 data: 0.1189 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:16 (0.2595 s / it) +Averaged stats (hcp-val): loss: 0.8505 (0.8511) +Eval (nsd-val): [57] [ 0/62] eta: 0:06:17 loss: 0.8109 (0.8109) time: 6.0814 data: 6.0478 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8229 (0.8244) time: 0.1622 data: 0.1362 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:15 (0.2525 s / it) +Averaged stats (nsd-val): loss: 0.8229 (0.8244) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 10:34:57 lr: 0.000051 grad: 0.3166 (0.3166) loss: 0.7626 (0.7626) time: 6.0955 data: 5.6929 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:25:31 lr: 0.000051 grad: 0.2297 (0.2917) loss: 0.7700 (0.7801) time: 0.1815 data: 0.0790 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:22:06 lr: 0.000051 grad: 0.1907 (0.2692) loss: 0.7640 (0.7681) time: 0.1645 data: 0.0562 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:20:13 lr: 0.000051 grad: 0.1818 (0.2450) loss: 0.7696 (0.7668) time: 0.1575 data: 0.0648 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:19:13 lr: 0.000051 grad: 0.1816 (0.2310) loss: 0.7635 (0.7649) time: 0.1964 data: 0.1104 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:18:21 lr: 0.000051 grad: 0.1685 (0.2195) loss: 0.7627 (0.7641) time: 0.1840 data: 0.0961 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:17:37 lr: 0.000051 grad: 0.1536 (0.2103) loss: 0.7666 (0.7649) time: 0.1623 data: 0.0733 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:17:01 lr: 0.000051 grad: 0.1574 (0.2035) loss: 0.7695 (0.7651) time: 0.1425 data: 0.0569 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:16:25 lr: 0.000051 grad: 0.1672 (0.1988) loss: 0.7599 (0.7650) time: 0.1634 data: 0.0789 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:16:18 lr: 0.000051 grad: 0.1505 (0.1947) loss: 0.7722 (0.7652) time: 0.1920 data: 0.1021 max mem: 9377 +Train: [58] [1000/6250] eta: 0:15:54 lr: 0.000051 grad: 0.1581 (0.1915) loss: 0.7652 (0.7651) time: 0.2306 data: 0.1541 max mem: 9377 +Train: [58] [1100/6250] eta: 0:15:28 lr: 0.000051 grad: 0.1559 (0.1890) loss: 0.7637 (0.7644) time: 0.1972 data: 0.1127 max mem: 9377 +Train: [58] [1200/6250] eta: 0:14:59 lr: 0.000051 grad: 0.1552 (0.1868) loss: 0.7592 (0.7639) time: 0.1774 data: 0.0836 max mem: 9377 +Train: [58] [1300/6250] eta: 0:14:38 lr: 0.000051 grad: 0.1525 (0.1848) loss: 0.7525 (0.7634) time: 0.1742 data: 0.0840 max mem: 9377 +Train: [58] [1400/6250] eta: 0:14:14 lr: 0.000051 grad: 0.1595 (0.1832) loss: 0.7529 (0.7626) time: 0.1443 data: 0.0563 max mem: 9377 +Train: [58] [1500/6250] eta: 0:13:52 lr: 0.000051 grad: 0.1571 (0.1820) loss: 0.7549 (0.7622) time: 0.1523 data: 0.0631 max mem: 9377 +Train: [58] [1600/6250] eta: 0:13:32 lr: 0.000051 grad: 0.1559 (0.1809) loss: 0.7687 (0.7620) time: 0.1981 data: 0.1201 max mem: 9377 +Train: [58] [1700/6250] eta: 0:13:12 lr: 0.000051 grad: 0.1576 (0.1800) loss: 0.7590 (0.7616) time: 0.1912 data: 0.1133 max mem: 9377 +Train: [58] [1800/6250] eta: 0:12:54 lr: 0.000051 grad: 0.1551 (0.1791) loss: 0.7518 (0.7613) time: 0.1840 data: 0.0865 max mem: 9377 +Train: [58] [1900/6250] eta: 0:12:35 lr: 0.000051 grad: 0.1575 (0.1781) loss: 0.7561 (0.7611) time: 0.1683 data: 0.0797 max mem: 9377 +Train: [58] [2000/6250] eta: 0:12:14 lr: 0.000051 grad: 0.1594 (0.1773) loss: 0.7578 (0.7610) time: 0.1530 data: 0.0624 max mem: 9377 +Train: [58] [2100/6250] eta: 0:11:54 lr: 0.000051 grad: 0.1544 (0.1764) loss: 0.7592 (0.7610) time: 0.1526 data: 0.0581 max mem: 9377 +Train: [58] [2200/6250] eta: 0:11:33 lr: 0.000050 grad: 0.1586 (0.1759) loss: 0.7584 (0.7608) time: 0.1450 data: 0.0478 max mem: 9377 +Train: [58] [2300/6250] eta: 0:11:14 lr: 0.000050 grad: 0.1639 (0.1753) loss: 0.7632 (0.7607) time: 0.1519 data: 0.0697 max mem: 9377 +Train: [58] [2400/6250] eta: 0:10:54 lr: 0.000050 grad: 0.1624 (0.1752) loss: 0.7658 (0.7606) time: 0.1690 data: 0.0810 max mem: 9377 +Train: [58] [2500/6250] eta: 0:10:37 lr: 0.000050 grad: 0.1539 (0.1747) loss: 0.7542 (0.7606) time: 0.1992 data: 0.1169 max mem: 9377 +Train: [58] [2600/6250] eta: 0:10:17 lr: 0.000050 grad: 0.1574 (0.1743) loss: 0.7629 (0.7606) time: 0.1460 data: 0.0478 max mem: 9377 +Train: [58] [2700/6250] eta: 0:10:00 lr: 0.000050 grad: 0.1586 (0.1740) loss: 0.7553 (0.7606) time: 0.1628 data: 0.0769 max mem: 9377 +Train: [58] [2800/6250] eta: 0:09:42 lr: 0.000050 grad: 0.1659 (0.1739) loss: 0.7578 (0.7606) time: 0.1644 data: 0.0681 max mem: 9377 +Train: [58] [2900/6250] eta: 0:09:23 lr: 0.000050 grad: 0.1599 (0.1736) loss: 0.7649 (0.7605) time: 0.1755 data: 0.0834 max mem: 9377 +Train: [58] [3000/6250] eta: 0:09:05 lr: 0.000050 grad: 0.1657 (0.1734) loss: 0.7504 (0.7605) time: 0.1618 data: 0.0736 max mem: 9377 +Train: [58] [3100/6250] eta: 0:08:47 lr: 0.000050 grad: 0.1485 (0.1731) loss: 0.7699 (0.7605) time: 0.1579 data: 0.0663 max mem: 9377 +Train: [58] [3200/6250] eta: 0:08:30 lr: 0.000050 grad: 0.1671 (0.1728) loss: 0.7605 (0.7605) time: 0.1643 data: 0.0695 max mem: 9377 +Train: [58] [3300/6250] eta: 0:08:12 lr: 0.000050 grad: 0.1627 (0.1729) loss: 0.7594 (0.7605) time: 0.1372 data: 0.0440 max mem: 9377 +Train: [58] [3400/6250] eta: 0:07:55 lr: 0.000050 grad: 0.1564 (0.1729) loss: 0.7592 (0.7604) time: 0.1671 data: 0.0853 max mem: 9377 +Train: [58] [3500/6250] eta: 0:07:37 lr: 0.000050 grad: 0.1618 (0.1727) loss: 0.7546 (0.7603) time: 0.1579 data: 0.0665 max mem: 9377 +Train: [58] [3600/6250] eta: 0:07:20 lr: 0.000050 grad: 0.1606 (0.1726) loss: 0.7662 (0.7603) time: 0.1757 data: 0.0900 max mem: 9377 +Train: [58] [3700/6250] eta: 0:07:02 lr: 0.000050 grad: 0.1596 (0.1724) loss: 0.7585 (0.7602) time: 0.1519 data: 0.0689 max mem: 9377 +Train: [58] [3800/6250] eta: 0:06:45 lr: 0.000050 grad: 0.1629 (0.1724) loss: 0.7656 (0.7601) time: 0.1467 data: 0.0527 max mem: 9377 +Train: [58] [3900/6250] eta: 0:06:29 lr: 0.000050 grad: 0.1614 (0.1722) loss: 0.7556 (0.7600) time: 0.1826 data: 0.0996 max mem: 9377 +Train: [58] [4000/6250] eta: 0:06:12 lr: 0.000050 grad: 0.1743 (0.1721) loss: 0.7719 (0.7600) time: 0.1812 data: 0.0963 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:55 lr: 0.000050 grad: 0.1642 (0.1720) loss: 0.7675 (0.7600) time: 0.1674 data: 0.0785 max mem: 9377 +Train: [58] [4200/6250] eta: 0:05:39 lr: 0.000050 grad: 0.1603 (0.1717) loss: 0.7702 (0.7600) time: 0.1723 data: 0.0789 max mem: 9377 +Train: [58] [4300/6250] eta: 0:05:23 lr: 0.000050 grad: 0.1615 (0.1715) loss: 0.7590 (0.7602) time: 0.1752 data: 0.0815 max mem: 9377 +Train: [58] [4400/6250] eta: 0:05:06 lr: 0.000050 grad: 0.1655 (0.1712) loss: 0.7586 (0.7603) time: 0.1571 data: 0.0620 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:49 lr: 0.000050 grad: 0.1629 (0.1713) loss: 0.7603 (0.7604) time: 0.1622 data: 0.0682 max mem: 9377 +Train: [58] [4600/6250] eta: 0:04:33 lr: 0.000050 grad: 0.1602 (0.1712) loss: 0.7672 (0.7604) time: 0.1429 data: 0.0478 max mem: 9377 +Train: [58] [4700/6250] eta: 0:04:16 lr: 0.000050 grad: 0.1558 (0.1709) loss: 0.7736 (0.7606) time: 0.1676 data: 0.0762 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:59 lr: 0.000050 grad: 0.1613 (0.1708) loss: 0.7620 (0.7606) time: 0.1418 data: 0.0477 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:42 lr: 0.000050 grad: 0.1575 (0.1706) loss: 0.7545 (0.7607) time: 0.1433 data: 0.0540 max mem: 9377 +Train: [58] [5000/6250] eta: 0:03:26 lr: 0.000050 grad: 0.1608 (0.1705) loss: 0.7662 (0.7607) time: 0.1814 data: 0.1010 max mem: 9377 +Train: [58] [5100/6250] eta: 0:03:09 lr: 0.000050 grad: 0.1683 (0.1703) loss: 0.7565 (0.7608) time: 0.1469 data: 0.0636 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:53 lr: 0.000050 grad: 0.1686 (0.1702) loss: 0.7576 (0.7609) time: 0.1585 data: 0.0728 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:36 lr: 0.000049 grad: 0.1727 (0.1702) loss: 0.7524 (0.7608) time: 0.1623 data: 0.0692 max mem: 9377 +Train: [58] [5400/6250] eta: 0:02:20 lr: 0.000049 grad: 0.1588 (0.1701) loss: 0.7686 (0.7609) time: 0.1588 data: 0.0741 max mem: 9377 +Train: [58] [5500/6250] eta: 0:02:03 lr: 0.000049 grad: 0.1613 (0.1700) loss: 0.7538 (0.7609) time: 0.1403 data: 0.0408 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:46 lr: 0.000049 grad: 0.1640 (0.1699) loss: 0.7522 (0.7608) time: 0.1498 data: 0.0538 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:30 lr: 0.000049 grad: 0.1580 (0.1698) loss: 0.7648 (0.7609) time: 0.1687 data: 0.0687 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:13 lr: 0.000049 grad: 0.1631 (0.1697) loss: 0.7527 (0.7609) time: 0.1547 data: 0.0524 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:57 lr: 0.000049 grad: 0.1693 (0.1697) loss: 0.7538 (0.7609) time: 0.1564 data: 0.0610 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:41 lr: 0.000049 grad: 0.1594 (0.1696) loss: 0.7597 (0.7609) time: 0.1636 data: 0.0743 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:24 lr: 0.000049 grad: 0.1608 (0.1696) loss: 0.7591 (0.7609) time: 0.1680 data: 0.0826 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:08 lr: 0.000049 grad: 0.1534 (0.1695) loss: 0.7626 (0.7609) time: 0.1588 data: 0.0727 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1561 (0.1694) loss: 0.7712 (0.7609) time: 0.1604 data: 0.0739 max mem: 9377 +Train: [58] Total time: 0:17:09 (0.1648 s / it) +Averaged stats: lr: 0.000049 grad: 0.1561 (0.1694) loss: 0.7712 (0.7609) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:05:19 loss: 0.8432 (0.8432) time: 5.1607 data: 5.1296 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8556 (0.8551) time: 0.1443 data: 0.1189 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-train-subset): loss: 0.8556 (0.8551) +Eval (hcp-val): [58] [ 0/62] eta: 0:03:38 loss: 0.8519 (0.8519) time: 3.5257 data: 3.4502 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8514 (0.8529) time: 0.1413 data: 0.1141 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (hcp-val): loss: 0.8514 (0.8529) +Eval (nsd-val): [58] [ 0/62] eta: 0:04:54 loss: 0.8170 (0.8170) time: 4.7499 data: 4.7198 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8262 (0.8276) time: 0.1446 data: 0.1190 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:14 (0.2302 s / it) +Averaged stats (nsd-val): loss: 0.8262 (0.8276) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 10:34:47 lr: 0.000049 grad: 0.1115 (0.1115) loss: 0.8580 (0.8580) time: 6.0940 data: 5.9921 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:22:56 lr: 0.000049 grad: 0.2157 (0.2730) loss: 0.7692 (0.7831) time: 0.1700 data: 0.0533 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:19:42 lr: 0.000049 grad: 0.1838 (0.2451) loss: 0.7842 (0.7753) time: 0.1621 data: 0.0467 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:18:14 lr: 0.000049 grad: 0.2149 (0.2376) loss: 0.7641 (0.7735) time: 0.1694 data: 0.0773 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:17:13 lr: 0.000049 grad: 0.1843 (0.2275) loss: 0.7515 (0.7716) time: 0.1445 data: 0.0449 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:16:35 lr: 0.000049 grad: 0.1549 (0.2159) loss: 0.7664 (0.7699) time: 0.1617 data: 0.0639 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:16:09 lr: 0.000049 grad: 0.1600 (0.2082) loss: 0.7581 (0.7682) time: 0.1667 data: 0.0692 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:15:34 lr: 0.000049 grad: 0.1675 (0.2029) loss: 0.7610 (0.7673) time: 0.1690 data: 0.0756 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:15:09 lr: 0.000049 grad: 0.1614 (0.1985) loss: 0.7602 (0.7669) time: 0.1446 data: 0.0638 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:14:44 lr: 0.000049 grad: 0.1593 (0.1950) loss: 0.7648 (0.7668) time: 0.1512 data: 0.0516 max mem: 9377 +Train: [59] [1000/6250] eta: 0:14:22 lr: 0.000049 grad: 0.1589 (0.1919) loss: 0.7705 (0.7669) time: 0.1728 data: 0.0887 max mem: 9377 +Train: [59] [1100/6250] eta: 0:14:00 lr: 0.000049 grad: 0.1594 (0.1889) loss: 0.7632 (0.7668) time: 0.1591 data: 0.0734 max mem: 9377 +Train: [59] [1200/6250] eta: 0:13:44 lr: 0.000049 grad: 0.1573 (0.1863) loss: 0.7599 (0.7666) time: 0.1724 data: 0.0888 max mem: 9377 +Train: [59] [1300/6250] eta: 0:13:36 lr: 0.000049 grad: 0.1500 (0.1841) loss: 0.7578 (0.7662) time: 0.1698 data: 0.0860 max mem: 9377 +Train: [59] [1400/6250] eta: 0:13:24 lr: 0.000049 grad: 0.1605 (0.1825) loss: 0.7586 (0.7656) time: 0.1789 data: 0.0906 max mem: 9377 +Train: [59] [1500/6250] eta: 0:13:12 lr: 0.000049 grad: 0.1513 (0.1809) loss: 0.7623 (0.7651) time: 0.1895 data: 0.1063 max mem: 9377 +Train: [59] [1600/6250] eta: 0:12:56 lr: 0.000049 grad: 0.1567 (0.1795) loss: 0.7589 (0.7649) time: 0.1696 data: 0.0858 max mem: 9377 +Train: [59] [1700/6250] eta: 0:12:47 lr: 0.000049 grad: 0.1583 (0.1784) loss: 0.7524 (0.7646) time: 0.1717 data: 0.0746 max mem: 9377 +Train: [59] [1800/6250] eta: 0:12:34 lr: 0.000049 grad: 0.1652 (0.1775) loss: 0.7459 (0.7643) time: 0.1604 data: 0.0646 max mem: 9377 +Train: [59] [1900/6250] eta: 0:12:20 lr: 0.000049 grad: 0.1549 (0.1766) loss: 0.7665 (0.7641) time: 0.1722 data: 0.0808 max mem: 9377 +Train: [59] [2000/6250] eta: 0:12:04 lr: 0.000049 grad: 0.1530 (0.1756) loss: 0.7620 (0.7639) time: 0.1758 data: 0.0847 max mem: 9377 +Train: [59] [2100/6250] eta: 0:11:46 lr: 0.000048 grad: 0.1511 (0.1748) loss: 0.7643 (0.7639) time: 0.1733 data: 0.0816 max mem: 9377 +Train: [59] [2200/6250] eta: 0:11:25 lr: 0.000048 grad: 0.1568 (0.1742) loss: 0.7648 (0.7638) time: 0.1552 data: 0.0690 max mem: 9377 +Train: [59] [2300/6250] eta: 0:11:06 lr: 0.000048 grad: 0.1567 (0.1737) loss: 0.7632 (0.7637) time: 0.1540 data: 0.0620 max mem: 9377 +Train: [59] [2400/6250] eta: 0:10:47 lr: 0.000048 grad: 0.1544 (0.1731) loss: 0.7586 (0.7636) time: 0.1699 data: 0.0735 max mem: 9377 +Train: [59] [2500/6250] eta: 0:10:29 lr: 0.000048 grad: 0.1560 (0.1725) loss: 0.7629 (0.7634) time: 0.1511 data: 0.0665 max mem: 9377 +Train: [59] [2600/6250] eta: 0:10:12 lr: 0.000048 grad: 0.1607 (0.1722) loss: 0.7570 (0.7632) time: 0.1827 data: 0.0941 max mem: 9377 +Train: [59] [2700/6250] eta: 0:09:54 lr: 0.000048 grad: 0.1577 (0.1719) loss: 0.7609 (0.7631) time: 0.1765 data: 0.0795 max mem: 9377 +Train: [59] [2800/6250] eta: 0:09:36 lr: 0.000048 grad: 0.1561 (0.1716) loss: 0.7506 (0.7628) time: 0.1745 data: 0.0872 max mem: 9377 +Train: [59] [2900/6250] eta: 0:09:18 lr: 0.000048 grad: 0.1617 (0.1714) loss: 0.7540 (0.7626) time: 0.1844 data: 0.0911 max mem: 9377 +Train: [59] [3000/6250] eta: 0:08:59 lr: 0.000048 grad: 0.1641 (0.1712) loss: 0.7553 (0.7625) time: 0.1053 data: 0.0151 max mem: 9377 +Train: [59] [3100/6250] eta: 0:08:42 lr: 0.000048 grad: 0.1599 (0.1710) loss: 0.7481 (0.7622) time: 0.1643 data: 0.0763 max mem: 9377 +Train: [59] [3200/6250] eta: 0:08:24 lr: 0.000048 grad: 0.1555 (0.1708) loss: 0.7558 (0.7619) time: 0.1620 data: 0.0611 max mem: 9377 +Train: [59] [3300/6250] eta: 0:08:07 lr: 0.000048 grad: 0.1607 (0.1706) loss: 0.7493 (0.7617) time: 0.1840 data: 0.1006 max mem: 9377 +Train: [59] [3400/6250] eta: 0:07:50 lr: 0.000048 grad: 0.1698 (0.1705) loss: 0.7464 (0.7615) time: 0.1469 data: 0.0524 max mem: 9377 +Train: [59] [3500/6250] eta: 0:07:33 lr: 0.000048 grad: 0.1598 (0.1705) loss: 0.7508 (0.7613) time: 0.1543 data: 0.0704 max mem: 9377 +Train: [59] [3600/6250] eta: 0:07:16 lr: 0.000048 grad: 0.1646 (0.1705) loss: 0.7539 (0.7611) time: 0.1802 data: 0.0966 max mem: 9377 +Train: [59] [3700/6250] eta: 0:06:59 lr: 0.000048 grad: 0.1642 (0.1704) loss: 0.7517 (0.7610) time: 0.1582 data: 0.0707 max mem: 9377 +Train: [59] [3800/6250] eta: 0:06:42 lr: 0.000048 grad: 0.1652 (0.1704) loss: 0.7485 (0.7608) time: 0.1789 data: 0.0865 max mem: 9377 +Train: [59] [3900/6250] eta: 0:06:26 lr: 0.000048 grad: 0.1674 (0.1705) loss: 0.7474 (0.7607) time: 0.1781 data: 0.0916 max mem: 9377 +Train: [59] [4000/6250] eta: 0:06:10 lr: 0.000048 grad: 0.1617 (0.1704) loss: 0.7637 (0.7606) time: 0.1497 data: 0.0584 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:53 lr: 0.000048 grad: 0.1658 (0.1702) loss: 0.7516 (0.7606) time: 0.1511 data: 0.0625 max mem: 9377 +Train: [59] [4200/6250] eta: 0:05:36 lr: 0.000048 grad: 0.1658 (0.1700) loss: 0.7568 (0.7606) time: 0.1523 data: 0.0627 max mem: 9377 +Train: [59] [4300/6250] eta: 0:05:20 lr: 0.000048 grad: 0.1604 (0.1700) loss: 0.7651 (0.7607) time: 0.1857 data: 0.0998 max mem: 9377 +Train: [59] [4400/6250] eta: 0:05:03 lr: 0.000048 grad: 0.1492 (0.1698) loss: 0.7707 (0.7608) time: 0.1525 data: 0.0660 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:47 lr: 0.000048 grad: 0.1562 (0.1696) loss: 0.7650 (0.7608) time: 0.1650 data: 0.0754 max mem: 9377 +Train: [59] [4600/6250] eta: 0:04:30 lr: 0.000048 grad: 0.1622 (0.1695) loss: 0.7527 (0.7608) time: 0.1672 data: 0.0800 max mem: 9377 +Train: [59] [4700/6250] eta: 0:04:13 lr: 0.000048 grad: 0.1649 (0.1694) loss: 0.7609 (0.7609) time: 0.1580 data: 0.0642 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:56 lr: 0.000048 grad: 0.1561 (0.1692) loss: 0.7674 (0.7610) time: 0.1345 data: 0.0320 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:40 lr: 0.000048 grad: 0.1487 (0.1691) loss: 0.7766 (0.7612) time: 0.1913 data: 0.1167 max mem: 9377 +Train: [59] [5000/6250] eta: 0:03:24 lr: 0.000048 grad: 0.1677 (0.1689) loss: 0.7692 (0.7613) time: 0.1602 data: 0.0712 max mem: 9377 +Train: [59] [5100/6250] eta: 0:03:08 lr: 0.000048 grad: 0.1562 (0.1689) loss: 0.7684 (0.7614) time: 0.1780 data: 0.0865 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:52 lr: 0.000047 grad: 0.1693 (0.1689) loss: 0.7660 (0.7615) time: 0.1873 data: 0.0993 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:35 lr: 0.000047 grad: 0.1564 (0.1688) loss: 0.7606 (0.7615) time: 0.1958 data: 0.0948 max mem: 9377 +Train: [59] [5400/6250] eta: 0:02:19 lr: 0.000047 grad: 0.1674 (0.1687) loss: 0.7638 (0.7615) time: 0.2156 data: 0.1238 max mem: 9377 +Train: [59] [5500/6250] eta: 0:02:03 lr: 0.000047 grad: 0.1695 (0.1686) loss: 0.7481 (0.7615) time: 0.1542 data: 0.0637 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:46 lr: 0.000047 grad: 0.1550 (0.1685) loss: 0.7714 (0.7616) time: 0.1711 data: 0.0812 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:30 lr: 0.000047 grad: 0.1535 (0.1684) loss: 0.7686 (0.7616) time: 0.1544 data: 0.0579 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:13 lr: 0.000047 grad: 0.1630 (0.1683) loss: 0.7716 (0.7617) time: 0.1635 data: 0.0787 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:57 lr: 0.000047 grad: 0.1531 (0.1682) loss: 0.7640 (0.7618) time: 0.1599 data: 0.0753 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:41 lr: 0.000047 grad: 0.1610 (0.1681) loss: 0.7655 (0.7619) time: 0.1403 data: 0.0499 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:24 lr: 0.000047 grad: 0.1597 (0.1680) loss: 0.7596 (0.7620) time: 0.1691 data: 0.0888 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:08 lr: 0.000047 grad: 0.1561 (0.1679) loss: 0.7714 (0.7620) time: 0.1523 data: 0.0481 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1601 (0.1678) loss: 0.7659 (0.7620) time: 0.1601 data: 0.0648 max mem: 9377 +Train: [59] Total time: 0:17:12 (0.1652 s / it) +Averaged stats: lr: 0.000047 grad: 0.1601 (0.1678) loss: 0.7659 (0.7620) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:05:02 loss: 0.8476 (0.8476) time: 4.8747 data: 4.8416 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8514 (0.8524) time: 0.1156 data: 0.0880 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:14 (0.2338 s / it) +Averaged stats (hcp-train-subset): loss: 0.8514 (0.8524) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [59] [ 0/62] eta: 0:04:10 loss: 0.8543 (0.8543) time: 4.0445 data: 3.9650 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8486 (0.8507) time: 0.1322 data: 0.1069 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (hcp-val): loss: 0.8486 (0.8507) +Making plots (hcp-val): example=2 +Eval (nsd-val): [59] [ 0/62] eta: 0:04:44 loss: 0.8152 (0.8152) time: 4.5857 data: 4.5182 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8219 (0.8248) time: 0.1242 data: 0.0988 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (nsd-val): loss: 0.8219 (0.8248) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 8:01:29 lr: 0.000047 grad: 0.1108 (0.1108) loss: 0.8796 (0.8796) time: 4.6222 data: 4.4069 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:23:24 lr: 0.000047 grad: 0.2200 (0.2609) loss: 0.7669 (0.7964) time: 0.1857 data: 0.0829 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:19:54 lr: 0.000047 grad: 0.2249 (0.2500) loss: 0.7663 (0.7857) time: 0.1721 data: 0.0689 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:18:13 lr: 0.000047 grad: 0.1953 (0.2384) loss: 0.7705 (0.7806) time: 0.1479 data: 0.0467 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:17:04 lr: 0.000047 grad: 0.1769 (0.2289) loss: 0.7681 (0.7782) time: 0.1418 data: 0.0530 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:16:39 lr: 0.000047 grad: 0.1598 (0.2173) loss: 0.7726 (0.7768) time: 0.1887 data: 0.0921 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:16:07 lr: 0.000047 grad: 0.1656 (0.2097) loss: 0.7693 (0.7759) time: 0.1487 data: 0.0540 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:15:46 lr: 0.000047 grad: 0.1648 (0.2042) loss: 0.7668 (0.7745) time: 0.1615 data: 0.0673 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:15:26 lr: 0.000047 grad: 0.1620 (0.1996) loss: 0.7603 (0.7733) time: 0.1405 data: 0.0430 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:15:09 lr: 0.000047 grad: 0.1730 (0.1970) loss: 0.7571 (0.7719) time: 0.1864 data: 0.0953 max mem: 9377 +Train: [60] [1000/6250] eta: 0:14:44 lr: 0.000047 grad: 0.1583 (0.1936) loss: 0.7623 (0.7709) time: 0.1458 data: 0.0622 max mem: 9377 +Train: [60] [1100/6250] eta: 0:14:22 lr: 0.000047 grad: 0.1533 (0.1909) loss: 0.7612 (0.7698) time: 0.1810 data: 0.1031 max mem: 9377 +Train: [60] [1200/6250] eta: 0:14:09 lr: 0.000047 grad: 0.1607 (0.1890) loss: 0.7507 (0.7686) time: 0.1837 data: 0.1020 max mem: 9377 +Train: [60] [1300/6250] eta: 0:13:54 lr: 0.000047 grad: 0.1689 (0.1872) loss: 0.7587 (0.7675) time: 0.1569 data: 0.0752 max mem: 9377 +Train: [60] [1400/6250] eta: 0:13:40 lr: 0.000047 grad: 0.1640 (0.1858) loss: 0.7629 (0.7666) time: 0.1846 data: 0.0997 max mem: 9377 +Train: [60] [1500/6250] eta: 0:13:21 lr: 0.000047 grad: 0.1633 (0.1843) loss: 0.7576 (0.7663) time: 0.1556 data: 0.0685 max mem: 9377 +Train: [60] [1600/6250] eta: 0:13:03 lr: 0.000047 grad: 0.1644 (0.1828) loss: 0.7584 (0.7659) time: 0.1780 data: 0.0844 max mem: 9377 +Train: [60] [1700/6250] eta: 0:12:46 lr: 0.000047 grad: 0.1683 (0.1817) loss: 0.7475 (0.7653) time: 0.1623 data: 0.0733 max mem: 9377 +Train: [60] [1800/6250] eta: 0:12:29 lr: 0.000047 grad: 0.1607 (0.1807) loss: 0.7550 (0.7648) time: 0.1706 data: 0.0802 max mem: 9377 +Train: [60] [1900/6250] eta: 0:12:10 lr: 0.000047 grad: 0.1674 (0.1799) loss: 0.7611 (0.7644) time: 0.1634 data: 0.0686 max mem: 9377 +Train: [60] [2000/6250] eta: 0:11:53 lr: 0.000047 grad: 0.1560 (0.1789) loss: 0.7607 (0.7643) time: 0.1646 data: 0.0770 max mem: 9377 +Train: [60] [2100/6250] eta: 0:11:34 lr: 0.000046 grad: 0.1541 (0.1782) loss: 0.7718 (0.7642) time: 0.1564 data: 0.0658 max mem: 9377 +Train: [60] [2200/6250] eta: 0:11:16 lr: 0.000046 grad: 0.1593 (0.1779) loss: 0.7577 (0.7640) time: 0.1739 data: 0.0923 max mem: 9377 +Train: [60] [2300/6250] eta: 0:10:59 lr: 0.000046 grad: 0.1630 (0.1773) loss: 0.7652 (0.7638) time: 0.1790 data: 0.0957 max mem: 9377 +Train: [60] [2400/6250] eta: 0:10:40 lr: 0.000046 grad: 0.1570 (0.1768) loss: 0.7617 (0.7638) time: 0.1602 data: 0.0615 max mem: 9377 +Train: [60] [2500/6250] eta: 0:10:23 lr: 0.000046 grad: 0.1578 (0.1762) loss: 0.7660 (0.7638) time: 0.1455 data: 0.0553 max mem: 9377 +Train: [60] [2600/6250] eta: 0:10:05 lr: 0.000046 grad: 0.1683 (0.1758) loss: 0.7530 (0.7637) time: 0.1928 data: 0.1129 max mem: 9377 +Train: [60] [2700/6250] eta: 0:09:47 lr: 0.000046 grad: 0.1717 (0.1758) loss: 0.7532 (0.7635) time: 0.1500 data: 0.0626 max mem: 9377 +Train: [60] [2800/6250] eta: 0:09:30 lr: 0.000046 grad: 0.1577 (0.1754) loss: 0.7657 (0.7635) time: 0.1661 data: 0.0811 max mem: 9377 +Train: [60] [2900/6250] eta: 0:09:11 lr: 0.000046 grad: 0.1688 (0.1752) loss: 0.7614 (0.7634) time: 0.1246 data: 0.0408 max mem: 9377 +Train: [60] [3000/6250] eta: 0:08:54 lr: 0.000046 grad: 0.1551 (0.1749) loss: 0.7657 (0.7634) time: 0.1583 data: 0.0713 max mem: 9377 +Train: [60] [3100/6250] eta: 0:08:37 lr: 0.000046 grad: 0.1580 (0.1746) loss: 0.7701 (0.7634) time: 0.1708 data: 0.0847 max mem: 9377 +Train: [60] [3200/6250] eta: 0:08:19 lr: 0.000046 grad: 0.1592 (0.1746) loss: 0.7555 (0.7633) time: 0.1844 data: 0.0906 max mem: 9377 +Train: [60] [3300/6250] eta: 0:08:01 lr: 0.000046 grad: 0.1585 (0.1743) loss: 0.7534 (0.7632) time: 0.1526 data: 0.0580 max mem: 9377 +Train: [60] [3400/6250] eta: 0:07:45 lr: 0.000046 grad: 0.1718 (0.1741) loss: 0.7576 (0.7632) time: 0.1074 data: 0.0146 max mem: 9377 +Train: [60] [3500/6250] eta: 0:07:28 lr: 0.000046 grad: 0.1556 (0.1739) loss: 0.7672 (0.7632) time: 0.1636 data: 0.0742 max mem: 9377 +Train: [60] [3600/6250] eta: 0:07:12 lr: 0.000046 grad: 0.1574 (0.1738) loss: 0.7581 (0.7631) time: 0.1611 data: 0.0791 max mem: 9377 +Train: [60] [3700/6250] eta: 0:06:55 lr: 0.000046 grad: 0.1574 (0.1736) loss: 0.7530 (0.7630) time: 0.1454 data: 0.0627 max mem: 9377 +Train: [60] [3800/6250] eta: 0:06:38 lr: 0.000046 grad: 0.1681 (0.1734) loss: 0.7620 (0.7629) time: 0.1668 data: 0.0820 max mem: 9377 +Train: [60] [3900/6250] eta: 0:06:22 lr: 0.000046 grad: 0.1597 (0.1733) loss: 0.7466 (0.7628) time: 0.1562 data: 0.0646 max mem: 9377 +Train: [60] [4000/6250] eta: 0:06:06 lr: 0.000046 grad: 0.1703 (0.1732) loss: 0.7530 (0.7627) time: 0.1552 data: 0.0644 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:50 lr: 0.000046 grad: 0.1631 (0.1730) loss: 0.7619 (0.7627) time: 0.1602 data: 0.0707 max mem: 9377 +Train: [60] [4200/6250] eta: 0:05:34 lr: 0.000046 grad: 0.1520 (0.1729) loss: 0.7678 (0.7627) time: 0.1855 data: 0.0936 max mem: 9377 +Train: [60] [4300/6250] eta: 0:05:19 lr: 0.000046 grad: 0.1532 (0.1727) loss: 0.7669 (0.7627) time: 0.1898 data: 0.1005 max mem: 9377 +Train: [60] [4400/6250] eta: 0:05:03 lr: 0.000046 grad: 0.1654 (0.1725) loss: 0.7637 (0.7627) time: 0.1583 data: 0.0701 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:47 lr: 0.000046 grad: 0.1590 (0.1723) loss: 0.7679 (0.7627) time: 0.1770 data: 0.0872 max mem: 9377 +Train: [60] [4600/6250] eta: 0:04:30 lr: 0.000046 grad: 0.1656 (0.1722) loss: 0.7546 (0.7627) time: 0.1622 data: 0.0669 max mem: 9377 +Train: [60] [4700/6250] eta: 0:04:14 lr: 0.000046 grad: 0.1529 (0.1719) loss: 0.7630 (0.7627) time: 0.1556 data: 0.0584 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:57 lr: 0.000046 grad: 0.1587 (0.1717) loss: 0.7529 (0.7627) time: 0.1485 data: 0.0612 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:41 lr: 0.000046 grad: 0.1562 (0.1715) loss: 0.7571 (0.7628) time: 0.2205 data: 0.1447 max mem: 9377 +Train: [60] [5000/6250] eta: 0:03:24 lr: 0.000046 grad: 0.1548 (0.1713) loss: 0.7642 (0.7628) time: 0.1420 data: 0.0538 max mem: 9377 +Train: [60] [5100/6250] eta: 0:03:08 lr: 0.000046 grad: 0.1632 (0.1711) loss: 0.7677 (0.7628) time: 0.1540 data: 0.0695 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:51 lr: 0.000045 grad: 0.1640 (0.1709) loss: 0.7611 (0.7629) time: 0.1468 data: 0.0591 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:35 lr: 0.000045 grad: 0.1630 (0.1706) loss: 0.7563 (0.7629) time: 0.1597 data: 0.0715 max mem: 9377 +Train: [60] [5400/6250] eta: 0:02:19 lr: 0.000045 grad: 0.1658 (0.1705) loss: 0.7646 (0.7629) time: 0.1743 data: 0.0876 max mem: 9377 +Train: [60] [5500/6250] eta: 0:02:02 lr: 0.000045 grad: 0.1487 (0.1705) loss: 0.7737 (0.7630) time: 0.1697 data: 0.0851 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:46 lr: 0.000045 grad: 0.1601 (0.1703) loss: 0.7617 (0.7630) time: 0.1442 data: 0.0473 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:29 lr: 0.000045 grad: 0.1595 (0.1701) loss: 0.7597 (0.7631) time: 0.1545 data: 0.0652 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:13 lr: 0.000045 grad: 0.1604 (0.1700) loss: 0.7702 (0.7631) time: 0.1485 data: 0.0625 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:56 lr: 0.000045 grad: 0.1598 (0.1699) loss: 0.7635 (0.7630) time: 0.1620 data: 0.0689 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:40 lr: 0.000045 grad: 0.1517 (0.1697) loss: 0.7737 (0.7631) time: 0.1528 data: 0.0665 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:24 lr: 0.000045 grad: 0.1622 (0.1696) loss: 0.7584 (0.7631) time: 0.1963 data: 0.1125 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:08 lr: 0.000045 grad: 0.1602 (0.1695) loss: 0.7674 (0.7632) time: 0.1775 data: 0.0942 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1500 (0.1694) loss: 0.7750 (0.7631) time: 0.1274 data: 0.0379 max mem: 9377 +Train: [60] Total time: 0:17:01 (0.1634 s / it) +Averaged stats: lr: 0.000045 grad: 0.1500 (0.1694) loss: 0.7750 (0.7631) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:04:05 loss: 0.8469 (0.8469) time: 3.9587 data: 3.8922 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8559 (0.8562) time: 0.1249 data: 0.0985 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:14 (0.2366 s / it) +Averaged stats (hcp-train-subset): loss: 0.8559 (0.8562) +Eval (hcp-val): [60] [ 0/62] eta: 0:05:34 loss: 0.8517 (0.8517) time: 5.3939 data: 5.3437 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8526 (0.8529) time: 0.1238 data: 0.0986 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:14 (0.2261 s / it) +Averaged stats (hcp-val): loss: 0.8526 (0.8529) +Eval (nsd-val): [60] [ 0/62] eta: 0:06:12 loss: 0.8185 (0.8185) time: 6.0022 data: 5.9713 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8248 (0.8265) time: 0.1253 data: 0.1002 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (nsd-val): loss: 0.8248 (0.8265) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 8:34:55 lr: 0.000045 grad: 0.4887 (0.4887) loss: 0.7846 (0.7846) time: 4.9434 data: 4.6594 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:22:13 lr: 0.000045 grad: 0.2105 (0.2825) loss: 0.7663 (0.7687) time: 0.1647 data: 0.0597 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:19:19 lr: 0.000045 grad: 0.2042 (0.2664) loss: 0.7721 (0.7710) time: 0.1530 data: 0.0508 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:18:21 lr: 0.000045 grad: 0.1773 (0.2440) loss: 0.7676 (0.7707) time: 0.1762 data: 0.0746 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:17:34 lr: 0.000045 grad: 0.1712 (0.2259) loss: 0.7622 (0.7725) time: 0.1527 data: 0.0608 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:16:58 lr: 0.000045 grad: 0.1816 (0.2177) loss: 0.7678 (0.7719) time: 0.1806 data: 0.0809 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:16:24 lr: 0.000045 grad: 0.1680 (0.2104) loss: 0.7691 (0.7715) time: 0.1507 data: 0.0631 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:15:57 lr: 0.000045 grad: 0.1617 (0.2040) loss: 0.7673 (0.7709) time: 0.1442 data: 0.0440 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:15:26 lr: 0.000045 grad: 0.1549 (0.1997) loss: 0.7536 (0.7694) time: 0.1556 data: 0.0653 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:15:05 lr: 0.000045 grad: 0.1632 (0.1965) loss: 0.7749 (0.7687) time: 0.1588 data: 0.0737 max mem: 9377 +Train: [61] [1000/6250] eta: 0:14:42 lr: 0.000045 grad: 0.1650 (0.1941) loss: 0.7638 (0.7678) time: 0.1410 data: 0.0509 max mem: 9377 +Train: [61] [1100/6250] eta: 0:14:18 lr: 0.000045 grad: 0.1588 (0.1914) loss: 0.7584 (0.7672) time: 0.1555 data: 0.0597 max mem: 9377 +Train: [61] [1200/6250] eta: 0:13:54 lr: 0.000045 grad: 0.1602 (0.1889) loss: 0.7517 (0.7671) time: 0.1710 data: 0.0834 max mem: 9377 +Train: [61] [1300/6250] eta: 0:13:38 lr: 0.000045 grad: 0.1676 (0.1872) loss: 0.7595 (0.7666) time: 0.2029 data: 0.1253 max mem: 9377 +Train: [61] [1400/6250] eta: 0:13:16 lr: 0.000045 grad: 0.1680 (0.1858) loss: 0.7606 (0.7662) time: 0.1373 data: 0.0585 max mem: 9377 +Train: [61] [1500/6250] eta: 0:13:00 lr: 0.000045 grad: 0.1595 (0.1844) loss: 0.7541 (0.7657) time: 0.1441 data: 0.0664 max mem: 9377 +Train: [61] [1600/6250] eta: 0:12:41 lr: 0.000045 grad: 0.1608 (0.1833) loss: 0.7679 (0.7654) time: 0.1416 data: 0.0556 max mem: 9377 +Train: [61] [1700/6250] eta: 0:12:28 lr: 0.000045 grad: 0.1619 (0.1824) loss: 0.7534 (0.7651) time: 0.1902 data: 0.1054 max mem: 9377 +Train: [61] [1800/6250] eta: 0:12:12 lr: 0.000045 grad: 0.1594 (0.1817) loss: 0.7640 (0.7648) time: 0.1716 data: 0.0792 max mem: 9377 +Train: [61] [1900/6250] eta: 0:11:55 lr: 0.000045 grad: 0.1627 (0.1806) loss: 0.7702 (0.7647) time: 0.1505 data: 0.0628 max mem: 9377 +Train: [61] [2000/6250] eta: 0:11:38 lr: 0.000045 grad: 0.1617 (0.1797) loss: 0.7659 (0.7645) time: 0.1641 data: 0.0735 max mem: 9377 +Train: [61] [2100/6250] eta: 0:11:20 lr: 0.000044 grad: 0.1573 (0.1791) loss: 0.7565 (0.7643) time: 0.1434 data: 0.0428 max mem: 9377 +Train: [61] [2200/6250] eta: 0:11:02 lr: 0.000044 grad: 0.1675 (0.1784) loss: 0.7588 (0.7641) time: 0.1516 data: 0.0603 max mem: 9377 +Train: [61] [2300/6250] eta: 0:10:45 lr: 0.000044 grad: 0.1563 (0.1778) loss: 0.7666 (0.7639) time: 0.1568 data: 0.0674 max mem: 9377 +Train: [61] [2400/6250] eta: 0:10:27 lr: 0.000044 grad: 0.1509 (0.1773) loss: 0.7682 (0.7638) time: 0.1498 data: 0.0681 max mem: 9377 +Train: [61] [2500/6250] eta: 0:10:10 lr: 0.000044 grad: 0.1643 (0.1767) loss: 0.7573 (0.7637) time: 0.1582 data: 0.0703 max mem: 9377 +Train: [61] [2600/6250] eta: 0:09:55 lr: 0.000044 grad: 0.1735 (0.1763) loss: 0.7535 (0.7636) time: 0.1569 data: 0.0675 max mem: 9377 +Train: [61] [2700/6250] eta: 0:09:38 lr: 0.000044 grad: 0.1689 (0.1760) loss: 0.7646 (0.7636) time: 0.1327 data: 0.0446 max mem: 9377 +Train: [61] [2800/6250] eta: 0:09:20 lr: 0.000044 grad: 0.1610 (0.1756) loss: 0.7673 (0.7636) time: 0.1280 data: 0.0338 max mem: 9377 +Train: [61] [2900/6250] eta: 0:09:04 lr: 0.000044 grad: 0.1630 (0.1755) loss: 0.7610 (0.7636) time: 0.1766 data: 0.0923 max mem: 9377 +Train: [61] [3000/6250] eta: 0:08:47 lr: 0.000044 grad: 0.1619 (0.1753) loss: 0.7624 (0.7635) time: 0.1666 data: 0.0790 max mem: 9377 +Train: [61] [3100/6250] eta: 0:08:29 lr: 0.000044 grad: 0.1633 (0.1751) loss: 0.7665 (0.7634) time: 0.1665 data: 0.0860 max mem: 9377 +Train: [61] [3200/6250] eta: 0:08:13 lr: 0.000044 grad: 0.1747 (0.1751) loss: 0.7517 (0.7632) time: 0.1590 data: 0.0718 max mem: 9377 +Train: [61] [3300/6250] eta: 0:07:58 lr: 0.000044 grad: 0.1618 (0.1749) loss: 0.7537 (0.7630) time: 0.1660 data: 0.0815 max mem: 9377 +Train: [61] [3400/6250] eta: 0:07:42 lr: 0.000044 grad: 0.1677 (0.1748) loss: 0.7576 (0.7630) time: 0.1378 data: 0.0451 max mem: 9377 +Train: [61] [3500/6250] eta: 0:07:25 lr: 0.000044 grad: 0.1694 (0.1747) loss: 0.7591 (0.7628) time: 0.1400 data: 0.0622 max mem: 9377 +Train: [61] [3600/6250] eta: 0:07:09 lr: 0.000044 grad: 0.1700 (0.1746) loss: 0.7521 (0.7628) time: 0.1658 data: 0.0775 max mem: 9377 +Train: [61] [3700/6250] eta: 0:06:53 lr: 0.000044 grad: 0.1639 (0.1745) loss: 0.7583 (0.7626) time: 0.1540 data: 0.0675 max mem: 9377 +Train: [61] [3800/6250] eta: 0:06:38 lr: 0.000044 grad: 0.1714 (0.1744) loss: 0.7561 (0.7625) time: 0.1673 data: 0.0874 max mem: 9377 +Train: [61] [3900/6250] eta: 0:06:22 lr: 0.000044 grad: 0.1684 (0.1743) loss: 0.7639 (0.7624) time: 0.1588 data: 0.0726 max mem: 9377 +Train: [61] [4000/6250] eta: 0:06:06 lr: 0.000044 grad: 0.1614 (0.1741) loss: 0.7627 (0.7623) time: 0.1837 data: 0.0908 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:50 lr: 0.000044 grad: 0.1719 (0.1740) loss: 0.7572 (0.7622) time: 0.1627 data: 0.0700 max mem: 9377 +Train: [61] [4200/6250] eta: 0:05:34 lr: 0.000044 grad: 0.1627 (0.1739) loss: 0.7556 (0.7622) time: 0.1696 data: 0.0773 max mem: 9377 +Train: [61] [4300/6250] eta: 0:05:17 lr: 0.000044 grad: 0.1609 (0.1738) loss: 0.7650 (0.7620) time: 0.1441 data: 0.0534 max mem: 9377 +Train: [61] [4400/6250] eta: 0:05:01 lr: 0.000044 grad: 0.1752 (0.1737) loss: 0.7654 (0.7620) time: 0.1636 data: 0.0734 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:45 lr: 0.000044 grad: 0.1603 (0.1736) loss: 0.7721 (0.7619) time: 0.1340 data: 0.0356 max mem: 9377 +Train: [61] [4600/6250] eta: 0:04:28 lr: 0.000044 grad: 0.1588 (0.1735) loss: 0.7571 (0.7619) time: 0.1355 data: 0.0430 max mem: 9377 +Train: [61] [4700/6250] eta: 0:04:11 lr: 0.000044 grad: 0.1604 (0.1733) loss: 0.7646 (0.7619) time: 0.1494 data: 0.0603 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:54 lr: 0.000044 grad: 0.1588 (0.1732) loss: 0.7656 (0.7619) time: 0.1385 data: 0.0546 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:38 lr: 0.000044 grad: 0.1745 (0.1731) loss: 0.7635 (0.7619) time: 0.1416 data: 0.0535 max mem: 9377 +Train: [61] [5000/6250] eta: 0:03:22 lr: 0.000044 grad: 0.1500 (0.1730) loss: 0.7664 (0.7619) time: 0.1706 data: 0.0810 max mem: 9377 +Train: [61] [5100/6250] eta: 0:03:06 lr: 0.000044 grad: 0.1601 (0.1729) loss: 0.7624 (0.7620) time: 0.1643 data: 0.0752 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:50 lr: 0.000044 grad: 0.1616 (0.1727) loss: 0.7631 (0.7620) time: 0.1400 data: 0.0533 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:34 lr: 0.000043 grad: 0.1607 (0.1726) loss: 0.7648 (0.7621) time: 0.1853 data: 0.0913 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:18 lr: 0.000043 grad: 0.1663 (0.1725) loss: 0.7583 (0.7622) time: 0.1550 data: 0.0598 max mem: 9377 +Train: [61] [5500/6250] eta: 0:02:02 lr: 0.000043 grad: 0.1673 (0.1724) loss: 0.7565 (0.7621) time: 0.1432 data: 0.0557 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:45 lr: 0.000043 grad: 0.1670 (0.1723) loss: 0.7584 (0.7621) time: 0.1643 data: 0.0696 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:29 lr: 0.000043 grad: 0.1734 (0.1723) loss: 0.7466 (0.7620) time: 0.2015 data: 0.1206 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:13 lr: 0.000043 grad: 0.1663 (0.1723) loss: 0.7464 (0.7619) time: 0.1549 data: 0.0684 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:56 lr: 0.000043 grad: 0.1722 (0.1723) loss: 0.7486 (0.7618) time: 0.1738 data: 0.0865 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:40 lr: 0.000043 grad: 0.1714 (0.1722) loss: 0.7513 (0.7618) time: 0.1487 data: 0.0565 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:24 lr: 0.000043 grad: 0.1651 (0.1722) loss: 0.7561 (0.7617) time: 0.1583 data: 0.0728 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:08 lr: 0.000043 grad: 0.1562 (0.1721) loss: 0.7703 (0.7618) time: 0.1540 data: 0.0659 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1747 (0.1721) loss: 0.7604 (0.7618) time: 0.1775 data: 0.0877 max mem: 9377 +Train: [61] Total time: 0:17:02 (0.1635 s / it) +Averaged stats: lr: 0.000043 grad: 0.1747 (0.1721) loss: 0.7604 (0.7618) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:04:20 loss: 0.8542 (0.8542) time: 4.2076 data: 4.1384 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8549 (0.8572) time: 0.1387 data: 0.1130 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:16 (0.2592 s / it) +Averaged stats (hcp-train-subset): loss: 0.8549 (0.8572) +Eval (hcp-val): [61] [ 0/62] eta: 0:05:10 loss: 0.8534 (0.8534) time: 5.0150 data: 4.9182 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8528 (0.8554) time: 0.1457 data: 0.1184 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:16 (0.2633 s / it) +Averaged stats (hcp-val): loss: 0.8528 (0.8554) +Eval (nsd-val): [61] [ 0/62] eta: 0:05:36 loss: 0.8203 (0.8203) time: 5.4215 data: 5.3770 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8307 (0.8321) time: 0.1417 data: 0.1139 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:16 (0.2632 s / it) +Averaged stats (nsd-val): loss: 0.8307 (0.8321) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 10:27:51 lr: 0.000043 grad: 0.1465 (0.1465) loss: 0.8421 (0.8421) time: 6.0275 data: 5.6069 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:26:57 lr: 0.000043 grad: 0.2147 (0.2976) loss: 0.7876 (0.7878) time: 0.2582 data: 0.1576 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:22:42 lr: 0.000043 grad: 0.2085 (0.2576) loss: 0.7780 (0.7855) time: 0.1908 data: 0.0866 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:20:37 lr: 0.000043 grad: 0.2093 (0.2400) loss: 0.7660 (0.7809) time: 0.1560 data: 0.0455 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:19:32 lr: 0.000043 grad: 0.1785 (0.2263) loss: 0.7656 (0.7776) time: 0.1923 data: 0.0952 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:18:35 lr: 0.000043 grad: 0.1625 (0.2152) loss: 0.7737 (0.7758) time: 0.1639 data: 0.0616 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:17:55 lr: 0.000043 grad: 0.1768 (0.2082) loss: 0.7643 (0.7744) time: 0.1657 data: 0.0650 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:17:22 lr: 0.000043 grad: 0.1820 (0.2048) loss: 0.7653 (0.7734) time: 0.1691 data: 0.0593 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:16:52 lr: 0.000043 grad: 0.1639 (0.2013) loss: 0.7653 (0.7720) time: 0.1978 data: 0.1135 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:16:23 lr: 0.000043 grad: 0.1629 (0.1977) loss: 0.7592 (0.7707) time: 0.1690 data: 0.0729 max mem: 9377 +Train: [62] [1000/6250] eta: 0:15:51 lr: 0.000043 grad: 0.1654 (0.1946) loss: 0.7649 (0.7698) time: 0.1460 data: 0.0539 max mem: 9377 +Train: [62] [1100/6250] eta: 0:15:25 lr: 0.000043 grad: 0.1643 (0.1919) loss: 0.7699 (0.7690) time: 0.1743 data: 0.0870 max mem: 9377 +Train: [62] [1200/6250] eta: 0:14:56 lr: 0.000043 grad: 0.1562 (0.1898) loss: 0.7702 (0.7685) time: 0.1538 data: 0.0681 max mem: 9377 +Train: [62] [1300/6250] eta: 0:14:37 lr: 0.000043 grad: 0.1574 (0.1878) loss: 0.7752 (0.7681) time: 0.1609 data: 0.0853 max mem: 9377 +Train: [62] [1400/6250] eta: 0:14:17 lr: 0.000043 grad: 0.1636 (0.1862) loss: 0.7573 (0.7678) time: 0.1739 data: 0.0900 max mem: 9377 +Train: [62] [1500/6250] eta: 0:13:55 lr: 0.000043 grad: 0.1650 (0.1848) loss: 0.7498 (0.7675) time: 0.1565 data: 0.0740 max mem: 9377 +Train: [62] [1600/6250] eta: 0:13:30 lr: 0.000043 grad: 0.1664 (0.1838) loss: 0.7517 (0.7671) time: 0.1397 data: 0.0599 max mem: 9377 +Train: [62] [1700/6250] eta: 0:13:13 lr: 0.000043 grad: 0.1694 (0.1826) loss: 0.7549 (0.7669) time: 0.2022 data: 0.1176 max mem: 9377 +Train: [62] [1800/6250] eta: 0:12:53 lr: 0.000043 grad: 0.1622 (0.1817) loss: 0.7665 (0.7667) time: 0.1653 data: 0.0745 max mem: 9377 +Train: [62] [1900/6250] eta: 0:12:35 lr: 0.000043 grad: 0.1654 (0.1813) loss: 0.7506 (0.7663) time: 0.1404 data: 0.0452 max mem: 9377 +Train: [62] [2000/6250] eta: 0:12:13 lr: 0.000043 grad: 0.1657 (0.1808) loss: 0.7643 (0.7659) time: 0.1233 data: 0.0378 max mem: 9377 +Train: [62] [2100/6250] eta: 0:11:52 lr: 0.000043 grad: 0.1630 (0.1806) loss: 0.7693 (0.7657) time: 0.1636 data: 0.0717 max mem: 9377 +Train: [62] [2200/6250] eta: 0:11:31 lr: 0.000042 grad: 0.1591 (0.1800) loss: 0.7669 (0.7656) time: 0.1361 data: 0.0400 max mem: 9377 +Train: [62] [2300/6250] eta: 0:11:11 lr: 0.000042 grad: 0.1670 (0.1795) loss: 0.7520 (0.7654) time: 0.1562 data: 0.0761 max mem: 9377 +Train: [62] [2400/6250] eta: 0:10:51 lr: 0.000042 grad: 0.1652 (0.1791) loss: 0.7586 (0.7653) time: 0.1538 data: 0.0697 max mem: 9377 +Train: [62] [2500/6250] eta: 0:10:32 lr: 0.000042 grad: 0.1697 (0.1787) loss: 0.7405 (0.7650) time: 0.1641 data: 0.0781 max mem: 9377 +Train: [62] [2600/6250] eta: 0:10:13 lr: 0.000042 grad: 0.1784 (0.1785) loss: 0.7394 (0.7646) time: 0.1565 data: 0.0620 max mem: 9377 +Train: [62] [2700/6250] eta: 0:09:55 lr: 0.000042 grad: 0.1720 (0.1782) loss: 0.7573 (0.7642) time: 0.1416 data: 0.0511 max mem: 9377 +Train: [62] [2800/6250] eta: 0:09:36 lr: 0.000042 grad: 0.1695 (0.1779) loss: 0.7576 (0.7638) time: 0.1439 data: 0.0584 max mem: 9377 +Train: [62] [2900/6250] eta: 0:09:17 lr: 0.000042 grad: 0.1689 (0.1777) loss: 0.7577 (0.7636) time: 0.1267 data: 0.0344 max mem: 9377 +Train: [62] [3000/6250] eta: 0:08:59 lr: 0.000042 grad: 0.1612 (0.1778) loss: 0.7536 (0.7632) time: 0.1358 data: 0.0512 max mem: 9377 +Train: [62] [3100/6250] eta: 0:08:41 lr: 0.000042 grad: 0.1638 (0.1776) loss: 0.7515 (0.7630) time: 0.1587 data: 0.0707 max mem: 9377 +Train: [62] [3200/6250] eta: 0:08:24 lr: 0.000042 grad: 0.1709 (0.1775) loss: 0.7484 (0.7625) time: 0.1580 data: 0.0650 max mem: 9377 +Train: [62] [3300/6250] eta: 0:08:07 lr: 0.000042 grad: 0.1623 (0.1774) loss: 0.7551 (0.7622) time: 0.2067 data: 0.1300 max mem: 9377 +Train: [62] [3400/6250] eta: 0:07:49 lr: 0.000042 grad: 0.1624 (0.1771) loss: 0.7639 (0.7619) time: 0.1627 data: 0.0731 max mem: 9377 +Train: [62] [3500/6250] eta: 0:07:32 lr: 0.000042 grad: 0.1677 (0.1771) loss: 0.7547 (0.7616) time: 0.1696 data: 0.0783 max mem: 9377 +Train: [62] [3600/6250] eta: 0:07:14 lr: 0.000042 grad: 0.1679 (0.1770) loss: 0.7553 (0.7614) time: 0.1574 data: 0.0764 max mem: 9377 +Train: [62] [3700/6250] eta: 0:06:57 lr: 0.000042 grad: 0.1653 (0.1767) loss: 0.7607 (0.7612) time: 0.1532 data: 0.0641 max mem: 9377 +Train: [62] [3800/6250] eta: 0:06:42 lr: 0.000042 grad: 0.1743 (0.1767) loss: 0.7543 (0.7611) time: 0.1675 data: 0.0758 max mem: 9377 +Train: [62] [3900/6250] eta: 0:06:25 lr: 0.000042 grad: 0.1835 (0.1767) loss: 0.7474 (0.7608) time: 0.1689 data: 0.0774 max mem: 9377 +Train: [62] [4000/6250] eta: 0:06:09 lr: 0.000042 grad: 0.1631 (0.1766) loss: 0.7567 (0.7607) time: 0.1709 data: 0.0837 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:53 lr: 0.000042 grad: 0.1696 (0.1765) loss: 0.7452 (0.7606) time: 0.1876 data: 0.1128 max mem: 9377 +Train: [62] [4200/6250] eta: 0:05:36 lr: 0.000042 grad: 0.1711 (0.1765) loss: 0.7483 (0.7603) time: 0.1702 data: 0.0828 max mem: 9377 +Train: [62] [4300/6250] eta: 0:05:20 lr: 0.000042 grad: 0.1618 (0.1763) loss: 0.7603 (0.7602) time: 0.1352 data: 0.0502 max mem: 9377 +Train: [62] [4400/6250] eta: 0:05:04 lr: 0.000042 grad: 0.1620 (0.1762) loss: 0.7618 (0.7601) time: 0.1911 data: 0.0925 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:47 lr: 0.000042 grad: 0.1659 (0.1760) loss: 0.7689 (0.7600) time: 0.1735 data: 0.0787 max mem: 9377 +Train: [62] [4600/6250] eta: 0:04:31 lr: 0.000042 grad: 0.1651 (0.1759) loss: 0.7664 (0.7600) time: 0.1867 data: 0.0971 max mem: 9377 +Train: [62] [4700/6250] eta: 0:04:14 lr: 0.000042 grad: 0.1811 (0.1758) loss: 0.7515 (0.7600) time: 0.1570 data: 0.0656 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:57 lr: 0.000042 grad: 0.1635 (0.1756) loss: 0.7618 (0.7599) time: 0.1611 data: 0.0760 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:41 lr: 0.000042 grad: 0.1620 (0.1755) loss: 0.7649 (0.7599) time: 0.1882 data: 0.1079 max mem: 9377 +Train: [62] [5000/6250] eta: 0:03:25 lr: 0.000042 grad: 0.1643 (0.1753) loss: 0.7500 (0.7599) time: 0.1878 data: 0.1086 max mem: 9377 +Train: [62] [5100/6250] eta: 0:03:08 lr: 0.000042 grad: 0.1708 (0.1753) loss: 0.7619 (0.7599) time: 0.1508 data: 0.0705 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:52 lr: 0.000042 grad: 0.1776 (0.1752) loss: 0.7582 (0.7599) time: 0.1447 data: 0.0602 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:35 lr: 0.000042 grad: 0.1676 (0.1750) loss: 0.7640 (0.7599) time: 0.1507 data: 0.0643 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:19 lr: 0.000041 grad: 0.1790 (0.1749) loss: 0.7544 (0.7600) time: 0.1567 data: 0.0708 max mem: 9377 +Train: [62] [5500/6250] eta: 0:02:02 lr: 0.000041 grad: 0.1846 (0.1749) loss: 0.7506 (0.7599) time: 0.1473 data: 0.0523 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:46 lr: 0.000041 grad: 0.1802 (0.1749) loss: 0.7465 (0.7599) time: 0.1673 data: 0.0745 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:29 lr: 0.000041 grad: 0.1684 (0.1749) loss: 0.7575 (0.7598) time: 0.1585 data: 0.0711 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:13 lr: 0.000041 grad: 0.1725 (0.1749) loss: 0.7506 (0.7597) time: 0.1489 data: 0.0532 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:57 lr: 0.000041 grad: 0.1779 (0.1749) loss: 0.7537 (0.7596) time: 0.1617 data: 0.0685 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:40 lr: 0.000041 grad: 0.1772 (0.1749) loss: 0.7550 (0.7595) time: 0.1603 data: 0.0680 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:24 lr: 0.000041 grad: 0.1704 (0.1749) loss: 0.7532 (0.7595) time: 0.1670 data: 0.0725 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:08 lr: 0.000041 grad: 0.1902 (0.1750) loss: 0.7524 (0.7594) time: 0.1495 data: 0.0556 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1725 (0.1750) loss: 0.7497 (0.7593) time: 0.1695 data: 0.0806 max mem: 9377 +Train: [62] Total time: 0:17:05 (0.1641 s / it) +Averaged stats: lr: 0.000041 grad: 0.1725 (0.1750) loss: 0.7497 (0.7593) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:06:08 loss: 0.8456 (0.8456) time: 5.9509 data: 5.9182 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8526 (0.8544) time: 0.1212 data: 0.0960 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:14 (0.2392 s / it) +Averaged stats (hcp-train-subset): loss: 0.8526 (0.8544) +Eval (hcp-val): [62] [ 0/62] eta: 0:05:43 loss: 0.8492 (0.8492) time: 5.5413 data: 5.5114 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8512 (0.8516) time: 0.1491 data: 0.1231 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:14 (0.2352 s / it) +Averaged stats (hcp-val): loss: 0.8512 (0.8516) +Eval (nsd-val): [62] [ 0/62] eta: 0:03:38 loss: 0.8217 (0.8217) time: 3.5241 data: 3.4451 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8278 (0.8290) time: 0.1364 data: 0.1112 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:14 (0.2312 s / it) +Averaged stats (nsd-val): loss: 0.8278 (0.8290) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 10:59:11 lr: 0.000041 grad: nan (nan) loss: 0.8127 (0.8127) time: 6.3283 data: 6.2128 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:23:12 lr: 0.000041 grad: 0.2024 (0.2589) loss: 0.7972 (0.7880) time: 0.1747 data: 0.0641 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:19:48 lr: 0.000041 grad: 0.2215 (0.2408) loss: 0.7809 (0.7829) time: 0.1486 data: 0.0438 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:18:58 lr: 0.000041 grad: 0.2238 (0.2397) loss: 0.7502 (0.7752) time: 0.1910 data: 0.0996 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:18:05 lr: 0.000041 grad: 0.1794 (0.2306) loss: 0.7469 (0.7705) time: 0.1489 data: 0.0388 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:17:22 lr: 0.000041 grad: 0.1855 (0.2204) loss: 0.7425 (0.7672) time: 0.1639 data: 0.0648 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:16:48 lr: 0.000041 grad: 0.1738 (0.2134) loss: 0.7584 (0.7657) time: 0.1447 data: 0.0457 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:16:14 lr: 0.000041 grad: 0.1648 (0.2076) loss: 0.7645 (0.7646) time: 0.1358 data: 0.0420 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:15:46 lr: 0.000041 grad: 0.1730 (0.2041) loss: 0.7591 (0.7636) time: 0.1325 data: 0.0417 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:15:26 lr: 0.000041 grad: 0.1806 (0.2014) loss: 0.7544 (0.7632) time: 0.1379 data: 0.0358 max mem: 9377 +Train: [63] [1000/6250] eta: 0:15:10 lr: 0.000041 grad: 0.1700 (0.1993) loss: 0.7525 (0.7626) time: 0.2368 data: 0.1542 max mem: 9377 +Train: [63] [1100/6250] eta: 0:14:40 lr: 0.000041 grad: 0.1699 (0.1976) loss: 0.7653 (0.7620) time: 0.1646 data: 0.0798 max mem: 9377 +Train: [63] [1200/6250] eta: 0:14:18 lr: 0.000041 grad: 0.1746 (0.1964) loss: 0.7492 (0.7614) time: 0.1551 data: 0.0611 max mem: 9377 +Train: [63] [1300/6250] eta: 0:13:55 lr: 0.000041 grad: 0.1720 (0.1948) loss: 0.7544 (0.7608) time: 0.1394 data: 0.0499 max mem: 9377 +Train: [63] [1400/6250] eta: 0:13:42 lr: 0.000041 grad: 0.1753 (0.1936) loss: 0.7566 (0.7603) time: 0.1874 data: 0.0939 max mem: 9377 +Train: [63] [1500/6250] eta: 0:13:21 lr: 0.000041 grad: 0.1833 (0.1925) loss: 0.7476 (0.7599) time: 0.1717 data: 0.0796 max mem: 9377 +Train: [63] [1600/6250] eta: 0:13:06 lr: 0.000041 grad: 0.1623 (0.1914) loss: 0.7630 (0.7595) time: 0.1852 data: 0.1033 max mem: 9377 +Train: [63] [1700/6250] eta: 0:12:53 lr: 0.000041 grad: 0.1795 (0.1906) loss: 0.7468 (0.7589) time: 0.2156 data: 0.1382 max mem: 9377 +Train: [63] [1800/6250] eta: 0:12:42 lr: 0.000041 grad: 0.1695 (0.1900) loss: 0.7495 (0.7583) time: 0.1658 data: 0.0658 max mem: 9377 +Train: [63] [1900/6250] eta: 0:12:28 lr: 0.000041 grad: 0.1728 (0.1893) loss: 0.7537 (0.7581) time: 0.1588 data: 0.0635 max mem: 9377 +Train: [63] [2000/6250] eta: 0:12:11 lr: 0.000041 grad: 0.1743 (0.1887) loss: 0.7556 (0.7578) time: 0.1585 data: 0.0732 max mem: 9377 +Train: [63] [2100/6250] eta: 0:11:51 lr: 0.000041 grad: 0.1714 (0.1878) loss: 0.7585 (0.7576) time: 0.1619 data: 0.0703 max mem: 9377 +Train: [63] [2200/6250] eta: 0:11:33 lr: 0.000041 grad: 0.1724 (0.1872) loss: 0.7409 (0.7573) time: 0.1776 data: 0.0821 max mem: 9377 +Train: [63] [2300/6250] eta: 0:11:14 lr: 0.000041 grad: 0.1677 (0.1865) loss: 0.7603 (0.7570) time: 0.1652 data: 0.0624 max mem: 9377 +Train: [63] [2400/6250] eta: 0:10:56 lr: 0.000040 grad: 0.1645 (0.1859) loss: 0.7598 (0.7568) time: 0.1527 data: 0.0660 max mem: 9377 +Train: [63] [2500/6250] eta: 0:10:38 lr: 0.000040 grad: 0.1748 (0.1856) loss: 0.7583 (0.7566) time: 0.1669 data: 0.0791 max mem: 9377 +Train: [63] [2600/6250] eta: 0:10:21 lr: 0.000040 grad: 0.1751 (0.1851) loss: 0.7355 (0.7564) time: 0.1808 data: 0.0959 max mem: 9377 +Train: [63] [2700/6250] eta: 0:10:03 lr: 0.000040 grad: 0.1714 (0.1848) loss: 0.7574 (0.7563) time: 0.1436 data: 0.0567 max mem: 9377 +Train: [63] [2800/6250] eta: 0:09:45 lr: 0.000040 grad: 0.1625 (0.1842) loss: 0.7700 (0.7564) time: 0.1811 data: 0.0957 max mem: 9377 +Train: [63] [2900/6250] eta: 0:09:27 lr: 0.000040 grad: 0.1771 (0.1840) loss: 0.7648 (0.7564) time: 0.1406 data: 0.0529 max mem: 9377 +Train: [63] [3000/6250] eta: 0:09:10 lr: 0.000040 grad: 0.1806 (0.1836) loss: 0.7451 (0.7564) time: 0.1707 data: 0.0915 max mem: 9377 +Train: [63] [3100/6250] eta: 0:08:51 lr: 0.000040 grad: 0.1640 (0.1834) loss: 0.7521 (0.7564) time: 0.1391 data: 0.0518 max mem: 9377 +Train: [63] [3200/6250] eta: 0:08:34 lr: 0.000040 grad: 0.1662 (0.1831) loss: 0.7596 (0.7565) time: 0.1626 data: 0.0763 max mem: 9377 +Train: [63] [3300/6250] eta: 0:08:17 lr: 0.000040 grad: 0.1770 (0.1829) loss: 0.7438 (0.7564) time: 0.1857 data: 0.0960 max mem: 9377 +Train: [63] [3400/6250] eta: 0:08:00 lr: 0.000040 grad: 0.1704 (0.1828) loss: 0.7514 (0.7563) time: 0.1861 data: 0.0966 max mem: 9377 +Train: [63] [3500/6250] eta: 0:07:43 lr: 0.000040 grad: 0.1713 (0.1827) loss: 0.7597 (0.7561) time: 0.2006 data: 0.1235 max mem: 9377 +Train: [63] [3600/6250] eta: 0:07:26 lr: 0.000040 grad: 0.1662 (0.1825) loss: 0.7455 (0.7560) time: 0.1713 data: 0.0904 max mem: 9377 +Train: [63] [3700/6250] eta: 0:07:09 lr: 0.000040 grad: 0.1660 (0.1823) loss: 0.7526 (0.7559) time: 0.1709 data: 0.0762 max mem: 9377 +Train: [63] [3800/6250] eta: 0:06:53 lr: 0.000040 grad: 0.1675 (0.1821) loss: 0.7521 (0.7559) time: 0.1633 data: 0.0793 max mem: 9377 +Train: [63] [3900/6250] eta: 0:06:36 lr: 0.000040 grad: 0.1642 (0.1820) loss: 0.7557 (0.7558) time: 0.1622 data: 0.0720 max mem: 9377 +Train: [63] [4000/6250] eta: 0:06:19 lr: 0.000040 grad: 0.1728 (0.1818) loss: 0.7492 (0.7558) time: 0.1368 data: 0.0468 max mem: 9377 +Train: [63] [4100/6250] eta: 0:06:02 lr: 0.000040 grad: 0.1770 (0.1816) loss: 0.7433 (0.7558) time: 0.1744 data: 0.0884 max mem: 9377 +Train: [63] [4200/6250] eta: 0:05:45 lr: 0.000040 grad: 0.1817 (0.1816) loss: 0.7535 (0.7557) time: 0.1716 data: 0.0844 max mem: 9377 +Train: [63] [4300/6250] eta: 0:05:29 lr: 0.000040 grad: 0.1734 (0.1815) loss: 0.7483 (0.7556) time: 0.1619 data: 0.0641 max mem: 9377 +Train: [63] [4400/6250] eta: 0:05:12 lr: 0.000040 grad: 0.1820 (0.1815) loss: 0.7351 (0.7554) time: 0.1606 data: 0.0729 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:54 lr: 0.000040 grad: 0.1691 (0.1812) loss: 0.7550 (0.7553) time: 0.1500 data: 0.0590 max mem: 9377 +Train: [63] [4600/6250] eta: 0:04:37 lr: 0.000040 grad: 0.1784 (0.1812) loss: 0.7420 (0.7552) time: 0.1653 data: 0.0761 max mem: 9377 +Train: [63] [4700/6250] eta: 0:04:20 lr: 0.000040 grad: 0.1701 (0.1811) loss: 0.7580 (0.7550) time: 0.1387 data: 0.0562 max mem: 9377 +Train: [63] [4800/6250] eta: 0:04:02 lr: 0.000040 grad: 0.1688 (0.1809) loss: 0.7444 (0.7548) time: 0.1616 data: 0.0764 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:46 lr: 0.000040 grad: 0.1804 (0.1808) loss: 0.7429 (0.7547) time: 0.1603 data: 0.0701 max mem: 9377 +Train: [63] [5000/6250] eta: 0:03:29 lr: 0.000040 grad: 0.1580 (0.1806) loss: 0.7459 (0.7546) time: 0.1300 data: 0.0369 max mem: 9377 +Train: [63] [5100/6250] eta: 0:03:12 lr: 0.000040 grad: 0.1707 (0.1805) loss: 0.7456 (0.7545) time: 0.1614 data: 0.0724 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:55 lr: 0.000040 grad: 0.1821 (0.1804) loss: 0.7377 (0.7544) time: 0.1813 data: 0.0870 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:38 lr: 0.000040 grad: 0.1767 (0.1803) loss: 0.7477 (0.7543) time: 0.1620 data: 0.0665 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:22 lr: 0.000040 grad: 0.1739 (0.1803) loss: 0.7483 (0.7541) time: 0.1707 data: 0.0676 max mem: 9377 +Train: [63] [5500/6250] eta: 0:02:05 lr: 0.000040 grad: 0.1799 (0.1803) loss: 0.7441 (0.7540) time: 0.1352 data: 0.0472 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:48 lr: 0.000039 grad: 0.1767 (0.1803) loss: 0.7461 (0.7538) time: 0.1486 data: 0.0627 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:31 lr: 0.000039 grad: 0.1690 (0.1802) loss: 0.7480 (0.7537) time: 0.1797 data: 0.0859 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:15 lr: 0.000039 grad: 0.1757 (0.1802) loss: 0.7556 (0.7536) time: 0.1652 data: 0.0703 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:58 lr: 0.000039 grad: 0.1741 (0.1801) loss: 0.7561 (0.7536) time: 0.1744 data: 0.0840 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:41 lr: 0.000039 grad: 0.1740 (0.1800) loss: 0.7498 (0.7536) time: 0.1770 data: 0.0886 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:24 lr: 0.000039 grad: 0.1758 (0.1800) loss: 0.7503 (0.7536) time: 0.1504 data: 0.0680 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:08 lr: 0.000039 grad: 0.1753 (0.1799) loss: 0.7481 (0.7536) time: 0.1475 data: 0.0617 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1652 (0.1798) loss: 0.7593 (0.7536) time: 0.1469 data: 0.0567 max mem: 9377 +Train: [63] Total time: 0:17:24 (0.1671 s / it) +Averaged stats: lr: 0.000039 grad: 0.1652 (0.1798) loss: 0.7593 (0.7536) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:05:07 loss: 0.8491 (0.8491) time: 4.9667 data: 4.9343 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8527 (0.8549) time: 0.1363 data: 0.1112 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (hcp-train-subset): loss: 0.8527 (0.8549) +Eval (hcp-val): [63] [ 0/62] eta: 0:03:48 loss: 0.8566 (0.8566) time: 3.6838 data: 3.5537 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8501 (0.8525) time: 0.1157 data: 0.0903 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:14 (0.2378 s / it) +Averaged stats (hcp-val): loss: 0.8501 (0.8525) +Eval (nsd-val): [63] [ 0/62] eta: 0:03:39 loss: 0.8198 (0.8198) time: 3.5334 data: 3.4630 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8298 (0.8294) time: 0.1361 data: 0.1107 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (nsd-val): loss: 0.8298 (0.8294) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 7:43:08 lr: 0.000039 grad: 0.3068 (0.3068) loss: 0.7106 (0.7106) time: 4.4462 data: 4.1494 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:23:41 lr: 0.000039 grad: 0.2191 (0.3182) loss: 0.7525 (0.7653) time: 0.1918 data: 0.0904 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:19:58 lr: 0.000039 grad: 0.2069 (0.2757) loss: 0.7597 (0.7673) time: 0.1639 data: 0.0616 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:18:22 lr: 0.000039 grad: 0.2138 (0.2558) loss: 0.7608 (0.7665) time: 0.1647 data: 0.0629 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:17:18 lr: 0.000039 grad: 0.1937 (0.2421) loss: 0.7692 (0.7664) time: 0.1518 data: 0.0442 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:16:25 lr: 0.000039 grad: 0.1665 (0.2307) loss: 0.7741 (0.7666) time: 0.1531 data: 0.0547 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:15:47 lr: 0.000039 grad: 0.1718 (0.2229) loss: 0.7587 (0.7667) time: 0.1500 data: 0.0642 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:15:21 lr: 0.000039 grad: 0.1633 (0.2158) loss: 0.7730 (0.7667) time: 0.1550 data: 0.0607 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:14:56 lr: 0.000039 grad: 0.1590 (0.2100) loss: 0.7748 (0.7669) time: 0.1400 data: 0.0443 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:14:35 lr: 0.000039 grad: 0.1650 (0.2054) loss: 0.7681 (0.7668) time: 0.1551 data: 0.0488 max mem: 9377 +Train: [64] [1000/6250] eta: 0:14:17 lr: 0.000039 grad: 0.1723 (0.2023) loss: 0.7599 (0.7664) time: 0.1517 data: 0.0639 max mem: 9377 +Train: [64] [1100/6250] eta: 0:13:58 lr: 0.000039 grad: 0.1622 (0.1991) loss: 0.7705 (0.7666) time: 0.1560 data: 0.0682 max mem: 9377 +Train: [64] [1200/6250] eta: 0:13:41 lr: 0.000039 grad: 0.1634 (0.1966) loss: 0.7689 (0.7668) time: 0.1610 data: 0.0704 max mem: 9377 +Train: [64] [1300/6250] eta: 0:13:24 lr: 0.000039 grad: 0.1607 (0.1945) loss: 0.7637 (0.7664) time: 0.1428 data: 0.0468 max mem: 9377 +Train: [64] [1400/6250] eta: 0:13:11 lr: 0.000039 grad: 0.1649 (0.1927) loss: 0.7576 (0.7664) time: 0.1567 data: 0.0687 max mem: 9377 +Train: [64] [1500/6250] eta: 0:12:54 lr: 0.000039 grad: 0.1690 (0.1917) loss: 0.7573 (0.7660) time: 0.1606 data: 0.0760 max mem: 9377 +Train: [64] [1600/6250] eta: 0:12:38 lr: 0.000039 grad: 0.1650 (0.1902) loss: 0.7595 (0.7657) time: 0.1565 data: 0.0744 max mem: 9377 +Train: [64] [1700/6250] eta: 0:12:22 lr: 0.000039 grad: 0.1688 (0.1887) loss: 0.7622 (0.7655) time: 0.1455 data: 0.0634 max mem: 9377 +Train: [64] [1800/6250] eta: 0:12:06 lr: 0.000039 grad: 0.1666 (0.1877) loss: 0.7660 (0.7655) time: 0.1599 data: 0.0681 max mem: 9377 +Train: [64] [1900/6250] eta: 0:11:48 lr: 0.000039 grad: 0.1591 (0.1867) loss: 0.7589 (0.7654) time: 0.1614 data: 0.0684 max mem: 9377 +Train: [64] [2000/6250] eta: 0:11:30 lr: 0.000039 grad: 0.1703 (0.1858) loss: 0.7608 (0.7652) time: 0.1645 data: 0.0702 max mem: 9377 +Train: [64] [2100/6250] eta: 0:11:11 lr: 0.000039 grad: 0.1666 (0.1852) loss: 0.7642 (0.7651) time: 0.1327 data: 0.0381 max mem: 9377 +Train: [64] [2200/6250] eta: 0:10:52 lr: 0.000039 grad: 0.1708 (0.1849) loss: 0.7644 (0.7648) time: 0.1345 data: 0.0359 max mem: 9377 +Train: [64] [2300/6250] eta: 0:10:34 lr: 0.000039 grad: 0.1695 (0.1842) loss: 0.7593 (0.7646) time: 0.1363 data: 0.0445 max mem: 9377 +Train: [64] [2400/6250] eta: 0:10:16 lr: 0.000039 grad: 0.1718 (0.1838) loss: 0.7539 (0.7643) time: 0.1369 data: 0.0403 max mem: 9377 +Train: [64] [2500/6250] eta: 0:09:59 lr: 0.000039 grad: 0.1724 (0.1834) loss: 0.7563 (0.7640) time: 0.1341 data: 0.0356 max mem: 9377 +Train: [64] [2600/6250] eta: 0:09:41 lr: 0.000039 grad: 0.1656 (0.1828) loss: 0.7635 (0.7638) time: 0.1310 data: 0.0327 max mem: 9377 +Train: [64] [2700/6250] eta: 0:09:25 lr: 0.000038 grad: 0.1746 (0.1824) loss: 0.7621 (0.7636) time: 0.1547 data: 0.0582 max mem: 9377 +Train: [64] [2800/6250] eta: 0:09:10 lr: 0.000038 grad: 0.1680 (0.1820) loss: 0.7498 (0.7633) time: 0.1595 data: 0.0658 max mem: 9377 +Train: [64] [2900/6250] eta: 0:08:54 lr: 0.000038 grad: 0.1682 (0.1816) loss: 0.7561 (0.7632) time: 0.1772 data: 0.0911 max mem: 9377 +Train: [64] [3000/6250] eta: 0:08:38 lr: 0.000038 grad: 0.1687 (0.1813) loss: 0.7527 (0.7630) time: 0.1617 data: 0.0749 max mem: 9377 +Train: [64] [3100/6250] eta: 0:08:22 lr: 0.000038 grad: 0.1712 (0.1810) loss: 0.7522 (0.7628) time: 0.1599 data: 0.0660 max mem: 9377 +Train: [64] [3200/6250] eta: 0:08:06 lr: 0.000038 grad: 0.1610 (0.1807) loss: 0.7673 (0.7627) time: 0.1579 data: 0.0702 max mem: 9377 +Train: [64] [3300/6250] eta: 0:07:50 lr: 0.000038 grad: 0.1697 (0.1804) loss: 0.7642 (0.7626) time: 0.1460 data: 0.0591 max mem: 9377 +Train: [64] [3400/6250] eta: 0:07:34 lr: 0.000038 grad: 0.1709 (0.1801) loss: 0.7545 (0.7624) time: 0.1570 data: 0.0731 max mem: 9377 +Train: [64] [3500/6250] eta: 0:07:17 lr: 0.000038 grad: 0.1671 (0.1798) loss: 0.7499 (0.7621) time: 0.1500 data: 0.0643 max mem: 9377 +Train: [64] [3600/6250] eta: 0:07:01 lr: 0.000038 grad: 0.1685 (0.1796) loss: 0.7520 (0.7619) time: 0.1604 data: 0.0714 max mem: 9377 +Train: [64] [3700/6250] eta: 0:06:45 lr: 0.000038 grad: 0.1694 (0.1795) loss: 0.7554 (0.7618) time: 0.1311 data: 0.0324 max mem: 9377 +Train: [64] [3800/6250] eta: 0:06:30 lr: 0.000038 grad: 0.1730 (0.1793) loss: 0.7591 (0.7616) time: 0.1893 data: 0.1009 max mem: 9377 +Train: [64] [3900/6250] eta: 0:06:14 lr: 0.000038 grad: 0.1742 (0.1791) loss: 0.7507 (0.7615) time: 0.1583 data: 0.0667 max mem: 9377 +Train: [64] [4000/6250] eta: 0:05:58 lr: 0.000038 grad: 0.1714 (0.1789) loss: 0.7609 (0.7614) time: 0.1367 data: 0.0464 max mem: 9377 +Train: [64] [4100/6250] eta: 0:05:42 lr: 0.000038 grad: 0.1663 (0.1788) loss: 0.7639 (0.7613) time: 0.1600 data: 0.0785 max mem: 9377 +Train: [64] [4200/6250] eta: 0:05:26 lr: 0.000038 grad: 0.1654 (0.1787) loss: 0.7583 (0.7613) time: 0.1711 data: 0.0840 max mem: 9377 +Train: [64] [4300/6250] eta: 0:05:10 lr: 0.000038 grad: 0.1692 (0.1786) loss: 0.7559 (0.7613) time: 0.1661 data: 0.0741 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:55 lr: 0.000038 grad: 0.1569 (0.1784) loss: 0.7778 (0.7614) time: 0.1611 data: 0.0720 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:39 lr: 0.000038 grad: 0.1533 (0.1781) loss: 0.7688 (0.7615) time: 0.1437 data: 0.0559 max mem: 9377 +Train: [64] [4600/6250] eta: 0:04:23 lr: 0.000038 grad: 0.1706 (0.1779) loss: 0.7620 (0.7616) time: 0.1603 data: 0.0728 max mem: 9377 +Train: [64] [4700/6250] eta: 0:04:06 lr: 0.000038 grad: 0.1734 (0.1777) loss: 0.7557 (0.7616) time: 0.1672 data: 0.0850 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:50 lr: 0.000038 grad: 0.1725 (0.1777) loss: 0.7404 (0.7614) time: 0.1470 data: 0.0529 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:35 lr: 0.000038 grad: 0.1732 (0.1776) loss: 0.7564 (0.7613) time: 0.1686 data: 0.0771 max mem: 9377 +Train: [64] [5000/6250] eta: 0:03:19 lr: 0.000038 grad: 0.1719 (0.1776) loss: 0.7499 (0.7611) time: 0.1834 data: 0.0871 max mem: 9377 +Train: [64] [5100/6250] eta: 0:03:04 lr: 0.000038 grad: 0.1696 (0.1775) loss: 0.7508 (0.7610) time: 0.1874 data: 0.1002 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:48 lr: 0.000038 grad: 0.1650 (0.1774) loss: 0.7642 (0.7610) time: 0.1656 data: 0.0728 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:32 lr: 0.000038 grad: 0.1658 (0.1773) loss: 0.7619 (0.7610) time: 0.1865 data: 0.0985 max mem: 9377 +Train: [64] [5400/6250] eta: 0:02:16 lr: 0.000038 grad: 0.1630 (0.1772) loss: 0.7669 (0.7610) time: 0.1799 data: 0.0937 max mem: 9377 +Train: [64] [5500/6250] eta: 0:02:01 lr: 0.000038 grad: 0.1698 (0.1771) loss: 0.7544 (0.7609) time: 0.1699 data: 0.0792 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:45 lr: 0.000038 grad: 0.1705 (0.1771) loss: 0.7494 (0.7608) time: 0.1765 data: 0.0813 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:28 lr: 0.000038 grad: 0.1714 (0.1771) loss: 0.7623 (0.7608) time: 0.1698 data: 0.0767 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:12 lr: 0.000038 grad: 0.1683 (0.1771) loss: 0.7574 (0.7608) time: 0.1656 data: 0.0714 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:56 lr: 0.000037 grad: 0.1698 (0.1770) loss: 0.7603 (0.7608) time: 0.1282 data: 0.0383 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:40 lr: 0.000037 grad: 0.1762 (0.1769) loss: 0.7535 (0.7608) time: 0.1589 data: 0.0725 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:24 lr: 0.000037 grad: 0.1744 (0.1770) loss: 0.7582 (0.7608) time: 0.1332 data: 0.0387 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:08 lr: 0.000037 grad: 0.1755 (0.1770) loss: 0.7468 (0.7607) time: 0.1625 data: 0.0712 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1645 (0.1769) loss: 0.7666 (0.7607) time: 0.1638 data: 0.0769 max mem: 9377 +Train: [64] Total time: 0:16:56 (0.1626 s / it) +Averaged stats: lr: 0.000037 grad: 0.1645 (0.1769) loss: 0.7666 (0.7607) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:05:17 loss: 0.8469 (0.8469) time: 5.1196 data: 5.0827 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8539 (0.8536) time: 0.1408 data: 0.1143 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:14 (0.2285 s / it) +Averaged stats (hcp-train-subset): loss: 0.8539 (0.8536) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [64] [ 0/62] eta: 0:04:32 loss: 0.8470 (0.8470) time: 4.3960 data: 4.3177 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8505 (0.8511) time: 0.1484 data: 0.1215 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:14 (0.2362 s / it) +Averaged stats (hcp-val): loss: 0.8505 (0.8511) +Making plots (hcp-val): example=60 +Eval (nsd-val): [64] [ 0/62] eta: 0:06:51 loss: 0.8136 (0.8136) time: 6.6401 data: 6.6068 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8260 (0.8264) time: 0.1352 data: 0.1096 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:15 (0.2532 s / it) +Averaged stats (nsd-val): loss: 0.8260 (0.8264) +Making plots (nsd-val): example=24 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 12:19:08 lr: 0.000037 grad: 0.1586 (0.1586) loss: 0.8320 (0.8320) time: 7.0957 data: 6.9824 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:24:51 lr: 0.000037 grad: 0.2064 (0.2595) loss: 0.7845 (0.7780) time: 0.1906 data: 0.0796 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:21:31 lr: 0.000037 grad: 0.2065 (0.2575) loss: 0.7705 (0.7700) time: 0.1483 data: 0.0367 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:19:59 lr: 0.000037 grad: 0.1692 (0.2401) loss: 0.7728 (0.7690) time: 0.1646 data: 0.0759 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:18:45 lr: 0.000037 grad: 0.1842 (0.2243) loss: 0.7681 (0.7691) time: 0.1485 data: 0.0487 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:17:52 lr: 0.000037 grad: 0.1636 (0.2153) loss: 0.7764 (0.7696) time: 0.1335 data: 0.0259 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:17:17 lr: 0.000037 grad: 0.1817 (0.2090) loss: 0.7629 (0.7690) time: 0.1839 data: 0.0874 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:16:41 lr: 0.000037 grad: 0.1670 (0.2035) loss: 0.7578 (0.7685) time: 0.1565 data: 0.0672 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:16:11 lr: 0.000037 grad: 0.1742 (0.1996) loss: 0.7582 (0.7675) time: 0.1542 data: 0.0670 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:15:52 lr: 0.000037 grad: 0.1648 (0.1965) loss: 0.7641 (0.7667) time: 0.1748 data: 0.0742 max mem: 9377 +Train: [65] [1000/6250] eta: 0:15:20 lr: 0.000037 grad: 0.1803 (0.1945) loss: 0.7547 (0.7664) time: 0.1509 data: 0.0601 max mem: 9377 +Train: [65] [1100/6250] eta: 0:14:56 lr: 0.000037 grad: 0.1678 (0.1929) loss: 0.7467 (0.7658) time: 0.1191 data: 0.0102 max mem: 9377 +Train: [65] [1200/6250] eta: 0:14:36 lr: 0.000037 grad: 0.1673 (0.1909) loss: 0.7548 (0.7656) time: 0.1741 data: 0.0880 max mem: 9377 +Train: [65] [1300/6250] eta: 0:14:21 lr: 0.000037 grad: 0.1634 (0.1892) loss: 0.7592 (0.7653) time: 0.2534 data: 0.1735 max mem: 9377 +Train: [65] [1400/6250] eta: 0:13:56 lr: 0.000037 grad: 0.1714 (0.1880) loss: 0.7565 (0.7650) time: 0.1351 data: 0.0532 max mem: 9377 +Train: [65] [1500/6250] eta: 0:13:34 lr: 0.000037 grad: 0.1660 (0.1865) loss: 0.7621 (0.7648) time: 0.1507 data: 0.0676 max mem: 9377 +Train: [65] [1600/6250] eta: 0:13:16 lr: 0.000037 grad: 0.1697 (0.1854) loss: 0.7524 (0.7644) time: 0.1668 data: 0.0835 max mem: 9377 +Train: [65] [1700/6250] eta: 0:12:54 lr: 0.000037 grad: 0.1625 (0.1842) loss: 0.7624 (0.7641) time: 0.1516 data: 0.0638 max mem: 9377 +Train: [65] [1800/6250] eta: 0:12:35 lr: 0.000037 grad: 0.1640 (0.1834) loss: 0.7707 (0.7639) time: 0.1647 data: 0.0775 max mem: 9377 +Train: [65] [1900/6250] eta: 0:12:18 lr: 0.000037 grad: 0.1692 (0.1826) loss: 0.7580 (0.7637) time: 0.1562 data: 0.0610 max mem: 9377 +Train: [65] [2000/6250] eta: 0:12:02 lr: 0.000037 grad: 0.1716 (0.1821) loss: 0.7699 (0.7634) time: 0.1612 data: 0.0638 max mem: 9377 +Train: [65] [2100/6250] eta: 0:11:46 lr: 0.000037 grad: 0.1662 (0.1817) loss: 0.7667 (0.7632) time: 0.1410 data: 0.0409 max mem: 9377 +Train: [65] [2200/6250] eta: 0:11:29 lr: 0.000037 grad: 0.1726 (0.1814) loss: 0.7598 (0.7631) time: 0.1697 data: 0.0873 max mem: 9377 +Train: [65] [2300/6250] eta: 0:11:11 lr: 0.000037 grad: 0.1706 (0.1812) loss: 0.7589 (0.7628) time: 0.1620 data: 0.0734 max mem: 9377 +Train: [65] [2400/6250] eta: 0:10:52 lr: 0.000037 grad: 0.1736 (0.1811) loss: 0.7451 (0.7625) time: 0.1447 data: 0.0540 max mem: 9377 +Train: [65] [2500/6250] eta: 0:10:33 lr: 0.000037 grad: 0.1684 (0.1808) loss: 0.7644 (0.7622) time: 0.1386 data: 0.0409 max mem: 9377 +Train: [65] [2600/6250] eta: 0:10:14 lr: 0.000037 grad: 0.1732 (0.1808) loss: 0.7547 (0.7619) time: 0.1524 data: 0.0638 max mem: 9377 +Train: [65] [2700/6250] eta: 0:09:57 lr: 0.000037 grad: 0.1758 (0.1806) loss: 0.7614 (0.7618) time: 0.1631 data: 0.0663 max mem: 9377 +Train: [65] [2800/6250] eta: 0:09:38 lr: 0.000037 grad: 0.1758 (0.1804) loss: 0.7594 (0.7617) time: 0.1600 data: 0.0711 max mem: 9377 +Train: [65] [2900/6250] eta: 0:09:20 lr: 0.000037 grad: 0.1700 (0.1801) loss: 0.7555 (0.7615) time: 0.1454 data: 0.0497 max mem: 9377 +Train: [65] [3000/6250] eta: 0:09:02 lr: 0.000036 grad: 0.1643 (0.1799) loss: 0.7708 (0.7614) time: 0.1564 data: 0.0710 max mem: 9377 +Train: [65] [3100/6250] eta: 0:08:44 lr: 0.000036 grad: 0.1710 (0.1798) loss: 0.7621 (0.7613) time: 0.1518 data: 0.0668 max mem: 9377 +Train: [65] [3200/6250] eta: 0:08:27 lr: 0.000036 grad: 0.1721 (0.1797) loss: 0.7634 (0.7613) time: 0.1707 data: 0.0917 max mem: 9377 +Train: [65] [3300/6250] eta: 0:08:10 lr: 0.000036 grad: 0.1624 (0.1795) loss: 0.7562 (0.7613) time: 0.1884 data: 0.1043 max mem: 9377 +Train: [65] [3400/6250] eta: 0:07:52 lr: 0.000036 grad: 0.1753 (0.1795) loss: 0.7594 (0.7612) time: 0.1443 data: 0.0475 max mem: 9377 +Train: [65] [3500/6250] eta: 0:07:35 lr: 0.000036 grad: 0.1778 (0.1794) loss: 0.7613 (0.7612) time: 0.1550 data: 0.0630 max mem: 9377 +Train: [65] [3600/6250] eta: 0:07:18 lr: 0.000036 grad: 0.1704 (0.1793) loss: 0.7630 (0.7611) time: 0.1960 data: 0.1120 max mem: 9377 +Train: [65] [3700/6250] eta: 0:07:00 lr: 0.000036 grad: 0.1666 (0.1791) loss: 0.7614 (0.7611) time: 0.1526 data: 0.0598 max mem: 9377 +Train: [65] [3800/6250] eta: 0:06:44 lr: 0.000036 grad: 0.1712 (0.1789) loss: 0.7557 (0.7610) time: 0.1663 data: 0.0831 max mem: 9377 +Train: [65] [3900/6250] eta: 0:06:27 lr: 0.000036 grad: 0.1758 (0.1789) loss: 0.7585 (0.7609) time: 0.1511 data: 0.0675 max mem: 9377 +Train: [65] [4000/6250] eta: 0:06:11 lr: 0.000036 grad: 0.1695 (0.1788) loss: 0.7621 (0.7608) time: 0.1253 data: 0.0356 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:54 lr: 0.000036 grad: 0.1726 (0.1788) loss: 0.7493 (0.7606) time: 0.1721 data: 0.0837 max mem: 9377 +Train: [65] [4200/6250] eta: 0:05:38 lr: 0.000036 grad: 0.1780 (0.1788) loss: 0.7530 (0.7605) time: 0.1841 data: 0.0908 max mem: 9377 +Train: [65] [4300/6250] eta: 0:05:22 lr: 0.000036 grad: 0.1726 (0.1789) loss: 0.7532 (0.7604) time: 0.1825 data: 0.0957 max mem: 9377 +Train: [65] [4400/6250] eta: 0:05:06 lr: 0.000036 grad: 0.1755 (0.1787) loss: 0.7511 (0.7603) time: 0.1640 data: 0.0612 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:50 lr: 0.000036 grad: 0.1749 (0.1787) loss: 0.7575 (0.7602) time: 0.1693 data: 0.0783 max mem: 9377 +Train: [65] [4600/6250] eta: 0:04:33 lr: 0.000036 grad: 0.1796 (0.1788) loss: 0.7562 (0.7601) time: 0.1781 data: 0.0815 max mem: 9377 +Train: [65] [4700/6250] eta: 0:04:17 lr: 0.000036 grad: 0.1733 (0.1787) loss: 0.7548 (0.7600) time: 0.1510 data: 0.0619 max mem: 9377 +Train: [65] [4800/6250] eta: 0:04:00 lr: 0.000036 grad: 0.1717 (0.1787) loss: 0.7452 (0.7598) time: 0.1642 data: 0.0772 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:44 lr: 0.000036 grad: 0.1757 (0.1787) loss: 0.7552 (0.7596) time: 0.1651 data: 0.0838 max mem: 9377 +Train: [65] [5000/6250] eta: 0:03:27 lr: 0.000036 grad: 0.1840 (0.1789) loss: 0.7580 (0.7595) time: 0.1659 data: 0.0706 max mem: 9377 +Train: [65] [5100/6250] eta: 0:03:10 lr: 0.000036 grad: 0.1848 (0.1790) loss: 0.7549 (0.7593) time: 0.1389 data: 0.0487 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:54 lr: 0.000036 grad: 0.1834 (0.1791) loss: 0.7503 (0.7593) time: 0.1622 data: 0.0792 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:37 lr: 0.000036 grad: 0.1797 (0.1791) loss: 0.7554 (0.7592) time: 0.1643 data: 0.0840 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:20 lr: 0.000036 grad: 0.1791 (0.1792) loss: 0.7373 (0.7590) time: 0.1515 data: 0.0633 max mem: 9377 +Train: [65] [5500/6250] eta: 0:02:04 lr: 0.000036 grad: 0.1728 (0.1792) loss: 0.7536 (0.7589) time: 0.1372 data: 0.0439 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:47 lr: 0.000036 grad: 0.1916 (0.1793) loss: 0.7532 (0.7589) time: 0.1229 data: 0.0237 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:30 lr: 0.000036 grad: 0.1848 (0.1794) loss: 0.7567 (0.7588) time: 0.1604 data: 0.0764 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:14 lr: 0.000036 grad: 0.1790 (0.1795) loss: 0.7649 (0.7588) time: 0.1550 data: 0.0666 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:57 lr: 0.000036 grad: 0.1767 (0.1795) loss: 0.7625 (0.7587) time: 0.1566 data: 0.0652 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:41 lr: 0.000036 grad: 0.1690 (0.1796) loss: 0.7607 (0.7587) time: 0.1647 data: 0.0674 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:24 lr: 0.000036 grad: 0.1773 (0.1796) loss: 0.7479 (0.7587) time: 0.1121 data: 0.0174 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:08 lr: 0.000036 grad: 0.1705 (0.1796) loss: 0.7552 (0.7586) time: 0.1395 data: 0.0487 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1757 (0.1796) loss: 0.7477 (0.7585) time: 0.1629 data: 0.0766 max mem: 9377 +Train: [65] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000036 grad: 0.1757 (0.1796) loss: 0.7477 (0.7585) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:05:41 loss: 0.8505 (0.8505) time: 5.5038 data: 5.4737 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8528 (0.8539) time: 0.1318 data: 0.1052 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:14 (0.2365 s / it) +Averaged stats (hcp-train-subset): loss: 0.8528 (0.8539) +Eval (hcp-val): [65] [ 0/62] eta: 0:05:02 loss: 0.8490 (0.8490) time: 4.8824 data: 4.8500 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8492 (0.8520) time: 0.1304 data: 0.1030 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:14 (0.2273 s / it) +Averaged stats (hcp-val): loss: 0.8492 (0.8520) +Eval (nsd-val): [65] [ 0/62] eta: 0:04:40 loss: 0.8144 (0.8144) time: 4.5203 data: 4.4465 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8276 (0.8281) time: 0.1453 data: 0.1201 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:13 (0.2257 s / it) +Averaged stats (nsd-val): loss: 0.8276 (0.8281) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 10:25:46 lr: 0.000036 grad: 0.2997 (0.2997) loss: 0.8009 (0.8009) time: 6.0075 data: 5.8387 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:22:23 lr: 0.000035 grad: 0.2377 (0.2743) loss: 0.7710 (0.7744) time: 0.1740 data: 0.0647 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:19:42 lr: 0.000035 grad: 0.2361 (0.2532) loss: 0.7762 (0.7714) time: 0.1799 data: 0.0783 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:18:16 lr: 0.000035 grad: 0.2305 (0.2459) loss: 0.7578 (0.7666) time: 0.1521 data: 0.0613 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:17:18 lr: 0.000035 grad: 0.2093 (0.2376) loss: 0.7476 (0.7639) time: 0.1608 data: 0.0674 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:16:35 lr: 0.000035 grad: 0.2081 (0.2298) loss: 0.7567 (0.7625) time: 0.1414 data: 0.0320 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:16:02 lr: 0.000035 grad: 0.1848 (0.2238) loss: 0.7670 (0.7627) time: 0.1589 data: 0.0593 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:15:30 lr: 0.000035 grad: 0.1831 (0.2183) loss: 0.7609 (0.7629) time: 0.1662 data: 0.0675 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:15:05 lr: 0.000035 grad: 0.1845 (0.2142) loss: 0.7606 (0.7627) time: 0.1679 data: 0.0763 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:14:39 lr: 0.000035 grad: 0.1724 (0.2103) loss: 0.7710 (0.7625) time: 0.1422 data: 0.0538 max mem: 9377 +Train: [66] [1000/6250] eta: 0:14:16 lr: 0.000035 grad: 0.1709 (0.2074) loss: 0.7637 (0.7627) time: 0.1634 data: 0.0741 max mem: 9377 +Train: [66] [1100/6250] eta: 0:13:57 lr: 0.000035 grad: 0.1700 (0.2047) loss: 0.7622 (0.7625) time: 0.1242 data: 0.0301 max mem: 9377 +Train: [66] [1200/6250] eta: 0:13:43 lr: 0.000035 grad: 0.1865 (0.2028) loss: 0.7503 (0.7621) time: 0.1900 data: 0.0986 max mem: 9377 +Train: [66] [1300/6250] eta: 0:13:25 lr: 0.000035 grad: 0.1692 (0.2008) loss: 0.7604 (0.7619) time: 0.1788 data: 0.0919 max mem: 9377 +Train: [66] [1400/6250] eta: 0:13:10 lr: 0.000035 grad: 0.1796 (0.1990) loss: 0.7559 (0.7617) time: 0.1965 data: 0.1212 max mem: 9377 +Train: [66] [1500/6250] eta: 0:12:54 lr: 0.000035 grad: 0.1711 (0.1980) loss: 0.7552 (0.7614) time: 0.1390 data: 0.0586 max mem: 9377 +Train: [66] [1600/6250] eta: 0:12:37 lr: 0.000035 grad: 0.1643 (0.1963) loss: 0.7584 (0.7613) time: 0.1510 data: 0.0710 max mem: 9377 +Train: [66] [1700/6250] eta: 0:12:23 lr: 0.000035 grad: 0.1828 (0.1951) loss: 0.7474 (0.7613) time: 0.1543 data: 0.0706 max mem: 9377 +Train: [66] [1800/6250] eta: 0:12:07 lr: 0.000035 grad: 0.1724 (0.1940) loss: 0.7445 (0.7612) time: 0.1608 data: 0.0726 max mem: 9377 +Train: [66] [1900/6250] eta: 0:11:52 lr: 0.000035 grad: 0.1701 (0.1930) loss: 0.7623 (0.7612) time: 0.1639 data: 0.0765 max mem: 9377 +Train: [66] [2000/6250] eta: 0:11:37 lr: 0.000035 grad: 0.1879 (0.1923) loss: 0.7516 (0.7609) time: 0.1476 data: 0.0562 max mem: 9377 +Train: [66] [2100/6250] eta: 0:11:20 lr: 0.000035 grad: 0.1737 (0.1916) loss: 0.7477 (0.7607) time: 0.1736 data: 0.0889 max mem: 9377 +Train: [66] [2200/6250] eta: 0:11:03 lr: 0.000035 grad: 0.1761 (0.1909) loss: 0.7472 (0.7605) time: 0.1723 data: 0.0817 max mem: 9377 +Train: [66] [2300/6250] eta: 0:10:46 lr: 0.000035 grad: 0.1740 (0.1905) loss: 0.7456 (0.7603) time: 0.1710 data: 0.0784 max mem: 9377 +Train: [66] [2400/6250] eta: 0:10:29 lr: 0.000035 grad: 0.1767 (0.1900) loss: 0.7473 (0.7598) time: 0.1545 data: 0.0600 max mem: 9377 +Train: [66] [2500/6250] eta: 0:10:11 lr: 0.000035 grad: 0.1700 (0.1897) loss: 0.7589 (0.7595) time: 0.1548 data: 0.0676 max mem: 9377 +Train: [66] [2600/6250] eta: 0:09:53 lr: 0.000035 grad: 0.1737 (0.1892) loss: 0.7594 (0.7594) time: 0.1182 data: 0.0291 max mem: 9377 +Train: [66] [2700/6250] eta: 0:09:37 lr: 0.000035 grad: 0.1722 (0.1888) loss: 0.7538 (0.7593) time: 0.1650 data: 0.0785 max mem: 9377 +Train: [66] [2800/6250] eta: 0:09:21 lr: 0.000035 grad: 0.1758 (0.1883) loss: 0.7622 (0.7592) time: 0.2016 data: 0.1228 max mem: 9377 +Train: [66] [2900/6250] eta: 0:09:05 lr: 0.000035 grad: 0.1805 (0.1880) loss: 0.7528 (0.7592) time: 0.1168 data: 0.0304 max mem: 9377 +Train: [66] [3000/6250] eta: 0:08:48 lr: 0.000035 grad: 0.1749 (0.1878) loss: 0.7516 (0.7591) time: 0.1478 data: 0.0610 max mem: 9377 +Train: [66] [3100/6250] eta: 0:08:31 lr: 0.000035 grad: 0.1852 (0.1877) loss: 0.7522 (0.7591) time: 0.1640 data: 0.0770 max mem: 9377 +Train: [66] [3200/6250] eta: 0:08:14 lr: 0.000035 grad: 0.1721 (0.1873) loss: 0.7538 (0.7590) time: 0.1840 data: 0.0997 max mem: 9377 +Train: [66] [3300/6250] eta: 0:07:57 lr: 0.000035 grad: 0.1738 (0.1871) loss: 0.7525 (0.7590) time: 0.1341 data: 0.0531 max mem: 9377 +Train: [66] [3400/6250] eta: 0:07:40 lr: 0.000035 grad: 0.1675 (0.1867) loss: 0.7641 (0.7590) time: 0.1410 data: 0.0555 max mem: 9377 +Train: [66] [3500/6250] eta: 0:07:24 lr: 0.000034 grad: 0.1692 (0.1863) loss: 0.7645 (0.7591) time: 0.1420 data: 0.0517 max mem: 9377 +Train: [66] [3600/6250] eta: 0:07:08 lr: 0.000034 grad: 0.1640 (0.1861) loss: 0.7596 (0.7592) time: 0.1685 data: 0.0804 max mem: 9377 +Train: [66] [3700/6250] eta: 0:06:52 lr: 0.000034 grad: 0.1657 (0.1857) loss: 0.7680 (0.7592) time: 0.1784 data: 0.0924 max mem: 9377 +Train: [66] [3800/6250] eta: 0:06:37 lr: 0.000034 grad: 0.1682 (0.1854) loss: 0.7642 (0.7593) time: 0.1647 data: 0.0850 max mem: 9377 +Train: [66] [3900/6250] eta: 0:06:20 lr: 0.000034 grad: 0.1737 (0.1852) loss: 0.7603 (0.7593) time: 0.1615 data: 0.0730 max mem: 9377 +Train: [66] [4000/6250] eta: 0:06:05 lr: 0.000034 grad: 0.1661 (0.1848) loss: 0.7645 (0.7595) time: 0.1583 data: 0.0772 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:48 lr: 0.000034 grad: 0.1617 (0.1845) loss: 0.7658 (0.7596) time: 0.1423 data: 0.0520 max mem: 9377 +Train: [66] [4200/6250] eta: 0:05:33 lr: 0.000034 grad: 0.1659 (0.1841) loss: 0.7700 (0.7598) time: 0.1534 data: 0.0665 max mem: 9377 +Train: [66] [4300/6250] eta: 0:05:17 lr: 0.000034 grad: 0.1747 (0.1839) loss: 0.7577 (0.7599) time: 0.1581 data: 0.0664 max mem: 9377 +Train: [66] [4400/6250] eta: 0:05:00 lr: 0.000034 grad: 0.1701 (0.1836) loss: 0.7673 (0.7600) time: 0.1447 data: 0.0498 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:44 lr: 0.000034 grad: 0.1535 (0.1831) loss: 0.7814 (0.7603) time: 0.1553 data: 0.0634 max mem: 9377 +Train: [66] [4600/6250] eta: 0:04:27 lr: 0.000034 grad: 0.1701 (0.1828) loss: 0.7729 (0.7604) time: 0.1650 data: 0.0706 max mem: 9377 +Train: [66] [4700/6250] eta: 0:04:11 lr: 0.000034 grad: 0.1703 (0.1826) loss: 0.7651 (0.7605) time: 0.1378 data: 0.0426 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:54 lr: 0.000034 grad: 0.1622 (0.1823) loss: 0.7668 (0.7607) time: 0.1295 data: 0.0218 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:38 lr: 0.000034 grad: 0.1645 (0.1820) loss: 0.7686 (0.7609) time: 0.1640 data: 0.0699 max mem: 9377 +Train: [66] [5000/6250] eta: 0:03:22 lr: 0.000034 grad: 0.1789 (0.1819) loss: 0.7586 (0.7609) time: 0.1673 data: 0.0865 max mem: 9377 +Train: [66] [5100/6250] eta: 0:03:06 lr: 0.000034 grad: 0.1656 (0.1817) loss: 0.7691 (0.7610) time: 0.1738 data: 0.0875 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:50 lr: 0.000034 grad: 0.1609 (0.1815) loss: 0.7691 (0.7610) time: 0.2179 data: 0.1337 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:34 lr: 0.000034 grad: 0.1677 (0.1814) loss: 0.7536 (0.7610) time: 0.2007 data: 0.1036 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:18 lr: 0.000034 grad: 0.1669 (0.1813) loss: 0.7636 (0.7611) time: 0.1672 data: 0.0633 max mem: 9377 +Train: [66] [5500/6250] eta: 0:02:01 lr: 0.000034 grad: 0.1727 (0.1813) loss: 0.7599 (0.7611) time: 0.1496 data: 0.0504 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:45 lr: 0.000034 grad: 0.1680 (0.1812) loss: 0.7565 (0.7610) time: 0.1845 data: 0.0923 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:29 lr: 0.000034 grad: 0.1773 (0.1811) loss: 0.7503 (0.7610) time: 0.1478 data: 0.0517 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:13 lr: 0.000034 grad: 0.1747 (0.1811) loss: 0.7448 (0.7609) time: 0.1710 data: 0.0855 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:56 lr: 0.000034 grad: 0.1791 (0.1811) loss: 0.7526 (0.7609) time: 0.1541 data: 0.0599 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:40 lr: 0.000034 grad: 0.1855 (0.1812) loss: 0.7514 (0.7608) time: 0.1603 data: 0.0810 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:24 lr: 0.000034 grad: 0.1720 (0.1812) loss: 0.7536 (0.7607) time: 0.1555 data: 0.0688 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:08 lr: 0.000034 grad: 0.1732 (0.1813) loss: 0.7609 (0.7607) time: 0.1374 data: 0.0519 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1762 (0.1812) loss: 0.7655 (0.7607) time: 0.1137 data: 0.0250 max mem: 9377 +Train: [66] Total time: 0:16:59 (0.1632 s / it) +Averaged stats: lr: 0.000034 grad: 0.1762 (0.1812) loss: 0.7655 (0.7607) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:03:48 loss: 0.8482 (0.8482) time: 3.6840 data: 3.5575 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8525 (0.8528) time: 0.1301 data: 0.1035 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (hcp-train-subset): loss: 0.8525 (0.8528) +Eval (hcp-val): [66] [ 0/62] eta: 0:05:26 loss: 0.8495 (0.8495) time: 5.2666 data: 5.2361 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8499 (0.8513) time: 0.1280 data: 0.1011 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-val): loss: 0.8499 (0.8513) +Eval (nsd-val): [66] [ 0/62] eta: 0:03:51 loss: 0.8142 (0.8142) time: 3.7376 data: 3.6386 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8301 (0.8302) time: 0.1387 data: 0.1119 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (nsd-val): loss: 0.8301 (0.8302) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 11:29:33 lr: 0.000034 grad: 0.7169 (0.7169) loss: 0.7478 (0.7478) time: 6.6198 data: 6.4883 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:21:58 lr: 0.000034 grad: 0.2124 (0.2811) loss: 0.7824 (0.7907) time: 0.1557 data: 0.0509 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:19:22 lr: 0.000034 grad: 0.2058 (0.2524) loss: 0.7722 (0.7820) time: 0.1800 data: 0.0742 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:18:29 lr: 0.000034 grad: 0.2102 (0.2469) loss: 0.7532 (0.7733) time: 0.1862 data: 0.0845 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:17:41 lr: 0.000034 grad: 0.1845 (0.2377) loss: 0.7559 (0.7703) time: 0.1592 data: 0.0591 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:16:59 lr: 0.000034 grad: 0.1916 (0.2306) loss: 0.7508 (0.7673) time: 0.1450 data: 0.0455 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:16:26 lr: 0.000033 grad: 0.1878 (0.2245) loss: 0.7580 (0.7653) time: 0.1700 data: 0.0772 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:15:59 lr: 0.000033 grad: 0.1874 (0.2193) loss: 0.7473 (0.7638) time: 0.1719 data: 0.0746 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:15:37 lr: 0.000033 grad: 0.1733 (0.2147) loss: 0.7606 (0.7632) time: 0.1698 data: 0.0802 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:15:15 lr: 0.000033 grad: 0.1693 (0.2107) loss: 0.7669 (0.7628) time: 0.1728 data: 0.0786 max mem: 9377 +Train: [67] [1000/6250] eta: 0:14:53 lr: 0.000033 grad: 0.1720 (0.2076) loss: 0.7531 (0.7623) time: 0.1604 data: 0.0640 max mem: 9377 +Train: [67] [1100/6250] eta: 0:14:30 lr: 0.000033 grad: 0.1630 (0.2049) loss: 0.7583 (0.7618) time: 0.1445 data: 0.0520 max mem: 9377 +Train: [67] [1200/6250] eta: 0:14:07 lr: 0.000033 grad: 0.1645 (0.2026) loss: 0.7579 (0.7612) time: 0.1511 data: 0.0705 max mem: 9377 +Train: [67] [1300/6250] eta: 0:13:47 lr: 0.000033 grad: 0.1714 (0.2009) loss: 0.7620 (0.7606) time: 0.1768 data: 0.0956 max mem: 9377 +Train: [67] [1400/6250] eta: 0:13:29 lr: 0.000033 grad: 0.1756 (0.1994) loss: 0.7544 (0.7597) time: 0.2146 data: 0.1289 max mem: 9377 +Train: [67] [1500/6250] eta: 0:13:11 lr: 0.000033 grad: 0.1752 (0.1979) loss: 0.7497 (0.7591) time: 0.1576 data: 0.0712 max mem: 9377 +Train: [67] [1600/6250] eta: 0:12:52 lr: 0.000033 grad: 0.1791 (0.1972) loss: 0.7550 (0.7582) time: 0.1630 data: 0.0869 max mem: 9377 +Train: [67] [1700/6250] eta: 0:12:35 lr: 0.000033 grad: 0.1785 (0.1963) loss: 0.7347 (0.7576) time: 0.1662 data: 0.0826 max mem: 9377 +Train: [67] [1800/6250] eta: 0:12:18 lr: 0.000033 grad: 0.1718 (0.1953) loss: 0.7525 (0.7568) time: 0.1910 data: 0.1025 max mem: 9377 +Train: [67] [1900/6250] eta: 0:12:01 lr: 0.000033 grad: 0.1785 (0.1946) loss: 0.7519 (0.7564) time: 0.1574 data: 0.0623 max mem: 9377 +Train: [67] [2000/6250] eta: 0:11:42 lr: 0.000033 grad: 0.1863 (0.1940) loss: 0.7514 (0.7557) time: 0.1471 data: 0.0571 max mem: 9377 +Train: [67] [2100/6250] eta: 0:11:23 lr: 0.000033 grad: 0.1784 (0.1936) loss: 0.7432 (0.7553) time: 0.1540 data: 0.0622 max mem: 9377 +Train: [67] [2200/6250] eta: 0:11:03 lr: 0.000033 grad: 0.1747 (0.1930) loss: 0.7408 (0.7549) time: 0.1374 data: 0.0405 max mem: 9377 +Train: [67] [2300/6250] eta: 0:10:44 lr: 0.000033 grad: 0.1781 (0.1925) loss: 0.7556 (0.7546) time: 0.1465 data: 0.0623 max mem: 9377 +Train: [67] [2400/6250] eta: 0:10:25 lr: 0.000033 grad: 0.1766 (0.1920) loss: 0.7420 (0.7544) time: 0.1553 data: 0.0500 max mem: 9377 +Train: [67] [2500/6250] eta: 0:10:07 lr: 0.000033 grad: 0.1751 (0.1914) loss: 0.7560 (0.7542) time: 0.1541 data: 0.0685 max mem: 9377 +Train: [67] [2600/6250] eta: 0:09:50 lr: 0.000033 grad: 0.1761 (0.1911) loss: 0.7570 (0.7539) time: 0.1476 data: 0.0604 max mem: 9377 +Train: [67] [2700/6250] eta: 0:09:31 lr: 0.000033 grad: 0.1772 (0.1907) loss: 0.7505 (0.7537) time: 0.1476 data: 0.0581 max mem: 9377 +Train: [67] [2800/6250] eta: 0:09:15 lr: 0.000033 grad: 0.1781 (0.1904) loss: 0.7393 (0.7535) time: 0.1269 data: 0.0447 max mem: 9377 +Train: [67] [2900/6250] eta: 0:08:58 lr: 0.000033 grad: 0.1787 (0.1902) loss: 0.7439 (0.7532) time: 0.1519 data: 0.0603 max mem: 9377 +Train: [67] [3000/6250] eta: 0:08:42 lr: 0.000033 grad: 0.1719 (0.1899) loss: 0.7547 (0.7531) time: 0.1472 data: 0.0566 max mem: 9377 +Train: [67] [3100/6250] eta: 0:08:25 lr: 0.000033 grad: 0.1743 (0.1898) loss: 0.7481 (0.7529) time: 0.1283 data: 0.0410 max mem: 9377 +Train: [67] [3200/6250] eta: 0:08:08 lr: 0.000033 grad: 0.1734 (0.1895) loss: 0.7492 (0.7529) time: 0.1561 data: 0.0726 max mem: 9377 +Train: [67] [3300/6250] eta: 0:07:51 lr: 0.000033 grad: 0.1681 (0.1892) loss: 0.7551 (0.7528) time: 0.1613 data: 0.0698 max mem: 9377 +Train: [67] [3400/6250] eta: 0:07:36 lr: 0.000033 grad: 0.1844 (0.1890) loss: 0.7520 (0.7527) time: 0.1851 data: 0.0978 max mem: 9377 +Train: [67] [3500/6250] eta: 0:07:19 lr: 0.000033 grad: 0.1755 (0.1888) loss: 0.7508 (0.7527) time: 0.1626 data: 0.0775 max mem: 9377 +Train: [67] [3600/6250] eta: 0:07:03 lr: 0.000033 grad: 0.1782 (0.1886) loss: 0.7483 (0.7527) time: 0.1629 data: 0.0724 max mem: 9377 +Train: [67] [3700/6250] eta: 0:06:47 lr: 0.000033 grad: 0.1788 (0.1884) loss: 0.7487 (0.7527) time: 0.1571 data: 0.0728 max mem: 9377 +Train: [67] [3800/6250] eta: 0:06:32 lr: 0.000033 grad: 0.1725 (0.1882) loss: 0.7587 (0.7527) time: 0.1668 data: 0.0761 max mem: 9377 +Train: [67] [3900/6250] eta: 0:06:16 lr: 0.000033 grad: 0.1887 (0.1880) loss: 0.7460 (0.7527) time: 0.1686 data: 0.0702 max mem: 9377 +Train: [67] [4000/6250] eta: 0:06:00 lr: 0.000032 grad: 0.1789 (0.1878) loss: 0.7464 (0.7527) time: 0.1767 data: 0.0970 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:45 lr: 0.000032 grad: 0.1755 (0.1877) loss: 0.7531 (0.7526) time: 0.1645 data: 0.0722 max mem: 9377 +Train: [67] [4200/6250] eta: 0:05:28 lr: 0.000032 grad: 0.1772 (0.1876) loss: 0.7599 (0.7526) time: 0.1495 data: 0.0583 max mem: 9377 +Train: [67] [4300/6250] eta: 0:05:12 lr: 0.000032 grad: 0.1870 (0.1875) loss: 0.7479 (0.7525) time: 0.1805 data: 0.0933 max mem: 9377 +Train: [67] [4400/6250] eta: 0:04:56 lr: 0.000032 grad: 0.1792 (0.1874) loss: 0.7427 (0.7525) time: 0.1721 data: 0.0763 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:41 lr: 0.000032 grad: 0.1859 (0.1873) loss: 0.7549 (0.7526) time: 0.1473 data: 0.0537 max mem: 9377 +Train: [67] [4600/6250] eta: 0:04:24 lr: 0.000032 grad: 0.1784 (0.1872) loss: 0.7512 (0.7526) time: 0.1154 data: 0.0216 max mem: 9377 +Train: [67] [4700/6250] eta: 0:04:08 lr: 0.000032 grad: 0.1801 (0.1871) loss: 0.7528 (0.7526) time: 0.1596 data: 0.0716 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:52 lr: 0.000032 grad: 0.1793 (0.1870) loss: 0.7510 (0.7527) time: 0.1535 data: 0.0644 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:36 lr: 0.000032 grad: 0.1707 (0.1869) loss: 0.7597 (0.7529) time: 0.1768 data: 0.0950 max mem: 9377 +Train: [67] [5000/6250] eta: 0:03:20 lr: 0.000032 grad: 0.1759 (0.1867) loss: 0.7607 (0.7531) time: 0.1587 data: 0.0604 max mem: 9377 +Train: [67] [5100/6250] eta: 0:03:04 lr: 0.000032 grad: 0.1719 (0.1865) loss: 0.7632 (0.7533) time: 0.1602 data: 0.0747 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:48 lr: 0.000032 grad: 0.1761 (0.1863) loss: 0.7467 (0.7535) time: 0.1784 data: 0.0880 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:32 lr: 0.000032 grad: 0.1755 (0.1861) loss: 0.7613 (0.7536) time: 0.1320 data: 0.0326 max mem: 9377 +Train: [67] [5400/6250] eta: 0:02:16 lr: 0.000032 grad: 0.1642 (0.1860) loss: 0.7669 (0.7537) time: 0.1538 data: 0.0645 max mem: 9377 +Train: [67] [5500/6250] eta: 0:02:00 lr: 0.000032 grad: 0.1671 (0.1859) loss: 0.7665 (0.7538) time: 0.1426 data: 0.0458 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:44 lr: 0.000032 grad: 0.1854 (0.1859) loss: 0.7461 (0.7538) time: 0.1354 data: 0.0321 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:28 lr: 0.000032 grad: 0.1797 (0.1859) loss: 0.7592 (0.7538) time: 0.1421 data: 0.0496 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:11 lr: 0.000032 grad: 0.1858 (0.1859) loss: 0.7516 (0.7537) time: 0.1046 data: 0.0060 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:55 lr: 0.000032 grad: 0.1849 (0.1860) loss: 0.7428 (0.7537) time: 0.1560 data: 0.0627 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:39 lr: 0.000032 grad: 0.1872 (0.1859) loss: 0.7426 (0.7536) time: 0.1644 data: 0.0829 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:24 lr: 0.000032 grad: 0.1810 (0.1859) loss: 0.7475 (0.7535) time: 0.1754 data: 0.0884 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:07 lr: 0.000032 grad: 0.1838 (0.1859) loss: 0.7435 (0.7535) time: 0.1488 data: 0.0582 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1844 (0.1858) loss: 0.7554 (0.7535) time: 0.1386 data: 0.0540 max mem: 9377 +Train: [67] Total time: 0:16:47 (0.1611 s / it) +Averaged stats: lr: 0.000032 grad: 0.1844 (0.1858) loss: 0.7554 (0.7535) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:05:38 loss: 0.8509 (0.8509) time: 5.4660 data: 5.4352 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8564 (0.8550) time: 0.1540 data: 0.1287 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:15 (0.2573 s / it) +Averaged stats (hcp-train-subset): loss: 0.8564 (0.8550) +Eval (hcp-val): [67] [ 0/62] eta: 0:05:33 loss: 0.8492 (0.8492) time: 5.3766 data: 5.3303 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8526 (0.8538) time: 0.1441 data: 0.1169 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:15 (0.2518 s / it) +Averaged stats (hcp-val): loss: 0.8526 (0.8538) +Eval (nsd-val): [67] [ 0/62] eta: 0:06:14 loss: 0.8263 (0.8263) time: 6.0423 data: 6.0113 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8297 (0.8321) time: 0.1320 data: 0.1070 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:15 (0.2491 s / it) +Averaged stats (nsd-val): loss: 0.8297 (0.8321) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 11:07:57 lr: 0.000032 grad: 0.1546 (0.1546) loss: 0.7951 (0.7951) time: 6.4124 data: 6.2641 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:24:30 lr: 0.000032 grad: 0.2840 (0.3130) loss: 0.7427 (0.7795) time: 0.2089 data: 0.0973 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:21:15 lr: 0.000032 grad: 0.2150 (0.2754) loss: 0.7761 (0.7758) time: 0.1705 data: 0.0736 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:19:33 lr: 0.000032 grad: 0.1876 (0.2515) loss: 0.7802 (0.7760) time: 0.1692 data: 0.0680 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:18:30 lr: 0.000032 grad: 0.1750 (0.2367) loss: 0.7778 (0.7757) time: 0.1638 data: 0.0625 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:17:52 lr: 0.000032 grad: 0.1818 (0.2282) loss: 0.7629 (0.7738) time: 0.1403 data: 0.0501 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:17:10 lr: 0.000032 grad: 0.1883 (0.2220) loss: 0.7547 (0.7714) time: 0.1599 data: 0.0698 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:16:35 lr: 0.000032 grad: 0.1933 (0.2175) loss: 0.7483 (0.7691) time: 0.1514 data: 0.0555 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:16:13 lr: 0.000032 grad: 0.1796 (0.2144) loss: 0.7431 (0.7669) time: 0.1640 data: 0.0702 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:15:44 lr: 0.000032 grad: 0.1791 (0.2120) loss: 0.7425 (0.7649) time: 0.1391 data: 0.0424 max mem: 9377 +Train: [68] [1000/6250] eta: 0:15:16 lr: 0.000032 grad: 0.1802 (0.2095) loss: 0.7520 (0.7631) time: 0.1711 data: 0.0729 max mem: 9377 +Train: [68] [1100/6250] eta: 0:14:49 lr: 0.000032 grad: 0.1762 (0.2076) loss: 0.7544 (0.7616) time: 0.1384 data: 0.0528 max mem: 9377 +Train: [68] [1200/6250] eta: 0:14:24 lr: 0.000032 grad: 0.1953 (0.2058) loss: 0.7479 (0.7605) time: 0.1378 data: 0.0533 max mem: 9377 +Train: [68] [1300/6250] eta: 0:14:05 lr: 0.000031 grad: 0.1829 (0.2043) loss: 0.7454 (0.7594) time: 0.1210 data: 0.0321 max mem: 9377 +Train: [68] [1400/6250] eta: 0:13:43 lr: 0.000031 grad: 0.1751 (0.2031) loss: 0.7606 (0.7585) time: 0.1614 data: 0.0676 max mem: 9377 +Train: [68] [1500/6250] eta: 0:13:27 lr: 0.000031 grad: 0.1816 (0.2021) loss: 0.7469 (0.7579) time: 0.1772 data: 0.0838 max mem: 9377 +Train: [68] [1600/6250] eta: 0:13:06 lr: 0.000031 grad: 0.1870 (0.2012) loss: 0.7486 (0.7571) time: 0.1472 data: 0.0542 max mem: 9377 +Train: [68] [1700/6250] eta: 0:12:48 lr: 0.000031 grad: 0.1731 (0.2005) loss: 0.7601 (0.7566) time: 0.1722 data: 0.0845 max mem: 9377 +Train: [68] [1800/6250] eta: 0:12:29 lr: 0.000031 grad: 0.1789 (0.1997) loss: 0.7576 (0.7564) time: 0.1739 data: 0.0765 max mem: 9377 +Train: [68] [1900/6250] eta: 0:12:13 lr: 0.000031 grad: 0.1827 (0.1989) loss: 0.7390 (0.7560) time: 0.1808 data: 0.0936 max mem: 9377 +Train: [68] [2000/6250] eta: 0:11:56 lr: 0.000031 grad: 0.1864 (0.1983) loss: 0.7437 (0.7557) time: 0.1615 data: 0.0672 max mem: 9377 +Train: [68] [2100/6250] eta: 0:11:39 lr: 0.000031 grad: 0.1805 (0.1978) loss: 0.7536 (0.7555) time: 0.1784 data: 0.0920 max mem: 9377 +Train: [68] [2200/6250] eta: 0:11:18 lr: 0.000031 grad: 0.1786 (0.1971) loss: 0.7601 (0.7554) time: 0.1646 data: 0.0637 max mem: 9377 +Train: [68] [2300/6250] eta: 0:10:59 lr: 0.000031 grad: 0.1744 (0.1965) loss: 0.7536 (0.7552) time: 0.1626 data: 0.0733 max mem: 9377 +Train: [68] [2400/6250] eta: 0:10:41 lr: 0.000031 grad: 0.1866 (0.1960) loss: 0.7474 (0.7550) time: 0.1578 data: 0.0681 max mem: 9377 +Train: [68] [2500/6250] eta: 0:10:23 lr: 0.000031 grad: 0.1768 (0.1954) loss: 0.7596 (0.7549) time: 0.1933 data: 0.1094 max mem: 9377 +Train: [68] [2600/6250] eta: 0:10:04 lr: 0.000031 grad: 0.1943 (0.1950) loss: 0.7527 (0.7548) time: 0.1553 data: 0.0689 max mem: 9377 +Train: [68] [2700/6250] eta: 0:09:47 lr: 0.000031 grad: 0.1645 (0.1943) loss: 0.7626 (0.7548) time: 0.1830 data: 0.0958 max mem: 9377 +Train: [68] [2800/6250] eta: 0:09:30 lr: 0.000031 grad: 0.1766 (0.1939) loss: 0.7570 (0.7549) time: 0.1439 data: 0.0553 max mem: 9377 +Train: [68] [2900/6250] eta: 0:09:13 lr: 0.000031 grad: 0.1771 (0.1934) loss: 0.7489 (0.7550) time: 0.1610 data: 0.0764 max mem: 9377 +Train: [68] [3000/6250] eta: 0:08:56 lr: 0.000031 grad: 0.1758 (0.1929) loss: 0.7581 (0.7550) time: 0.1486 data: 0.0573 max mem: 9377 +Train: [68] [3100/6250] eta: 0:08:40 lr: 0.000031 grad: 0.1788 (0.1925) loss: 0.7500 (0.7550) time: 0.1738 data: 0.0850 max mem: 9377 +Train: [68] [3200/6250] eta: 0:08:22 lr: 0.000031 grad: 0.1812 (0.1923) loss: 0.7548 (0.7551) time: 0.1359 data: 0.0472 max mem: 9377 +Train: [68] [3300/6250] eta: 0:08:04 lr: 0.000031 grad: 0.1838 (0.1919) loss: 0.7614 (0.7552) time: 0.1584 data: 0.0727 max mem: 9377 +Train: [68] [3400/6250] eta: 0:07:47 lr: 0.000031 grad: 0.1802 (0.1916) loss: 0.7513 (0.7553) time: 0.1892 data: 0.1035 max mem: 9377 +Train: [68] [3500/6250] eta: 0:07:29 lr: 0.000031 grad: 0.1664 (0.1912) loss: 0.7699 (0.7555) time: 0.1466 data: 0.0555 max mem: 9377 +Train: [68] [3600/6250] eta: 0:07:13 lr: 0.000031 grad: 0.1713 (0.1907) loss: 0.7607 (0.7558) time: 0.1599 data: 0.0707 max mem: 9377 +Train: [68] [3700/6250] eta: 0:06:58 lr: 0.000031 grad: 0.1797 (0.1904) loss: 0.7656 (0.7561) time: 0.2694 data: 0.1955 max mem: 9377 +Train: [68] [3800/6250] eta: 0:06:43 lr: 0.000031 grad: 0.1781 (0.1901) loss: 0.7673 (0.7562) time: 0.1616 data: 0.0765 max mem: 9377 +Train: [68] [3900/6250] eta: 0:06:28 lr: 0.000031 grad: 0.1798 (0.1899) loss: 0.7588 (0.7564) time: 0.1845 data: 0.0941 max mem: 9377 +Train: [68] [4000/6250] eta: 0:06:12 lr: 0.000031 grad: 0.1811 (0.1897) loss: 0.7596 (0.7564) time: 0.1728 data: 0.0896 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:56 lr: 0.000031 grad: 0.1832 (0.1895) loss: 0.7506 (0.7565) time: 0.1682 data: 0.0716 max mem: 9377 +Train: [68] [4200/6250] eta: 0:05:40 lr: 0.000031 grad: 0.1884 (0.1894) loss: 0.7518 (0.7564) time: 0.1765 data: 0.0859 max mem: 9377 +Train: [68] [4300/6250] eta: 0:05:23 lr: 0.000031 grad: 0.1731 (0.1893) loss: 0.7616 (0.7563) time: 0.1913 data: 0.1046 max mem: 9377 +Train: [68] [4400/6250] eta: 0:05:07 lr: 0.000031 grad: 0.1842 (0.1891) loss: 0.7663 (0.7563) time: 0.1661 data: 0.0664 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:50 lr: 0.000031 grad: 0.1664 (0.1890) loss: 0.7614 (0.7562) time: 0.1524 data: 0.0551 max mem: 9377 +Train: [68] [4600/6250] eta: 0:04:33 lr: 0.000031 grad: 0.1773 (0.1888) loss: 0.7498 (0.7561) time: 0.1611 data: 0.0713 max mem: 9377 +Train: [68] [4700/6250] eta: 0:04:16 lr: 0.000031 grad: 0.1797 (0.1887) loss: 0.7485 (0.7560) time: 0.1403 data: 0.0470 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:59 lr: 0.000030 grad: 0.1790 (0.1886) loss: 0.7302 (0.7558) time: 0.1816 data: 0.0930 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:43 lr: 0.000030 grad: 0.1818 (0.1885) loss: 0.7528 (0.7557) time: 0.1552 data: 0.0657 max mem: 9377 +Train: [68] [5000/6250] eta: 0:03:26 lr: 0.000030 grad: 0.1763 (0.1884) loss: 0.7447 (0.7556) time: 0.1709 data: 0.0827 max mem: 9377 +Train: [68] [5100/6250] eta: 0:03:09 lr: 0.000030 grad: 0.1763 (0.1882) loss: 0.7519 (0.7555) time: 0.1391 data: 0.0465 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:53 lr: 0.000030 grad: 0.1888 (0.1881) loss: 0.7312 (0.7554) time: 0.1903 data: 0.0988 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:36 lr: 0.000030 grad: 0.1849 (0.1881) loss: 0.7463 (0.7553) time: 0.1849 data: 0.0899 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:20 lr: 0.000030 grad: 0.1741 (0.1880) loss: 0.7491 (0.7552) time: 0.1920 data: 0.0901 max mem: 9377 +Train: [68] [5500/6250] eta: 0:02:04 lr: 0.000030 grad: 0.1824 (0.1879) loss: 0.7375 (0.7550) time: 0.1609 data: 0.0675 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:47 lr: 0.000030 grad: 0.1841 (0.1879) loss: 0.7434 (0.7549) time: 0.1472 data: 0.0493 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:30 lr: 0.000030 grad: 0.1782 (0.1877) loss: 0.7445 (0.7549) time: 0.1629 data: 0.0731 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:14 lr: 0.000030 grad: 0.1831 (0.1878) loss: 0.7446 (0.7548) time: 0.1653 data: 0.0689 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:57 lr: 0.000030 grad: 0.1772 (0.1877) loss: 0.7640 (0.7547) time: 0.1802 data: 0.0867 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:41 lr: 0.000030 grad: 0.1735 (0.1876) loss: 0.7540 (0.7547) time: 0.1537 data: 0.0599 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:24 lr: 0.000030 grad: 0.1868 (0.1876) loss: 0.7485 (0.7546) time: 0.1741 data: 0.0985 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:08 lr: 0.000030 grad: 0.1779 (0.1876) loss: 0.7523 (0.7545) time: 0.1367 data: 0.0404 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1848 (0.1876) loss: 0.7497 (0.7544) time: 0.1492 data: 0.0575 max mem: 9377 +Train: [68] Total time: 0:17:15 (0.1657 s / it) +Averaged stats: lr: 0.000030 grad: 0.1848 (0.1876) loss: 0.7497 (0.7544) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:03:39 loss: 0.8557 (0.8557) time: 3.5364 data: 3.4695 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8569 (0.8549) time: 0.1238 data: 0.0972 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-train-subset): loss: 0.8569 (0.8549) +Eval (hcp-val): [68] [ 0/62] eta: 0:03:36 loss: 0.8463 (0.8463) time: 3.4954 data: 3.4205 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8528 (0.8537) time: 0.1249 data: 0.1001 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:14 (0.2292 s / it) +Averaged stats (hcp-val): loss: 0.8528 (0.8537) +Eval (nsd-val): [68] [ 0/62] eta: 0:06:15 loss: 0.8169 (0.8169) time: 6.0561 data: 6.0254 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8260 (0.8278) time: 0.1236 data: 0.0984 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (nsd-val): loss: 0.8260 (0.8278) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 9:08:32 lr: 0.000030 grad: 0.1567 (0.1567) loss: 0.8681 (0.8681) time: 5.2660 data: 4.9313 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:23:53 lr: 0.000030 grad: 0.2335 (0.2804) loss: 0.7568 (0.7758) time: 0.1767 data: 0.0696 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:20:56 lr: 0.000030 grad: 0.2142 (0.2613) loss: 0.7488 (0.7657) time: 0.1876 data: 0.0820 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:19:24 lr: 0.000030 grad: 0.2209 (0.2481) loss: 0.7537 (0.7608) time: 0.1636 data: 0.0523 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:18:28 lr: 0.000030 grad: 0.1965 (0.2388) loss: 0.7456 (0.7576) time: 0.1617 data: 0.0598 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:17:51 lr: 0.000030 grad: 0.1905 (0.2329) loss: 0.7549 (0.7553) time: 0.1915 data: 0.0912 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:17:05 lr: 0.000030 grad: 0.1981 (0.2272) loss: 0.7518 (0.7546) time: 0.1653 data: 0.0829 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:16:36 lr: 0.000030 grad: 0.1994 (0.2243) loss: 0.7506 (0.7541) time: 0.1634 data: 0.0619 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:16:03 lr: 0.000030 grad: 0.1848 (0.2209) loss: 0.7502 (0.7535) time: 0.1760 data: 0.0796 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:15:45 lr: 0.000030 grad: 0.1820 (0.2174) loss: 0.7700 (0.7538) time: 0.1846 data: 0.0874 max mem: 9377 +Train: [69] [1000/6250] eta: 0:15:17 lr: 0.000030 grad: 0.1946 (0.2151) loss: 0.7625 (0.7538) time: 0.1480 data: 0.0593 max mem: 9377 +Train: [69] [1100/6250] eta: 0:14:54 lr: 0.000030 grad: 0.1774 (0.2123) loss: 0.7600 (0.7540) time: 0.1548 data: 0.0609 max mem: 9377 +Train: [69] [1200/6250] eta: 0:14:30 lr: 0.000030 grad: 0.1826 (0.2099) loss: 0.7634 (0.7545) time: 0.1611 data: 0.0714 max mem: 9377 +Train: [69] [1300/6250] eta: 0:14:07 lr: 0.000030 grad: 0.1768 (0.2083) loss: 0.7621 (0.7545) time: 0.1512 data: 0.0698 max mem: 9377 +Train: [69] [1400/6250] eta: 0:13:44 lr: 0.000030 grad: 0.1876 (0.2066) loss: 0.7596 (0.7545) time: 0.1651 data: 0.0793 max mem: 9377 +Train: [69] [1500/6250] eta: 0:13:29 lr: 0.000030 grad: 0.1805 (0.2051) loss: 0.7522 (0.7545) time: 0.1755 data: 0.0940 max mem: 9377 +Train: [69] [1600/6250] eta: 0:13:10 lr: 0.000030 grad: 0.1898 (0.2042) loss: 0.7551 (0.7545) time: 0.1577 data: 0.0773 max mem: 9377 +Train: [69] [1700/6250] eta: 0:12:51 lr: 0.000030 grad: 0.1909 (0.2036) loss: 0.7519 (0.7542) time: 0.1305 data: 0.0381 max mem: 9377 +Train: [69] [1800/6250] eta: 0:12:32 lr: 0.000030 grad: 0.1861 (0.2032) loss: 0.7566 (0.7538) time: 0.1514 data: 0.0637 max mem: 9377 +Train: [69] [1900/6250] eta: 0:12:15 lr: 0.000030 grad: 0.1851 (0.2026) loss: 0.7427 (0.7533) time: 0.1561 data: 0.0656 max mem: 9377 +Train: [69] [2000/6250] eta: 0:11:56 lr: 0.000030 grad: 0.1843 (0.2020) loss: 0.7510 (0.7531) time: 0.1671 data: 0.0730 max mem: 9377 +Train: [69] [2100/6250] eta: 0:11:36 lr: 0.000029 grad: 0.1876 (0.2015) loss: 0.7438 (0.7528) time: 0.1618 data: 0.0625 max mem: 9377 +Train: [69] [2200/6250] eta: 0:11:16 lr: 0.000029 grad: 0.1900 (0.2009) loss: 0.7567 (0.7527) time: 0.1556 data: 0.0636 max mem: 9377 +Train: [69] [2300/6250] eta: 0:10:58 lr: 0.000029 grad: 0.1913 (0.2004) loss: 0.7558 (0.7528) time: 0.1719 data: 0.0803 max mem: 9377 +Train: [69] [2400/6250] eta: 0:10:38 lr: 0.000029 grad: 0.1848 (0.2000) loss: 0.7581 (0.7528) time: 0.1478 data: 0.0536 max mem: 9377 +Train: [69] [2500/6250] eta: 0:10:20 lr: 0.000029 grad: 0.1872 (0.1996) loss: 0.7459 (0.7527) time: 0.1666 data: 0.0671 max mem: 9377 +Train: [69] [2600/6250] eta: 0:10:03 lr: 0.000029 grad: 0.1935 (0.1991) loss: 0.7465 (0.7526) time: 0.1563 data: 0.0719 max mem: 9377 +Train: [69] [2700/6250] eta: 0:09:46 lr: 0.000029 grad: 0.1810 (0.1985) loss: 0.7489 (0.7526) time: 0.1658 data: 0.0804 max mem: 9377 +Train: [69] [2800/6250] eta: 0:09:29 lr: 0.000029 grad: 0.1824 (0.1981) loss: 0.7541 (0.7524) time: 0.1528 data: 0.0642 max mem: 9377 +Train: [69] [2900/6250] eta: 0:09:13 lr: 0.000029 grad: 0.1838 (0.1977) loss: 0.7467 (0.7523) time: 0.1793 data: 0.0847 max mem: 9377 +Train: [69] [3000/6250] eta: 0:08:56 lr: 0.000029 grad: 0.1796 (0.1974) loss: 0.7574 (0.7522) time: 0.1557 data: 0.0627 max mem: 9377 +Train: [69] [3100/6250] eta: 0:08:39 lr: 0.000029 grad: 0.1896 (0.1972) loss: 0.7375 (0.7519) time: 0.1606 data: 0.0744 max mem: 9377 +Train: [69] [3200/6250] eta: 0:08:23 lr: 0.000029 grad: 0.1873 (0.1969) loss: 0.7385 (0.7516) time: 0.1735 data: 0.0818 max mem: 9377 +Train: [69] [3300/6250] eta: 0:08:05 lr: 0.000029 grad: 0.1825 (0.1966) loss: 0.7569 (0.7515) time: 0.1432 data: 0.0570 max mem: 9377 +Train: [69] [3400/6250] eta: 0:07:48 lr: 0.000029 grad: 0.1954 (0.1964) loss: 0.7297 (0.7512) time: 0.1605 data: 0.0685 max mem: 9377 +Train: [69] [3500/6250] eta: 0:07:32 lr: 0.000029 grad: 0.1827 (0.1962) loss: 0.7450 (0.7509) time: 0.1763 data: 0.0897 max mem: 9377 +Train: [69] [3600/6250] eta: 0:07:14 lr: 0.000029 grad: 0.1887 (0.1960) loss: 0.7452 (0.7508) time: 0.1583 data: 0.0621 max mem: 9377 +Train: [69] [3700/6250] eta: 0:06:59 lr: 0.000029 grad: 0.1872 (0.1958) loss: 0.7461 (0.7508) time: 0.1683 data: 0.0768 max mem: 9377 +Train: [69] [3800/6250] eta: 0:06:42 lr: 0.000029 grad: 0.1820 (0.1956) loss: 0.7503 (0.7507) time: 0.1856 data: 0.0970 max mem: 9377 +Train: [69] [3900/6250] eta: 0:06:26 lr: 0.000029 grad: 0.1890 (0.1955) loss: 0.7509 (0.7506) time: 0.1776 data: 0.0882 max mem: 9377 +Train: [69] [4000/6250] eta: 0:06:09 lr: 0.000029 grad: 0.1901 (0.1954) loss: 0.7360 (0.7505) time: 0.1549 data: 0.0732 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:52 lr: 0.000029 grad: 0.1861 (0.1952) loss: 0.7450 (0.7505) time: 0.1561 data: 0.0685 max mem: 9377 +Train: [69] [4200/6250] eta: 0:05:36 lr: 0.000029 grad: 0.1855 (0.1950) loss: 0.7502 (0.7505) time: 0.1736 data: 0.0791 max mem: 9377 +Train: [69] [4300/6250] eta: 0:05:20 lr: 0.000029 grad: 0.1879 (0.1949) loss: 0.7536 (0.7504) time: 0.1842 data: 0.0944 max mem: 9377 +Train: [69] [4400/6250] eta: 0:05:04 lr: 0.000029 grad: 0.1771 (0.1948) loss: 0.7552 (0.7504) time: 0.1903 data: 0.0951 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:47 lr: 0.000029 grad: 0.1890 (0.1947) loss: 0.7469 (0.7505) time: 0.1427 data: 0.0380 max mem: 9377 +Train: [69] [4600/6250] eta: 0:04:30 lr: 0.000029 grad: 0.1928 (0.1946) loss: 0.7408 (0.7506) time: 0.1820 data: 0.0860 max mem: 9377 +Train: [69] [4700/6250] eta: 0:04:14 lr: 0.000029 grad: 0.1878 (0.1945) loss: 0.7471 (0.7506) time: 0.1387 data: 0.0454 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:58 lr: 0.000029 grad: 0.1947 (0.1945) loss: 0.7539 (0.7506) time: 0.2040 data: 0.1282 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:41 lr: 0.000029 grad: 0.1826 (0.1944) loss: 0.7556 (0.7506) time: 0.1705 data: 0.0835 max mem: 9377 +Train: [69] [5000/6250] eta: 0:03:25 lr: 0.000029 grad: 0.1934 (0.1943) loss: 0.7556 (0.7506) time: 0.1683 data: 0.0788 max mem: 9377 +Train: [69] [5100/6250] eta: 0:03:09 lr: 0.000029 grad: 0.1874 (0.1943) loss: 0.7580 (0.7507) time: 0.1685 data: 0.0856 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:53 lr: 0.000029 grad: 0.1839 (0.1941) loss: 0.7557 (0.7508) time: 0.1438 data: 0.0487 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:36 lr: 0.000029 grad: 0.1810 (0.1939) loss: 0.7505 (0.7510) time: 0.1689 data: 0.0838 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:20 lr: 0.000029 grad: 0.1886 (0.1937) loss: 0.7521 (0.7511) time: 0.1954 data: 0.1076 max mem: 9377 +Train: [69] [5500/6250] eta: 0:02:03 lr: 0.000029 grad: 0.1894 (0.1936) loss: 0.7561 (0.7512) time: 0.1812 data: 0.0899 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:47 lr: 0.000028 grad: 0.1799 (0.1934) loss: 0.7591 (0.7513) time: 0.1598 data: 0.0621 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:30 lr: 0.000028 grad: 0.1896 (0.1933) loss: 0.7563 (0.7514) time: 0.1459 data: 0.0415 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:14 lr: 0.000028 grad: 0.1848 (0.1932) loss: 0.7513 (0.7514) time: 0.1455 data: 0.0571 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:57 lr: 0.000028 grad: 0.1817 (0.1931) loss: 0.7569 (0.7515) time: 0.1452 data: 0.0478 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:41 lr: 0.000028 grad: 0.1737 (0.1929) loss: 0.7590 (0.7516) time: 0.1787 data: 0.0873 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:24 lr: 0.000028 grad: 0.1746 (0.1927) loss: 0.7581 (0.7517) time: 0.1597 data: 0.0766 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:08 lr: 0.000028 grad: 0.1769 (0.1926) loss: 0.7578 (0.7518) time: 0.1551 data: 0.0767 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1948 (0.1926) loss: 0.7473 (0.7518) time: 0.1463 data: 0.0498 max mem: 9377 +Train: [69] Total time: 0:17:13 (0.1654 s / it) +Averaged stats: lr: 0.000028 grad: 0.1948 (0.1926) loss: 0.7473 (0.7518) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:04:45 loss: 0.8518 (0.8518) time: 4.6013 data: 4.5224 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8529 (0.8551) time: 0.1256 data: 0.1003 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:14 (0.2330 s / it) +Averaged stats (hcp-train-subset): loss: 0.8529 (0.8551) +Making plots (hcp-train-subset): example=41 +Eval (hcp-val): [69] [ 0/62] eta: 0:05:49 loss: 0.8476 (0.8476) time: 5.6380 data: 5.6075 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8510 (0.8532) time: 0.1431 data: 0.1181 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:14 (0.2359 s / it) +Averaged stats (hcp-val): loss: 0.8510 (0.8532) +Making plots (hcp-val): example=18 +Eval (nsd-val): [69] [ 0/62] eta: 0:05:35 loss: 0.8229 (0.8229) time: 5.4136 data: 5.3832 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8317 (0.8340) time: 0.1334 data: 0.1084 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:14 (0.2373 s / it) +Averaged stats (nsd-val): loss: 0.8317 (0.8340) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 10:46:54 lr: 0.000028 grad: 0.1587 (0.1587) loss: 0.8181 (0.8181) time: 6.2103 data: 6.0776 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:23:00 lr: 0.000028 grad: 0.2435 (0.2901) loss: 0.7641 (0.7701) time: 0.1765 data: 0.0650 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:19:51 lr: 0.000028 grad: 0.2155 (0.2750) loss: 0.7618 (0.7645) time: 0.1755 data: 0.0843 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:18:23 lr: 0.000028 grad: 0.2235 (0.2577) loss: 0.7573 (0.7622) time: 0.1644 data: 0.0664 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:17:20 lr: 0.000028 grad: 0.1907 (0.2459) loss: 0.7601 (0.7613) time: 0.1519 data: 0.0528 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:16:38 lr: 0.000028 grad: 0.1797 (0.2353) loss: 0.7651 (0.7614) time: 0.1593 data: 0.0657 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:15:57 lr: 0.000028 grad: 0.1821 (0.2263) loss: 0.7661 (0.7620) time: 0.1475 data: 0.0533 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:15:27 lr: 0.000028 grad: 0.1802 (0.2205) loss: 0.7563 (0.7619) time: 0.1535 data: 0.0577 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:15:06 lr: 0.000028 grad: 0.1712 (0.2157) loss: 0.7642 (0.7619) time: 0.1514 data: 0.0606 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:14:53 lr: 0.000028 grad: 0.1877 (0.2134) loss: 0.7544 (0.7614) time: 0.1793 data: 0.0894 max mem: 9377 +Train: [70] [1000/6250] eta: 0:14:38 lr: 0.000028 grad: 0.1918 (0.2113) loss: 0.7541 (0.7609) time: 0.1659 data: 0.0818 max mem: 9377 +Train: [70] [1100/6250] eta: 0:14:18 lr: 0.000028 grad: 0.1759 (0.2088) loss: 0.7629 (0.7604) time: 0.1592 data: 0.0720 max mem: 9377 +Train: [70] [1200/6250] eta: 0:14:03 lr: 0.000028 grad: 0.1892 (0.2068) loss: 0.7601 (0.7603) time: 0.1978 data: 0.1083 max mem: 9377 +Train: [70] [1300/6250] eta: 0:13:45 lr: 0.000028 grad: 0.1774 (0.2051) loss: 0.7485 (0.7600) time: 0.1833 data: 0.1010 max mem: 9377 +Train: [70] [1400/6250] eta: 0:13:29 lr: 0.000028 grad: 0.1863 (0.2038) loss: 0.7514 (0.7597) time: 0.1397 data: 0.0539 max mem: 9377 +Train: [70] [1500/6250] eta: 0:13:10 lr: 0.000028 grad: 0.1770 (0.2027) loss: 0.7566 (0.7594) time: 0.1657 data: 0.0777 max mem: 9377 +Train: [70] [1600/6250] eta: 0:12:57 lr: 0.000028 grad: 0.1798 (0.2015) loss: 0.7511 (0.7592) time: 0.1592 data: 0.0649 max mem: 9377 +Train: [70] [1700/6250] eta: 0:12:39 lr: 0.000028 grad: 0.1834 (0.2008) loss: 0.7540 (0.7590) time: 0.1609 data: 0.0786 max mem: 9377 +Train: [70] [1800/6250] eta: 0:12:27 lr: 0.000028 grad: 0.1911 (0.2001) loss: 0.7511 (0.7588) time: 0.1967 data: 0.1028 max mem: 9377 +Train: [70] [1900/6250] eta: 0:12:12 lr: 0.000028 grad: 0.1853 (0.1991) loss: 0.7431 (0.7585) time: 0.1740 data: 0.0873 max mem: 9377 +Train: [70] [2000/6250] eta: 0:11:55 lr: 0.000028 grad: 0.1847 (0.1983) loss: 0.7353 (0.7582) time: 0.1669 data: 0.0764 max mem: 9377 +Train: [70] [2100/6250] eta: 0:11:37 lr: 0.000028 grad: 0.1759 (0.1978) loss: 0.7499 (0.7579) time: 0.1751 data: 0.0918 max mem: 9377 +Train: [70] [2200/6250] eta: 0:11:18 lr: 0.000028 grad: 0.1786 (0.1971) loss: 0.7500 (0.7577) time: 0.1208 data: 0.0281 max mem: 9377 +Train: [70] [2300/6250] eta: 0:11:00 lr: 0.000028 grad: 0.1802 (0.1967) loss: 0.7454 (0.7574) time: 0.1572 data: 0.0645 max mem: 9377 +Train: [70] [2400/6250] eta: 0:10:43 lr: 0.000028 grad: 0.1790 (0.1964) loss: 0.7556 (0.7570) time: 0.1528 data: 0.0601 max mem: 9377 +Train: [70] [2500/6250] eta: 0:10:24 lr: 0.000028 grad: 0.1906 (0.1959) loss: 0.7367 (0.7568) time: 0.1496 data: 0.0552 max mem: 9377 +Train: [70] [2600/6250] eta: 0:10:07 lr: 0.000028 grad: 0.1843 (0.1956) loss: 0.7411 (0.7564) time: 0.1770 data: 0.0950 max mem: 9377 +Train: [70] [2700/6250] eta: 0:09:51 lr: 0.000028 grad: 0.1743 (0.1952) loss: 0.7562 (0.7560) time: 0.1051 data: 0.0003 max mem: 9377 +Train: [70] [2800/6250] eta: 0:09:31 lr: 0.000028 grad: 0.1764 (0.1949) loss: 0.7501 (0.7557) time: 0.1480 data: 0.0555 max mem: 9377 +Train: [70] [2900/6250] eta: 0:09:14 lr: 0.000028 grad: 0.1799 (0.1946) loss: 0.7471 (0.7555) time: 0.1473 data: 0.0595 max mem: 9377 +Train: [70] [3000/6250] eta: 0:08:58 lr: 0.000027 grad: 0.1797 (0.1943) loss: 0.7477 (0.7553) time: 0.2217 data: 0.1378 max mem: 9377 +Train: [70] [3100/6250] eta: 0:08:40 lr: 0.000027 grad: 0.1885 (0.1941) loss: 0.7529 (0.7551) time: 0.1545 data: 0.0609 max mem: 9377 +Train: [70] [3200/6250] eta: 0:08:23 lr: 0.000027 grad: 0.1885 (0.1939) loss: 0.7496 (0.7549) time: 0.1499 data: 0.0678 max mem: 9377 +Train: [70] [3300/6250] eta: 0:08:05 lr: 0.000027 grad: 0.1833 (0.1935) loss: 0.7514 (0.7548) time: 0.1485 data: 0.0543 max mem: 9377 +Train: [70] [3400/6250] eta: 0:07:48 lr: 0.000027 grad: 0.1733 (0.1932) loss: 0.7628 (0.7548) time: 0.1708 data: 0.0861 max mem: 9377 +Train: [70] [3500/6250] eta: 0:07:31 lr: 0.000027 grad: 0.1801 (0.1928) loss: 0.7514 (0.7548) time: 0.1539 data: 0.0727 max mem: 9377 +Train: [70] [3600/6250] eta: 0:07:15 lr: 0.000027 grad: 0.1839 (0.1925) loss: 0.7494 (0.7547) time: 0.1721 data: 0.0847 max mem: 9377 +Train: [70] [3700/6250] eta: 0:07:00 lr: 0.000027 grad: 0.1860 (0.1923) loss: 0.7558 (0.7547) time: 0.2188 data: 0.1343 max mem: 9377 +Train: [70] [3800/6250] eta: 0:06:45 lr: 0.000027 grad: 0.1858 (0.1922) loss: 0.7470 (0.7545) time: 0.1818 data: 0.0966 max mem: 9377 +Train: [70] [3900/6250] eta: 0:06:29 lr: 0.000027 grad: 0.1823 (0.1920) loss: 0.7547 (0.7545) time: 0.1724 data: 0.0829 max mem: 9377 +Train: [70] [4000/6250] eta: 0:06:14 lr: 0.000027 grad: 0.1806 (0.1917) loss: 0.7464 (0.7544) time: 0.1771 data: 0.0880 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:58 lr: 0.000027 grad: 0.1805 (0.1914) loss: 0.7422 (0.7544) time: 0.1779 data: 0.0826 max mem: 9377 +Train: [70] [4200/6250] eta: 0:05:42 lr: 0.000027 grad: 0.1779 (0.1912) loss: 0.7487 (0.7543) time: 0.2116 data: 0.1186 max mem: 9377 +Train: [70] [4300/6250] eta: 0:05:25 lr: 0.000027 grad: 0.1777 (0.1910) loss: 0.7552 (0.7543) time: 0.1798 data: 0.0854 max mem: 9377 +Train: [70] [4400/6250] eta: 0:05:08 lr: 0.000027 grad: 0.1772 (0.1908) loss: 0.7567 (0.7543) time: 0.1180 data: 0.0239 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:52 lr: 0.000027 grad: 0.1857 (0.1906) loss: 0.7552 (0.7543) time: 0.1658 data: 0.0744 max mem: 9377 +Train: [70] [4600/6250] eta: 0:04:35 lr: 0.000027 grad: 0.1846 (0.1905) loss: 0.7530 (0.7544) time: 0.1567 data: 0.0638 max mem: 9377 +Train: [70] [4700/6250] eta: 0:04:18 lr: 0.000027 grad: 0.1822 (0.1903) loss: 0.7557 (0.7544) time: 0.1770 data: 0.0906 max mem: 9377 +Train: [70] [4800/6250] eta: 0:04:02 lr: 0.000027 grad: 0.1835 (0.1903) loss: 0.7558 (0.7543) time: 0.1919 data: 0.1116 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:45 lr: 0.000027 grad: 0.1809 (0.1902) loss: 0.7514 (0.7543) time: 0.1665 data: 0.0734 max mem: 9377 +Train: [70] [5000/6250] eta: 0:03:28 lr: 0.000027 grad: 0.1840 (0.1901) loss: 0.7570 (0.7542) time: 0.1572 data: 0.0649 max mem: 9377 +Train: [70] [5100/6250] eta: 0:03:11 lr: 0.000027 grad: 0.1720 (0.1899) loss: 0.7660 (0.7542) time: 0.1668 data: 0.0803 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:55 lr: 0.000027 grad: 0.1832 (0.1898) loss: 0.7516 (0.7543) time: 0.1660 data: 0.0746 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:38 lr: 0.000027 grad: 0.1804 (0.1897) loss: 0.7610 (0.7543) time: 0.1509 data: 0.0573 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:21 lr: 0.000027 grad: 0.1785 (0.1895) loss: 0.7513 (0.7544) time: 0.1466 data: 0.0430 max mem: 9377 +Train: [70] [5500/6250] eta: 0:02:05 lr: 0.000027 grad: 0.1789 (0.1894) loss: 0.7598 (0.7544) time: 0.1507 data: 0.0568 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:48 lr: 0.000027 grad: 0.1921 (0.1893) loss: 0.7588 (0.7544) time: 0.1775 data: 0.0894 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:31 lr: 0.000027 grad: 0.1768 (0.1892) loss: 0.7487 (0.7544) time: 0.1526 data: 0.0591 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:14 lr: 0.000027 grad: 0.1885 (0.1892) loss: 0.7444 (0.7543) time: 0.1612 data: 0.0703 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:58 lr: 0.000027 grad: 0.1729 (0.1891) loss: 0.7519 (0.7544) time: 0.1817 data: 0.0817 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:41 lr: 0.000027 grad: 0.1788 (0.1890) loss: 0.7493 (0.7543) time: 0.1674 data: 0.0763 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:24 lr: 0.000027 grad: 0.1834 (0.1889) loss: 0.7517 (0.7542) time: 0.1854 data: 0.0969 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:08 lr: 0.000027 grad: 0.1917 (0.1889) loss: 0.7515 (0.7541) time: 0.1656 data: 0.0789 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1818 (0.1889) loss: 0.7579 (0.7541) time: 0.1651 data: 0.0747 max mem: 9377 +Train: [70] Total time: 0:17:23 (0.1669 s / it) +Averaged stats: lr: 0.000027 grad: 0.1818 (0.1889) loss: 0.7579 (0.7541) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:04:03 loss: 0.8480 (0.8480) time: 3.9200 data: 3.8387 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8561 (0.8554) time: 0.1028 data: 0.0779 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:14 (0.2312 s / it) +Averaged stats (hcp-train-subset): loss: 0.8561 (0.8554) +Eval (hcp-val): [70] [ 0/62] eta: 0:03:57 loss: 0.8512 (0.8512) time: 3.8378 data: 3.7665 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8547 (0.8543) time: 0.1272 data: 0.1017 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (hcp-val): loss: 0.8547 (0.8543) +Eval (nsd-val): [70] [ 0/62] eta: 0:05:42 loss: 0.8208 (0.8208) time: 5.5304 data: 5.4994 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8301 (0.8311) time: 0.1415 data: 0.1117 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (nsd-val): loss: 0.8301 (0.8311) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 10:51:04 lr: 0.000027 grad: 0.2388 (0.2388) loss: 0.7787 (0.7787) time: 6.2503 data: 5.9755 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:23:14 lr: 0.000027 grad: 0.2229 (0.2909) loss: 0.7611 (0.7751) time: 0.1659 data: 0.0648 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:19:41 lr: 0.000027 grad: 0.2056 (0.2613) loss: 0.7701 (0.7719) time: 0.1599 data: 0.0474 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:18:38 lr: 0.000027 grad: 0.1888 (0.2459) loss: 0.7542 (0.7679) time: 0.1697 data: 0.0783 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:17:52 lr: 0.000026 grad: 0.1858 (0.2318) loss: 0.7603 (0.7671) time: 0.1702 data: 0.0574 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:16:56 lr: 0.000026 grad: 0.1821 (0.2238) loss: 0.7597 (0.7657) time: 0.1586 data: 0.0509 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:16:18 lr: 0.000026 grad: 0.1885 (0.2180) loss: 0.7608 (0.7646) time: 0.1698 data: 0.0752 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:15:46 lr: 0.000026 grad: 0.1791 (0.2134) loss: 0.7603 (0.7638) time: 0.1497 data: 0.0497 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:15:17 lr: 0.000026 grad: 0.1830 (0.2098) loss: 0.7589 (0.7632) time: 0.1563 data: 0.0563 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:14:50 lr: 0.000026 grad: 0.1880 (0.2075) loss: 0.7534 (0.7622) time: 0.1457 data: 0.0452 max mem: 9377 +Train: [71] [1000/6250] eta: 0:14:21 lr: 0.000026 grad: 0.1851 (0.2055) loss: 0.7613 (0.7618) time: 0.1577 data: 0.0622 max mem: 9377 +Train: [71] [1100/6250] eta: 0:14:00 lr: 0.000026 grad: 0.1843 (0.2038) loss: 0.7470 (0.7613) time: 0.1491 data: 0.0495 max mem: 9377 +Train: [71] [1200/6250] eta: 0:13:43 lr: 0.000026 grad: 0.1746 (0.2021) loss: 0.7605 (0.7604) time: 0.1512 data: 0.0626 max mem: 9377 +Train: [71] [1300/6250] eta: 0:13:27 lr: 0.000026 grad: 0.1744 (0.2010) loss: 0.7542 (0.7599) time: 0.1750 data: 0.0857 max mem: 9377 +Train: [71] [1400/6250] eta: 0:13:12 lr: 0.000026 grad: 0.1824 (0.1999) loss: 0.7584 (0.7596) time: 0.1975 data: 0.1232 max mem: 9377 +Train: [71] [1500/6250] eta: 0:12:59 lr: 0.000026 grad: 0.1821 (0.1993) loss: 0.7497 (0.7591) time: 0.1502 data: 0.0697 max mem: 9377 +Train: [71] [1600/6250] eta: 0:12:45 lr: 0.000026 grad: 0.1834 (0.1982) loss: 0.7580 (0.7589) time: 0.1650 data: 0.0792 max mem: 9377 +Train: [71] [1700/6250] eta: 0:12:30 lr: 0.000026 grad: 0.1797 (0.1972) loss: 0.7456 (0.7586) time: 0.1735 data: 0.0915 max mem: 9377 +Train: [71] [1800/6250] eta: 0:12:16 lr: 0.000026 grad: 0.1817 (0.1962) loss: 0.7618 (0.7586) time: 0.1725 data: 0.0793 max mem: 9377 +Train: [71] [1900/6250] eta: 0:12:00 lr: 0.000026 grad: 0.1726 (0.1954) loss: 0.7570 (0.7585) time: 0.1716 data: 0.0893 max mem: 9377 +Train: [71] [2000/6250] eta: 0:11:42 lr: 0.000026 grad: 0.1765 (0.1947) loss: 0.7542 (0.7584) time: 0.1388 data: 0.0444 max mem: 9377 +Train: [71] [2100/6250] eta: 0:11:24 lr: 0.000026 grad: 0.1780 (0.1940) loss: 0.7469 (0.7582) time: 0.1645 data: 0.0799 max mem: 9377 +Train: [71] [2200/6250] eta: 0:11:05 lr: 0.000026 grad: 0.1761 (0.1935) loss: 0.7581 (0.7582) time: 0.1556 data: 0.0726 max mem: 9377 +Train: [71] [2300/6250] eta: 0:10:48 lr: 0.000026 grad: 0.1762 (0.1930) loss: 0.7581 (0.7581) time: 0.1748 data: 0.0756 max mem: 9377 +Train: [71] [2400/6250] eta: 0:10:29 lr: 0.000026 grad: 0.1705 (0.1924) loss: 0.7634 (0.7583) time: 0.1408 data: 0.0467 max mem: 9377 +Train: [71] [2500/6250] eta: 0:10:10 lr: 0.000026 grad: 0.1728 (0.1918) loss: 0.7640 (0.7581) time: 0.1611 data: 0.0702 max mem: 9377 +Train: [71] [2600/6250] eta: 0:09:51 lr: 0.000026 grad: 0.1865 (0.1915) loss: 0.7609 (0.7581) time: 0.1474 data: 0.0563 max mem: 9377 +Train: [71] [2700/6250] eta: 0:09:33 lr: 0.000026 grad: 0.1914 (0.1914) loss: 0.7383 (0.7578) time: 0.1417 data: 0.0597 max mem: 9377 +Train: [71] [2800/6250] eta: 0:09:16 lr: 0.000026 grad: 0.1812 (0.1911) loss: 0.7610 (0.7577) time: 0.1477 data: 0.0528 max mem: 9377 +Train: [71] [2900/6250] eta: 0:08:58 lr: 0.000026 grad: 0.1782 (0.1908) loss: 0.7536 (0.7577) time: 0.1617 data: 0.0782 max mem: 9377 +Train: [71] [3000/6250] eta: 0:08:41 lr: 0.000026 grad: 0.1754 (0.1904) loss: 0.7569 (0.7576) time: 0.1545 data: 0.0624 max mem: 9377 +Train: [71] [3100/6250] eta: 0:08:26 lr: 0.000026 grad: 0.1820 (0.1903) loss: 0.7443 (0.7574) time: 0.1525 data: 0.0668 max mem: 9377 +Train: [71] [3200/6250] eta: 0:08:09 lr: 0.000026 grad: 0.1747 (0.1901) loss: 0.7596 (0.7573) time: 0.1576 data: 0.0688 max mem: 9377 +Train: [71] [3300/6250] eta: 0:07:53 lr: 0.000026 grad: 0.1774 (0.1898) loss: 0.7613 (0.7573) time: 0.1513 data: 0.0648 max mem: 9377 +Train: [71] [3400/6250] eta: 0:07:38 lr: 0.000026 grad: 0.1760 (0.1895) loss: 0.7559 (0.7573) time: 0.1734 data: 0.0843 max mem: 9377 +Train: [71] [3500/6250] eta: 0:07:21 lr: 0.000026 grad: 0.1799 (0.1894) loss: 0.7466 (0.7572) time: 0.1573 data: 0.0719 max mem: 9377 +Train: [71] [3600/6250] eta: 0:07:08 lr: 0.000026 grad: 0.1818 (0.1893) loss: 0.7555 (0.7571) time: 0.2425 data: 0.1558 max mem: 9377 +Train: [71] [3700/6250] eta: 0:06:52 lr: 0.000026 grad: 0.1862 (0.1892) loss: 0.7454 (0.7569) time: 0.1955 data: 0.1192 max mem: 9377 +Train: [71] [3800/6250] eta: 0:06:36 lr: 0.000026 grad: 0.1907 (0.1892) loss: 0.7389 (0.7567) time: 0.1847 data: 0.1076 max mem: 9377 +Train: [71] [3900/6250] eta: 0:06:20 lr: 0.000026 grad: 0.1890 (0.1891) loss: 0.7408 (0.7566) time: 0.1598 data: 0.0769 max mem: 9377 +Train: [71] [4000/6250] eta: 0:06:05 lr: 0.000026 grad: 0.1866 (0.1891) loss: 0.7524 (0.7564) time: 0.1845 data: 0.0988 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:49 lr: 0.000026 grad: 0.1758 (0.1890) loss: 0.7388 (0.7562) time: 0.1646 data: 0.0798 max mem: 9377 +Train: [71] [4200/6250] eta: 0:05:34 lr: 0.000025 grad: 0.1855 (0.1891) loss: 0.7447 (0.7561) time: 0.1852 data: 0.0918 max mem: 9377 +Train: [71] [4300/6250] eta: 0:05:17 lr: 0.000025 grad: 0.1815 (0.1890) loss: 0.7483 (0.7560) time: 0.1559 data: 0.0639 max mem: 9377 +Train: [71] [4400/6250] eta: 0:05:01 lr: 0.000025 grad: 0.1849 (0.1889) loss: 0.7404 (0.7558) time: 0.1634 data: 0.0716 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:45 lr: 0.000025 grad: 0.1807 (0.1888) loss: 0.7465 (0.7558) time: 0.1577 data: 0.0692 max mem: 9377 +Train: [71] [4600/6250] eta: 0:04:28 lr: 0.000025 grad: 0.1853 (0.1888) loss: 0.7439 (0.7557) time: 0.1597 data: 0.0624 max mem: 9377 +Train: [71] [4700/6250] eta: 0:04:12 lr: 0.000025 grad: 0.1806 (0.1886) loss: 0.7503 (0.7557) time: 0.1521 data: 0.0675 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:55 lr: 0.000025 grad: 0.1840 (0.1885) loss: 0.7451 (0.7555) time: 0.1502 data: 0.0708 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:39 lr: 0.000025 grad: 0.1873 (0.1884) loss: 0.7470 (0.7554) time: 0.1422 data: 0.0525 max mem: 9377 +Train: [71] [5000/6250] eta: 0:03:23 lr: 0.000025 grad: 0.1891 (0.1885) loss: 0.7501 (0.7553) time: 0.1573 data: 0.0701 max mem: 9377 +Train: [71] [5100/6250] eta: 0:03:06 lr: 0.000025 grad: 0.1799 (0.1885) loss: 0.7477 (0.7551) time: 0.1852 data: 0.0967 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:50 lr: 0.000025 grad: 0.1856 (0.1886) loss: 0.7324 (0.7549) time: 0.1605 data: 0.0680 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:34 lr: 0.000025 grad: 0.1834 (0.1885) loss: 0.7492 (0.7547) time: 0.1557 data: 0.0706 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:18 lr: 0.000025 grad: 0.1827 (0.1886) loss: 0.7556 (0.7546) time: 0.1472 data: 0.0554 max mem: 9377 +Train: [71] [5500/6250] eta: 0:02:01 lr: 0.000025 grad: 0.1896 (0.1886) loss: 0.7529 (0.7545) time: 0.1483 data: 0.0532 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:45 lr: 0.000025 grad: 0.1749 (0.1886) loss: 0.7596 (0.7545) time: 0.1447 data: 0.0549 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:29 lr: 0.000025 grad: 0.1738 (0.1886) loss: 0.7502 (0.7545) time: 0.1293 data: 0.0256 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:12 lr: 0.000025 grad: 0.1732 (0.1885) loss: 0.7702 (0.7546) time: 0.1459 data: 0.0610 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:56 lr: 0.000025 grad: 0.1806 (0.1885) loss: 0.7597 (0.7547) time: 0.1992 data: 0.1240 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:40 lr: 0.000025 grad: 0.1776 (0.1884) loss: 0.7581 (0.7547) time: 0.1649 data: 0.0813 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:24 lr: 0.000025 grad: 0.1827 (0.1883) loss: 0.7522 (0.7547) time: 0.1872 data: 0.0984 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:08 lr: 0.000025 grad: 0.1920 (0.1884) loss: 0.7462 (0.7547) time: 0.1050 data: 0.0002 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1841 (0.1884) loss: 0.7567 (0.7547) time: 0.0987 data: 0.0151 max mem: 9377 +Train: [71] Total time: 0:17:02 (0.1636 s / it) +Averaged stats: lr: 0.000025 grad: 0.1841 (0.1884) loss: 0.7567 (0.7547) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:04:40 loss: 0.8485 (0.8485) time: 4.5305 data: 4.4629 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8567 (0.8557) time: 0.1289 data: 0.1037 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:14 (0.2312 s / it) +Averaged stats (hcp-train-subset): loss: 0.8567 (0.8557) +Eval (hcp-val): [71] [ 0/62] eta: 0:06:24 loss: 0.8479 (0.8479) time: 6.2023 data: 6.1700 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8510 (0.8538) time: 0.1207 data: 0.0954 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (hcp-val): loss: 0.8510 (0.8538) +Eval (nsd-val): [71] [ 0/62] eta: 0:05:16 loss: 0.8262 (0.8262) time: 5.0969 data: 5.0660 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8318 (0.8327) time: 0.1346 data: 0.1094 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (nsd-val): loss: 0.8318 (0.8327) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 8:35:52 lr: 0.000025 grad: 0.4680 (0.4680) loss: 0.8107 (0.8107) time: 4.9525 data: 4.5897 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:22:10 lr: 0.000025 grad: 0.2150 (0.2653) loss: 0.7421 (0.7807) time: 0.1744 data: 0.0780 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:18:54 lr: 0.000025 grad: 0.2411 (0.2518) loss: 0.7467 (0.7646) time: 0.1548 data: 0.0503 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:18:14 lr: 0.000025 grad: 0.2128 (0.2431) loss: 0.7456 (0.7594) time: 0.1862 data: 0.0839 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:17:41 lr: 0.000025 grad: 0.2058 (0.2363) loss: 0.7602 (0.7570) time: 0.1855 data: 0.0795 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:17:01 lr: 0.000025 grad: 0.2081 (0.2312) loss: 0.7424 (0.7542) time: 0.1386 data: 0.0410 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:16:26 lr: 0.000025 grad: 0.2067 (0.2259) loss: 0.7490 (0.7540) time: 0.1584 data: 0.0710 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:15:58 lr: 0.000025 grad: 0.1810 (0.2219) loss: 0.7566 (0.7537) time: 0.1552 data: 0.0540 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:15:32 lr: 0.000025 grad: 0.1836 (0.2181) loss: 0.7533 (0.7544) time: 0.1813 data: 0.0954 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:15:12 lr: 0.000025 grad: 0.1806 (0.2145) loss: 0.7563 (0.7549) time: 0.1770 data: 0.0907 max mem: 9377 +Train: [72] [1000/6250] eta: 0:14:52 lr: 0.000025 grad: 0.1773 (0.2113) loss: 0.7580 (0.7554) time: 0.1766 data: 0.0804 max mem: 9377 +Train: [72] [1100/6250] eta: 0:14:29 lr: 0.000025 grad: 0.1741 (0.2087) loss: 0.7587 (0.7559) time: 0.1590 data: 0.0729 max mem: 9377 +Train: [72] [1200/6250] eta: 0:14:10 lr: 0.000025 grad: 0.1871 (0.2067) loss: 0.7462 (0.7559) time: 0.1450 data: 0.0557 max mem: 9377 +Train: [72] [1300/6250] eta: 0:13:49 lr: 0.000025 grad: 0.1943 (0.2057) loss: 0.7497 (0.7557) time: 0.1555 data: 0.0612 max mem: 9377 +Train: [72] [1400/6250] eta: 0:13:33 lr: 0.000025 grad: 0.1886 (0.2045) loss: 0.7492 (0.7556) time: 0.1701 data: 0.0918 max mem: 9377 +Train: [72] [1500/6250] eta: 0:13:15 lr: 0.000025 grad: 0.1860 (0.2034) loss: 0.7517 (0.7556) time: 0.1525 data: 0.0693 max mem: 9377 +Train: [72] [1600/6250] eta: 0:12:56 lr: 0.000025 grad: 0.1855 (0.2023) loss: 0.7547 (0.7556) time: 0.1781 data: 0.0869 max mem: 9377 +Train: [72] [1700/6250] eta: 0:12:38 lr: 0.000024 grad: 0.1896 (0.2015) loss: 0.7639 (0.7555) time: 0.1675 data: 0.0858 max mem: 9377 +Train: [72] [1800/6250] eta: 0:12:24 lr: 0.000024 grad: 0.1882 (0.2007) loss: 0.7638 (0.7557) time: 0.1725 data: 0.0876 max mem: 9377 +Train: [72] [1900/6250] eta: 0:12:06 lr: 0.000024 grad: 0.1755 (0.1998) loss: 0.7569 (0.7560) time: 0.1457 data: 0.0553 max mem: 9377 +Train: [72] [2000/6250] eta: 0:11:49 lr: 0.000024 grad: 0.1780 (0.1989) loss: 0.7536 (0.7562) time: 0.1719 data: 0.0777 max mem: 9377 +Train: [72] [2100/6250] eta: 0:11:32 lr: 0.000024 grad: 0.1907 (0.1983) loss: 0.7544 (0.7564) time: 0.1549 data: 0.0645 max mem: 9377 +Train: [72] [2200/6250] eta: 0:11:16 lr: 0.000024 grad: 0.1821 (0.1978) loss: 0.7613 (0.7562) time: 0.1575 data: 0.0684 max mem: 9377 +Train: [72] [2300/6250] eta: 0:10:56 lr: 0.000024 grad: 0.1869 (0.1975) loss: 0.7591 (0.7561) time: 0.1551 data: 0.0562 max mem: 9377 +Train: [72] [2400/6250] eta: 0:10:38 lr: 0.000024 grad: 0.1912 (0.1972) loss: 0.7548 (0.7560) time: 0.1611 data: 0.0678 max mem: 9377 +Train: [72] [2500/6250] eta: 0:10:18 lr: 0.000024 grad: 0.1772 (0.1968) loss: 0.7564 (0.7560) time: 0.1452 data: 0.0516 max mem: 9377 +Train: [72] [2600/6250] eta: 0:10:00 lr: 0.000024 grad: 0.1965 (0.1968) loss: 0.7463 (0.7557) time: 0.1592 data: 0.0751 max mem: 9377 +Train: [72] [2700/6250] eta: 0:09:43 lr: 0.000024 grad: 0.1871 (0.1966) loss: 0.7518 (0.7555) time: 0.1653 data: 0.0781 max mem: 9377 +Train: [72] [2800/6250] eta: 0:09:25 lr: 0.000024 grad: 0.1925 (0.1967) loss: 0.7482 (0.7553) time: 0.1726 data: 0.0862 max mem: 9377 +Train: [72] [2900/6250] eta: 0:09:07 lr: 0.000024 grad: 0.1838 (0.1965) loss: 0.7545 (0.7552) time: 0.1471 data: 0.0613 max mem: 9377 +Train: [72] [3000/6250] eta: 0:08:49 lr: 0.000024 grad: 0.1920 (0.1963) loss: 0.7522 (0.7550) time: 0.1492 data: 0.0593 max mem: 9377 +Train: [72] [3100/6250] eta: 0:08:33 lr: 0.000024 grad: 0.1932 (0.1962) loss: 0.7512 (0.7549) time: 0.1799 data: 0.0892 max mem: 9377 +Train: [72] [3200/6250] eta: 0:08:16 lr: 0.000024 grad: 0.1873 (0.1960) loss: 0.7463 (0.7547) time: 0.1530 data: 0.0670 max mem: 9377 +Train: [72] [3300/6250] eta: 0:07:59 lr: 0.000024 grad: 0.1898 (0.1959) loss: 0.7523 (0.7546) time: 0.1664 data: 0.0747 max mem: 9377 +Train: [72] [3400/6250] eta: 0:07:43 lr: 0.000024 grad: 0.1848 (0.1958) loss: 0.7511 (0.7546) time: 0.1951 data: 0.0975 max mem: 9377 +Train: [72] [3500/6250] eta: 0:07:27 lr: 0.000024 grad: 0.1801 (0.1957) loss: 0.7519 (0.7546) time: 0.2310 data: 0.1553 max mem: 9377 +Train: [72] [3600/6250] eta: 0:07:11 lr: 0.000024 grad: 0.1804 (0.1954) loss: 0.7514 (0.7546) time: 0.1707 data: 0.0910 max mem: 9377 +Train: [72] [3700/6250] eta: 0:06:55 lr: 0.000024 grad: 0.1892 (0.1952) loss: 0.7402 (0.7545) time: 0.1727 data: 0.0957 max mem: 9377 +Train: [72] [3800/6250] eta: 0:06:39 lr: 0.000024 grad: 0.1913 (0.1950) loss: 0.7526 (0.7545) time: 0.1631 data: 0.0795 max mem: 9377 +Train: [72] [3900/6250] eta: 0:06:24 lr: 0.000024 grad: 0.1811 (0.1948) loss: 0.7521 (0.7544) time: 0.1627 data: 0.0658 max mem: 9377 +Train: [72] [4000/6250] eta: 0:06:08 lr: 0.000024 grad: 0.1866 (0.1947) loss: 0.7457 (0.7543) time: 0.1744 data: 0.0860 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:52 lr: 0.000024 grad: 0.1939 (0.1946) loss: 0.7413 (0.7543) time: 0.1750 data: 0.0796 max mem: 9377 +Train: [72] [4200/6250] eta: 0:05:35 lr: 0.000024 grad: 0.1805 (0.1945) loss: 0.7595 (0.7543) time: 0.1607 data: 0.0738 max mem: 9377 +Train: [72] [4300/6250] eta: 0:05:19 lr: 0.000024 grad: 0.1800 (0.1944) loss: 0.7490 (0.7542) time: 0.1546 data: 0.0539 max mem: 9377 +Train: [72] [4400/6250] eta: 0:05:02 lr: 0.000024 grad: 0.1963 (0.1943) loss: 0.7417 (0.7542) time: 0.1649 data: 0.0722 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:46 lr: 0.000024 grad: 0.1801 (0.1942) loss: 0.7616 (0.7542) time: 0.1500 data: 0.0618 max mem: 9377 +Train: [72] [4600/6250] eta: 0:04:29 lr: 0.000024 grad: 0.1853 (0.1941) loss: 0.7522 (0.7542) time: 0.1545 data: 0.0659 max mem: 9377 +Train: [72] [4700/6250] eta: 0:04:12 lr: 0.000024 grad: 0.1860 (0.1940) loss: 0.7588 (0.7542) time: 0.1310 data: 0.0411 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:56 lr: 0.000024 grad: 0.1921 (0.1939) loss: 0.7511 (0.7543) time: 0.1629 data: 0.0678 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:40 lr: 0.000024 grad: 0.1849 (0.1938) loss: 0.7501 (0.7543) time: 0.1707 data: 0.0903 max mem: 9377 +Train: [72] [5000/6250] eta: 0:03:24 lr: 0.000024 grad: 0.1878 (0.1937) loss: 0.7522 (0.7543) time: 0.1566 data: 0.0733 max mem: 9377 +Train: [72] [5100/6250] eta: 0:03:07 lr: 0.000024 grad: 0.1984 (0.1937) loss: 0.7454 (0.7543) time: 0.1647 data: 0.0769 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:51 lr: 0.000024 grad: 0.1767 (0.1936) loss: 0.7593 (0.7543) time: 0.1547 data: 0.0661 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:34 lr: 0.000024 grad: 0.1783 (0.1934) loss: 0.7511 (0.7543) time: 0.1529 data: 0.0689 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:18 lr: 0.000024 grad: 0.1808 (0.1934) loss: 0.7571 (0.7543) time: 0.1447 data: 0.0563 max mem: 9377 +Train: [72] [5500/6250] eta: 0:02:02 lr: 0.000023 grad: 0.1887 (0.1933) loss: 0.7583 (0.7543) time: 0.1774 data: 0.0908 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:45 lr: 0.000023 grad: 0.1873 (0.1932) loss: 0.7485 (0.7542) time: 0.1651 data: 0.0677 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:29 lr: 0.000023 grad: 0.1780 (0.1932) loss: 0.7581 (0.7542) time: 0.1384 data: 0.0491 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:12 lr: 0.000023 grad: 0.1883 (0.1931) loss: 0.7423 (0.7541) time: 0.1515 data: 0.0630 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:56 lr: 0.000023 grad: 0.1896 (0.1932) loss: 0.7538 (0.7540) time: 0.1276 data: 0.0288 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:40 lr: 0.000023 grad: 0.1887 (0.1931) loss: 0.7478 (0.7539) time: 0.1381 data: 0.0489 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:24 lr: 0.000023 grad: 0.1901 (0.1931) loss: 0.7534 (0.7539) time: 0.1581 data: 0.0733 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:08 lr: 0.000023 grad: 0.1830 (0.1930) loss: 0.7492 (0.7538) time: 0.1511 data: 0.0631 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1954 (0.1930) loss: 0.7430 (0.7538) time: 0.1658 data: 0.0816 max mem: 9377 +Train: [72] Total time: 0:16:57 (0.1627 s / it) +Averaged stats: lr: 0.000023 grad: 0.1954 (0.1930) loss: 0.7430 (0.7538) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:06:57 loss: 0.8493 (0.8493) time: 6.7412 data: 6.7085 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8544 (0.8561) time: 0.1330 data: 0.1075 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:17 (0.2780 s / it) +Averaged stats (hcp-train-subset): loss: 0.8544 (0.8561) +Eval (hcp-val): [72] [ 0/62] eta: 0:04:06 loss: 0.8537 (0.8537) time: 3.9806 data: 3.9340 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8523 (0.8547) time: 0.1603 data: 0.1346 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:16 (0.2614 s / it) +Averaged stats (hcp-val): loss: 0.8523 (0.8547) +Eval (nsd-val): [72] [ 0/62] eta: 0:05:23 loss: 0.8239 (0.8239) time: 5.2178 data: 5.1858 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8320 (0.8331) time: 0.1629 data: 0.1372 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:15 (0.2533 s / it) +Averaged stats (nsd-val): loss: 0.8320 (0.8331) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 12:52:06 lr: 0.000023 grad: 0.3946 (0.3946) loss: 0.8220 (0.8220) time: 7.4122 data: 7.2815 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:25:11 lr: 0.000023 grad: 0.2453 (0.3176) loss: 0.7706 (0.7819) time: 0.2009 data: 0.0982 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:21:55 lr: 0.000023 grad: 0.2094 (0.2762) loss: 0.7621 (0.7735) time: 0.1767 data: 0.0672 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:20:10 lr: 0.000023 grad: 0.2146 (0.2530) loss: 0.7711 (0.7735) time: 0.1787 data: 0.0767 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:18:47 lr: 0.000023 grad: 0.2057 (0.2402) loss: 0.7620 (0.7732) time: 0.1449 data: 0.0507 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:17:44 lr: 0.000023 grad: 0.2050 (0.2311) loss: 0.7637 (0.7710) time: 0.1609 data: 0.0636 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:17:04 lr: 0.000023 grad: 0.2073 (0.2259) loss: 0.7459 (0.7678) time: 0.1615 data: 0.0640 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:16:37 lr: 0.000023 grad: 0.1881 (0.2218) loss: 0.7633 (0.7659) time: 0.1748 data: 0.0836 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:16:11 lr: 0.000023 grad: 0.1886 (0.2179) loss: 0.7580 (0.7644) time: 0.1621 data: 0.0721 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:15:47 lr: 0.000023 grad: 0.1976 (0.2150) loss: 0.7444 (0.7630) time: 0.1745 data: 0.0806 max mem: 9377 +Train: [73] [1000/6250] eta: 0:15:20 lr: 0.000023 grad: 0.1821 (0.2123) loss: 0.7583 (0.7620) time: 0.1650 data: 0.0744 max mem: 9377 +Train: [73] [1100/6250] eta: 0:14:55 lr: 0.000023 grad: 0.1839 (0.2099) loss: 0.7453 (0.7612) time: 0.1535 data: 0.0546 max mem: 9377 +Train: [73] [1200/6250] eta: 0:14:35 lr: 0.000023 grad: 0.1788 (0.2081) loss: 0.7569 (0.7608) time: 0.1368 data: 0.0431 max mem: 9377 +Train: [73] [1300/6250] eta: 0:14:12 lr: 0.000023 grad: 0.1987 (0.2066) loss: 0.7441 (0.7597) time: 0.1546 data: 0.0641 max mem: 9377 +Train: [73] [1400/6250] eta: 0:14:00 lr: 0.000023 grad: 0.1827 (0.2054) loss: 0.7453 (0.7590) time: 0.2790 data: 0.2034 max mem: 9377 +Train: [73] [1500/6250] eta: 0:13:41 lr: 0.000023 grad: 0.1840 (0.2044) loss: 0.7463 (0.7582) time: 0.1532 data: 0.0741 max mem: 9377 +Train: [73] [1600/6250] eta: 0:13:23 lr: 0.000023 grad: 0.1881 (0.2033) loss: 0.7470 (0.7579) time: 0.1890 data: 0.1044 max mem: 9377 +Train: [73] [1700/6250] eta: 0:13:05 lr: 0.000023 grad: 0.1932 (0.2025) loss: 0.7452 (0.7572) time: 0.1251 data: 0.0223 max mem: 9377 +Train: [73] [1800/6250] eta: 0:12:50 lr: 0.000023 grad: 0.1894 (0.2017) loss: 0.7524 (0.7569) time: 0.1877 data: 0.0934 max mem: 9377 +Train: [73] [1900/6250] eta: 0:12:35 lr: 0.000023 grad: 0.1915 (0.2010) loss: 0.7444 (0.7565) time: 0.1851 data: 0.0805 max mem: 9377 +Train: [73] [2000/6250] eta: 0:12:18 lr: 0.000023 grad: 0.1857 (0.2004) loss: 0.7510 (0.7562) time: 0.1861 data: 0.0915 max mem: 9377 +Train: [73] [2100/6250] eta: 0:12:01 lr: 0.000023 grad: 0.1885 (0.1999) loss: 0.7433 (0.7557) time: 0.1699 data: 0.0743 max mem: 9377 +Train: [73] [2200/6250] eta: 0:11:41 lr: 0.000023 grad: 0.1967 (0.1993) loss: 0.7402 (0.7553) time: 0.1679 data: 0.0849 max mem: 9377 +Train: [73] [2300/6250] eta: 0:11:22 lr: 0.000023 grad: 0.1808 (0.1990) loss: 0.7476 (0.7550) time: 0.1722 data: 0.0769 max mem: 9377 +Train: [73] [2400/6250] eta: 0:11:02 lr: 0.000023 grad: 0.1867 (0.1987) loss: 0.7482 (0.7547) time: 0.1591 data: 0.0602 max mem: 9377 +Train: [73] [2500/6250] eta: 0:10:43 lr: 0.000023 grad: 0.1856 (0.1984) loss: 0.7529 (0.7544) time: 0.1489 data: 0.0597 max mem: 9377 +Train: [73] [2600/6250] eta: 0:10:24 lr: 0.000023 grad: 0.1793 (0.1978) loss: 0.7587 (0.7544) time: 0.1570 data: 0.0600 max mem: 9377 +Train: [73] [2700/6250] eta: 0:10:06 lr: 0.000023 grad: 0.1863 (0.1973) loss: 0.7517 (0.7543) time: 0.1637 data: 0.0785 max mem: 9377 +Train: [73] [2800/6250] eta: 0:09:47 lr: 0.000023 grad: 0.1799 (0.1970) loss: 0.7574 (0.7542) time: 0.1584 data: 0.0678 max mem: 9377 +Train: [73] [2900/6250] eta: 0:09:30 lr: 0.000023 grad: 0.1837 (0.1969) loss: 0.7529 (0.7541) time: 0.1769 data: 0.0843 max mem: 9377 +Train: [73] [3000/6250] eta: 0:09:12 lr: 0.000023 grad: 0.1817 (0.1965) loss: 0.7421 (0.7539) time: 0.1672 data: 0.0818 max mem: 9377 +Train: [73] [3100/6250] eta: 0:08:53 lr: 0.000023 grad: 0.1813 (0.1965) loss: 0.7448 (0.7538) time: 0.1497 data: 0.0642 max mem: 9377 +Train: [73] [3200/6250] eta: 0:08:36 lr: 0.000022 grad: 0.1848 (0.1962) loss: 0.7445 (0.7537) time: 0.2179 data: 0.1369 max mem: 9377 +Train: [73] [3300/6250] eta: 0:08:18 lr: 0.000022 grad: 0.1835 (0.1959) loss: 0.7553 (0.7537) time: 0.1580 data: 0.0623 max mem: 9377 +Train: [73] [3400/6250] eta: 0:07:59 lr: 0.000022 grad: 0.1846 (0.1957) loss: 0.7506 (0.7536) time: 0.1576 data: 0.0682 max mem: 9377 +Train: [73] [3500/6250] eta: 0:07:43 lr: 0.000022 grad: 0.1842 (0.1955) loss: 0.7468 (0.7535) time: 0.1744 data: 0.0960 max mem: 9377 +Train: [73] [3600/6250] eta: 0:07:25 lr: 0.000022 grad: 0.1839 (0.1954) loss: 0.7547 (0.7535) time: 0.1556 data: 0.0684 max mem: 9377 +Train: [73] [3700/6250] eta: 0:07:08 lr: 0.000022 grad: 0.1863 (0.1953) loss: 0.7432 (0.7535) time: 0.1736 data: 0.0854 max mem: 9377 +Train: [73] [3800/6250] eta: 0:06:52 lr: 0.000022 grad: 0.1922 (0.1953) loss: 0.7521 (0.7534) time: 0.1687 data: 0.0859 max mem: 9377 +Train: [73] [3900/6250] eta: 0:06:35 lr: 0.000022 grad: 0.1760 (0.1951) loss: 0.7481 (0.7533) time: 0.1717 data: 0.0862 max mem: 9377 +Train: [73] [4000/6250] eta: 0:06:19 lr: 0.000022 grad: 0.1853 (0.1950) loss: 0.7575 (0.7534) time: 0.1926 data: 0.1088 max mem: 9377 +Train: [73] [4100/6250] eta: 0:06:02 lr: 0.000022 grad: 0.1892 (0.1949) loss: 0.7525 (0.7534) time: 0.1971 data: 0.1073 max mem: 9377 +Train: [73] [4200/6250] eta: 0:05:45 lr: 0.000022 grad: 0.1853 (0.1949) loss: 0.7599 (0.7533) time: 0.1768 data: 0.0784 max mem: 9377 +Train: [73] [4300/6250] eta: 0:05:28 lr: 0.000022 grad: 0.1942 (0.1949) loss: 0.7526 (0.7533) time: 0.1685 data: 0.0768 max mem: 9377 +Train: [73] [4400/6250] eta: 0:05:11 lr: 0.000022 grad: 0.1832 (0.1948) loss: 0.7574 (0.7534) time: 0.1753 data: 0.0852 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:53 lr: 0.000022 grad: 0.1785 (0.1946) loss: 0.7575 (0.7534) time: 0.1581 data: 0.0661 max mem: 9377 +Train: [73] [4600/6250] eta: 0:04:36 lr: 0.000022 grad: 0.1887 (0.1945) loss: 0.7535 (0.7534) time: 0.1638 data: 0.0760 max mem: 9377 +Train: [73] [4700/6250] eta: 0:04:19 lr: 0.000022 grad: 0.1847 (0.1944) loss: 0.7620 (0.7534) time: 0.1616 data: 0.0785 max mem: 9377 +Train: [73] [4800/6250] eta: 0:04:02 lr: 0.000022 grad: 0.1874 (0.1943) loss: 0.7529 (0.7534) time: 0.1714 data: 0.0818 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:45 lr: 0.000022 grad: 0.1899 (0.1942) loss: 0.7471 (0.7533) time: 0.1743 data: 0.0813 max mem: 9377 +Train: [73] [5000/6250] eta: 0:03:28 lr: 0.000022 grad: 0.1938 (0.1943) loss: 0.7433 (0.7533) time: 0.1532 data: 0.0653 max mem: 9377 +Train: [73] [5100/6250] eta: 0:03:12 lr: 0.000022 grad: 0.1914 (0.1942) loss: 0.7500 (0.7532) time: 0.1634 data: 0.0808 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:55 lr: 0.000022 grad: 0.1837 (0.1944) loss: 0.7484 (0.7531) time: 0.1828 data: 0.1053 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:38 lr: 0.000022 grad: 0.1835 (0.1944) loss: 0.7463 (0.7530) time: 0.1620 data: 0.0772 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:21 lr: 0.000022 grad: 0.1858 (0.1944) loss: 0.7542 (0.7529) time: 0.1461 data: 0.0562 max mem: 9377 +Train: [73] [5500/6250] eta: 0:02:04 lr: 0.000022 grad: 0.2034 (0.1944) loss: 0.7404 (0.7528) time: 0.1351 data: 0.0405 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:48 lr: 0.000022 grad: 0.1984 (0.1945) loss: 0.7417 (0.7527) time: 0.1364 data: 0.0360 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:31 lr: 0.000022 grad: 0.1905 (0.1945) loss: 0.7367 (0.7525) time: 0.1523 data: 0.0697 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:14 lr: 0.000022 grad: 0.1890 (0.1945) loss: 0.7541 (0.7525) time: 0.1443 data: 0.0587 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:58 lr: 0.000022 grad: 0.1941 (0.1944) loss: 0.7425 (0.7525) time: 0.1595 data: 0.0763 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:41 lr: 0.000022 grad: 0.1876 (0.1944) loss: 0.7521 (0.7525) time: 0.1513 data: 0.0558 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:24 lr: 0.000022 grad: 0.1839 (0.1943) loss: 0.7488 (0.7524) time: 0.1840 data: 0.0981 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:08 lr: 0.000022 grad: 0.1841 (0.1942) loss: 0.7517 (0.7524) time: 0.1535 data: 0.0672 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1880 (0.1942) loss: 0.7525 (0.7524) time: 0.1559 data: 0.0661 max mem: 9377 +Train: [73] Total time: 0:17:19 (0.1664 s / it) +Averaged stats: lr: 0.000022 grad: 0.1880 (0.1942) loss: 0.7525 (0.7524) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:06:26 loss: 0.8467 (0.8467) time: 6.2296 data: 6.1995 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8552 (0.8562) time: 0.1186 data: 0.0937 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:14 (0.2403 s / it) +Averaged stats (hcp-train-subset): loss: 0.8552 (0.8562) +Eval (hcp-val): [73] [ 0/62] eta: 0:06:20 loss: 0.8546 (0.8546) time: 6.1344 data: 6.1026 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8557 (0.8561) time: 0.1512 data: 0.1234 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:14 (0.2374 s / it) +Averaged stats (hcp-val): loss: 0.8557 (0.8561) +Eval (nsd-val): [73] [ 0/62] eta: 0:05:59 loss: 0.8205 (0.8205) time: 5.7935 data: 5.7607 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8292 (0.8306) time: 0.1299 data: 0.1046 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (nsd-val): loss: 0.8292 (0.8306) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 10:37:23 lr: 0.000022 grad: 0.1914 (0.1914) loss: 0.8111 (0.8111) time: 6.1189 data: 6.0064 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:23:45 lr: 0.000022 grad: 0.2951 (0.3383) loss: 0.7414 (0.7560) time: 0.1556 data: 0.0487 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:20:05 lr: 0.000022 grad: 0.2277 (0.3007) loss: 0.7420 (0.7513) time: 0.1670 data: 0.0721 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:18:16 lr: 0.000022 grad: 0.2015 (0.2768) loss: 0.7478 (0.7502) time: 0.1570 data: 0.0570 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:17:22 lr: 0.000022 grad: 0.2206 (0.2592) loss: 0.7427 (0.7508) time: 0.1444 data: 0.0332 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:16:39 lr: 0.000022 grad: 0.2029 (0.2499) loss: 0.7514 (0.7500) time: 0.1500 data: 0.0529 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:16:12 lr: 0.000022 grad: 0.2057 (0.2422) loss: 0.7422 (0.7495) time: 0.1588 data: 0.0585 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:15:41 lr: 0.000022 grad: 0.1948 (0.2368) loss: 0.7502 (0.7494) time: 0.1525 data: 0.0545 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:15:16 lr: 0.000022 grad: 0.1902 (0.2318) loss: 0.7500 (0.7498) time: 0.1561 data: 0.0618 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:14:54 lr: 0.000021 grad: 0.1823 (0.2278) loss: 0.7538 (0.7498) time: 0.1274 data: 0.0318 max mem: 9377 +Train: [74] [1000/6250] eta: 0:14:32 lr: 0.000021 grad: 0.1897 (0.2247) loss: 0.7511 (0.7495) time: 0.1522 data: 0.0667 max mem: 9377 +Train: [74] [1100/6250] eta: 0:14:08 lr: 0.000021 grad: 0.1914 (0.2221) loss: 0.7428 (0.7495) time: 0.1518 data: 0.0712 max mem: 9377 +Train: [74] [1200/6250] eta: 0:13:49 lr: 0.000021 grad: 0.1915 (0.2197) loss: 0.7538 (0.7496) time: 0.1526 data: 0.0669 max mem: 9377 +Train: [74] [1300/6250] eta: 0:13:29 lr: 0.000021 grad: 0.1817 (0.2173) loss: 0.7530 (0.7495) time: 0.1360 data: 0.0380 max mem: 9377 +Train: [74] [1400/6250] eta: 0:13:16 lr: 0.000021 grad: 0.1956 (0.2161) loss: 0.7506 (0.7491) time: 0.1557 data: 0.0794 max mem: 9377 +Train: [74] [1500/6250] eta: 0:12:58 lr: 0.000021 grad: 0.1853 (0.2150) loss: 0.7574 (0.7488) time: 0.1595 data: 0.0702 max mem: 9377 +Train: [74] [1600/6250] eta: 0:12:41 lr: 0.000021 grad: 0.1875 (0.2136) loss: 0.7519 (0.7488) time: 0.1906 data: 0.1071 max mem: 9377 +Train: [74] [1700/6250] eta: 0:12:23 lr: 0.000021 grad: 0.1990 (0.2125) loss: 0.7539 (0.7486) time: 0.1605 data: 0.0713 max mem: 9377 +Train: [74] [1800/6250] eta: 0:12:09 lr: 0.000021 grad: 0.1903 (0.2117) loss: 0.7486 (0.7483) time: 0.1828 data: 0.0902 max mem: 9377 +Train: [74] [1900/6250] eta: 0:11:53 lr: 0.000021 grad: 0.1940 (0.2107) loss: 0.7508 (0.7484) time: 0.1826 data: 0.0918 max mem: 9377 +Train: [74] [2000/6250] eta: 0:11:36 lr: 0.000021 grad: 0.1941 (0.2101) loss: 0.7552 (0.7485) time: 0.1571 data: 0.0636 max mem: 9377 +Train: [74] [2100/6250] eta: 0:11:21 lr: 0.000021 grad: 0.1988 (0.2093) loss: 0.7442 (0.7484) time: 0.1467 data: 0.0479 max mem: 9377 +Train: [74] [2200/6250] eta: 0:11:06 lr: 0.000021 grad: 0.2068 (0.2091) loss: 0.7298 (0.7482) time: 0.1869 data: 0.0961 max mem: 9377 +Train: [74] [2300/6250] eta: 0:10:48 lr: 0.000021 grad: 0.1925 (0.2086) loss: 0.7393 (0.7480) time: 0.1700 data: 0.0800 max mem: 9377 +Train: [74] [2400/6250] eta: 0:10:30 lr: 0.000021 grad: 0.1866 (0.2082) loss: 0.7489 (0.7478) time: 0.1704 data: 0.0860 max mem: 9377 +Train: [74] [2500/6250] eta: 0:10:12 lr: 0.000021 grad: 0.1905 (0.2078) loss: 0.7433 (0.7477) time: 0.1734 data: 0.0847 max mem: 9377 +Train: [74] [2600/6250] eta: 0:09:55 lr: 0.000021 grad: 0.2063 (0.2074) loss: 0.7385 (0.7476) time: 0.1727 data: 0.0852 max mem: 9377 +Train: [74] [2700/6250] eta: 0:09:37 lr: 0.000021 grad: 0.1971 (0.2069) loss: 0.7399 (0.7475) time: 0.1550 data: 0.0609 max mem: 9377 +Train: [74] [2800/6250] eta: 0:09:19 lr: 0.000021 grad: 0.1933 (0.2065) loss: 0.7465 (0.7477) time: 0.1312 data: 0.0511 max mem: 9377 +Train: [74] [2900/6250] eta: 0:09:02 lr: 0.000021 grad: 0.1897 (0.2061) loss: 0.7534 (0.7476) time: 0.1470 data: 0.0555 max mem: 9377 +Train: [74] [3000/6250] eta: 0:08:45 lr: 0.000021 grad: 0.1966 (0.2058) loss: 0.7501 (0.7476) time: 0.1385 data: 0.0368 max mem: 9377 +Train: [74] [3100/6250] eta: 0:08:29 lr: 0.000021 grad: 0.1905 (0.2054) loss: 0.7421 (0.7476) time: 0.2073 data: 0.1165 max mem: 9377 +Train: [74] [3200/6250] eta: 0:08:12 lr: 0.000021 grad: 0.1958 (0.2054) loss: 0.7421 (0.7475) time: 0.1732 data: 0.0864 max mem: 9377 +Train: [74] [3300/6250] eta: 0:07:55 lr: 0.000021 grad: 0.1887 (0.2051) loss: 0.7434 (0.7475) time: 0.1566 data: 0.0663 max mem: 9377 +Train: [74] [3400/6250] eta: 0:07:40 lr: 0.000021 grad: 0.1947 (0.2049) loss: 0.7355 (0.7474) time: 0.1852 data: 0.1030 max mem: 9377 +Train: [74] [3500/6250] eta: 0:07:24 lr: 0.000021 grad: 0.1803 (0.2045) loss: 0.7471 (0.7474) time: 0.1761 data: 0.0858 max mem: 9377 +Train: [74] [3600/6250] eta: 0:07:09 lr: 0.000021 grad: 0.2019 (0.2043) loss: 0.7274 (0.7472) time: 0.2029 data: 0.1186 max mem: 9377 +Train: [74] [3700/6250] eta: 0:06:52 lr: 0.000021 grad: 0.1952 (0.2041) loss: 0.7354 (0.7472) time: 0.1509 data: 0.0619 max mem: 9377 +Train: [74] [3800/6250] eta: 0:06:36 lr: 0.000021 grad: 0.2027 (0.2039) loss: 0.7347 (0.7470) time: 0.1511 data: 0.0693 max mem: 9377 +Train: [74] [3900/6250] eta: 0:06:19 lr: 0.000021 grad: 0.2024 (0.2037) loss: 0.7357 (0.7469) time: 0.1739 data: 0.0970 max mem: 9377 +Train: [74] [4000/6250] eta: 0:06:03 lr: 0.000021 grad: 0.1906 (0.2036) loss: 0.7465 (0.7468) time: 0.1685 data: 0.0819 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:47 lr: 0.000021 grad: 0.1832 (0.2035) loss: 0.7440 (0.7467) time: 0.1732 data: 0.0828 max mem: 9377 +Train: [74] [4200/6250] eta: 0:05:31 lr: 0.000021 grad: 0.1958 (0.2032) loss: 0.7420 (0.7467) time: 0.1875 data: 0.1010 max mem: 9377 +Train: [74] [4300/6250] eta: 0:05:14 lr: 0.000021 grad: 0.1936 (0.2030) loss: 0.7436 (0.7467) time: 0.1474 data: 0.0453 max mem: 9377 +Train: [74] [4400/6250] eta: 0:04:58 lr: 0.000021 grad: 0.1868 (0.2028) loss: 0.7509 (0.7467) time: 0.1665 data: 0.0718 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:41 lr: 0.000021 grad: 0.2042 (0.2028) loss: 0.7433 (0.7466) time: 0.1446 data: 0.0505 max mem: 9377 +Train: [74] [4600/6250] eta: 0:04:24 lr: 0.000021 grad: 0.1965 (0.2026) loss: 0.7399 (0.7466) time: 0.1355 data: 0.0432 max mem: 9377 +Train: [74] [4700/6250] eta: 0:04:09 lr: 0.000021 grad: 0.1937 (0.2024) loss: 0.7520 (0.7467) time: 0.1817 data: 0.1089 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:53 lr: 0.000021 grad: 0.1897 (0.2023) loss: 0.7414 (0.7467) time: 0.1552 data: 0.0771 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:37 lr: 0.000020 grad: 0.1886 (0.2022) loss: 0.7497 (0.7468) time: 0.1426 data: 0.0587 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:21 lr: 0.000020 grad: 0.1927 (0.2020) loss: 0.7504 (0.7469) time: 0.1847 data: 0.1053 max mem: 9377 +Train: [74] [5100/6250] eta: 0:03:05 lr: 0.000020 grad: 0.1919 (0.2020) loss: 0.7462 (0.7469) time: 0.1505 data: 0.0655 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:49 lr: 0.000020 grad: 0.1818 (0.2019) loss: 0.7575 (0.7469) time: 0.1938 data: 0.0895 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:33 lr: 0.000020 grad: 0.1815 (0.2018) loss: 0.7552 (0.7470) time: 0.1786 data: 0.0844 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:17 lr: 0.000020 grad: 0.1793 (0.2016) loss: 0.7663 (0.7471) time: 0.1618 data: 0.0697 max mem: 9377 +Train: [74] [5500/6250] eta: 0:02:01 lr: 0.000020 grad: 0.1820 (0.2013) loss: 0.7662 (0.7473) time: 0.1361 data: 0.0491 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:45 lr: 0.000020 grad: 0.1822 (0.2011) loss: 0.7599 (0.7474) time: 0.1612 data: 0.0715 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:28 lr: 0.000020 grad: 0.1781 (0.2009) loss: 0.7612 (0.7476) time: 0.1404 data: 0.0544 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:12 lr: 0.000020 grad: 0.1817 (0.2007) loss: 0.7583 (0.7477) time: 0.1604 data: 0.0614 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:56 lr: 0.000020 grad: 0.1879 (0.2005) loss: 0.7600 (0.7479) time: 0.1401 data: 0.0564 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:40 lr: 0.000020 grad: 0.1855 (0.2003) loss: 0.7602 (0.7480) time: 0.1539 data: 0.0674 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:24 lr: 0.000020 grad: 0.1790 (0.2001) loss: 0.7605 (0.7483) time: 0.1460 data: 0.0599 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:08 lr: 0.000020 grad: 0.1765 (0.1999) loss: 0.7674 (0.7485) time: 0.1410 data: 0.0584 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1834 (0.1997) loss: 0.7675 (0.7486) time: 0.1587 data: 0.0658 max mem: 9377 +Train: [74] Total time: 0:16:59 (0.1632 s / it) +Averaged stats: lr: 0.000020 grad: 0.1834 (0.1997) loss: 0.7675 (0.7486) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:05:56 loss: 0.8505 (0.8505) time: 5.7555 data: 5.7229 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8550 (0.8567) time: 0.1530 data: 0.1277 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:16 (0.2608 s / it) +Averaged stats (hcp-train-subset): loss: 0.8550 (0.8567) +Making plots (hcp-train-subset): example=19 +Eval (hcp-val): [74] [ 0/62] eta: 0:04:48 loss: 0.8580 (0.8580) time: 4.6601 data: 4.5798 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8531 (0.8559) time: 0.1678 data: 0.1414 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:16 (0.2694 s / it) +Averaged stats (hcp-val): loss: 0.8531 (0.8559) +Making plots (hcp-val): example=5 +Eval (nsd-val): [74] [ 0/62] eta: 0:07:18 loss: 0.8234 (0.8234) time: 7.0694 data: 7.0348 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8311 (0.8320) time: 0.1727 data: 0.1454 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:17 (0.2761 s / it) +Averaged stats (nsd-val): loss: 0.8311 (0.8320) +Making plots (nsd-val): example=52 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 11:11:57 lr: 0.000020 grad: 0.7777 (0.7777) loss: 0.7920 (0.7920) time: 6.4507 data: 6.1793 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:25:45 lr: 0.000020 grad: 0.2532 (0.3015) loss: 0.7479 (0.7662) time: 0.1816 data: 0.0680 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:22:28 lr: 0.000020 grad: 0.2356 (0.2805) loss: 0.7544 (0.7626) time: 0.1696 data: 0.0709 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:20:48 lr: 0.000020 grad: 0.2220 (0.2655) loss: 0.7564 (0.7594) time: 0.1704 data: 0.0797 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:19:48 lr: 0.000020 grad: 0.2247 (0.2546) loss: 0.7601 (0.7589) time: 0.1759 data: 0.0761 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:19:06 lr: 0.000020 grad: 0.1911 (0.2451) loss: 0.7707 (0.7597) time: 0.1971 data: 0.0979 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:18:23 lr: 0.000020 grad: 0.2204 (0.2391) loss: 0.7648 (0.7594) time: 0.1718 data: 0.0751 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:17:36 lr: 0.000020 grad: 0.1968 (0.2339) loss: 0.7577 (0.7591) time: 0.1490 data: 0.0573 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:17:15 lr: 0.000020 grad: 0.2037 (0.2294) loss: 0.7454 (0.7590) time: 0.2321 data: 0.1449 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:16:38 lr: 0.000020 grad: 0.1940 (0.2260) loss: 0.7565 (0.7583) time: 0.1388 data: 0.0384 max mem: 9377 +Train: [75] [1000/6250] eta: 0:16:08 lr: 0.000020 grad: 0.1969 (0.2238) loss: 0.7359 (0.7573) time: 0.1517 data: 0.0576 max mem: 9377 +Train: [75] [1100/6250] eta: 0:15:38 lr: 0.000020 grad: 0.1962 (0.2219) loss: 0.7524 (0.7563) time: 0.1541 data: 0.0563 max mem: 9377 +Train: [75] [1200/6250] eta: 0:15:08 lr: 0.000020 grad: 0.1999 (0.2201) loss: 0.7528 (0.7555) time: 0.1658 data: 0.0763 max mem: 9377 +Train: [75] [1300/6250] eta: 0:14:41 lr: 0.000020 grad: 0.2017 (0.2187) loss: 0.7567 (0.7551) time: 0.1421 data: 0.0579 max mem: 9377 +Train: [75] [1400/6250] eta: 0:14:24 lr: 0.000020 grad: 0.2011 (0.2178) loss: 0.7461 (0.7542) time: 0.1921 data: 0.0979 max mem: 9377 +Train: [75] [1500/6250] eta: 0:14:02 lr: 0.000020 grad: 0.1998 (0.2166) loss: 0.7474 (0.7536) time: 0.1852 data: 0.0969 max mem: 9377 +Train: [75] [1600/6250] eta: 0:13:42 lr: 0.000020 grad: 0.1879 (0.2155) loss: 0.7639 (0.7532) time: 0.1680 data: 0.0808 max mem: 9377 +Train: [75] [1700/6250] eta: 0:13:24 lr: 0.000020 grad: 0.1930 (0.2145) loss: 0.7357 (0.7525) time: 0.1804 data: 0.0989 max mem: 9377 +Train: [75] [1800/6250] eta: 0:13:06 lr: 0.000020 grad: 0.2007 (0.2139) loss: 0.7442 (0.7519) time: 0.1637 data: 0.0606 max mem: 9377 +Train: [75] [1900/6250] eta: 0:12:48 lr: 0.000020 grad: 0.1933 (0.2132) loss: 0.7428 (0.7513) time: 0.1586 data: 0.0698 max mem: 9377 +Train: [75] [2000/6250] eta: 0:12:29 lr: 0.000020 grad: 0.1903 (0.2126) loss: 0.7460 (0.7510) time: 0.1629 data: 0.0801 max mem: 9377 +Train: [75] [2100/6250] eta: 0:12:11 lr: 0.000020 grad: 0.1960 (0.2119) loss: 0.7434 (0.7507) time: 0.1714 data: 0.0771 max mem: 9377 +Train: [75] [2200/6250] eta: 0:11:51 lr: 0.000020 grad: 0.1871 (0.2112) loss: 0.7558 (0.7504) time: 0.1398 data: 0.0379 max mem: 9377 +Train: [75] [2300/6250] eta: 0:11:32 lr: 0.000020 grad: 0.2030 (0.2109) loss: 0.7402 (0.7501) time: 0.1525 data: 0.0622 max mem: 9377 +Train: [75] [2400/6250] eta: 0:11:12 lr: 0.000020 grad: 0.1907 (0.2106) loss: 0.7584 (0.7499) time: 0.1741 data: 0.0830 max mem: 9377 +Train: [75] [2500/6250] eta: 0:10:52 lr: 0.000020 grad: 0.1945 (0.2101) loss: 0.7384 (0.7497) time: 0.1518 data: 0.0655 max mem: 9377 +Train: [75] [2600/6250] eta: 0:10:32 lr: 0.000020 grad: 0.1935 (0.2096) loss: 0.7430 (0.7497) time: 0.1561 data: 0.0644 max mem: 9377 +Train: [75] [2700/6250] eta: 0:10:13 lr: 0.000020 grad: 0.2011 (0.2093) loss: 0.7436 (0.7494) time: 0.1692 data: 0.0832 max mem: 9377 +Train: [75] [2800/6250] eta: 0:09:55 lr: 0.000019 grad: 0.2004 (0.2091) loss: 0.7451 (0.7493) time: 0.1263 data: 0.0287 max mem: 9377 +Train: [75] [2900/6250] eta: 0:09:35 lr: 0.000019 grad: 0.1886 (0.2087) loss: 0.7528 (0.7492) time: 0.1417 data: 0.0566 max mem: 9377 +Train: [75] [3000/6250] eta: 0:09:18 lr: 0.000019 grad: 0.1968 (0.2084) loss: 0.7482 (0.7492) time: 0.1655 data: 0.0826 max mem: 9377 +Train: [75] [3100/6250] eta: 0:09:00 lr: 0.000019 grad: 0.1911 (0.2081) loss: 0.7554 (0.7491) time: 0.1741 data: 0.0909 max mem: 9377 +Train: [75] [3200/6250] eta: 0:08:41 lr: 0.000019 grad: 0.1884 (0.2076) loss: 0.7495 (0.7491) time: 0.1315 data: 0.0341 max mem: 9377 +Train: [75] [3300/6250] eta: 0:08:23 lr: 0.000019 grad: 0.1801 (0.2072) loss: 0.7389 (0.7490) time: 0.1659 data: 0.0806 max mem: 9377 +Train: [75] [3400/6250] eta: 0:08:05 lr: 0.000019 grad: 0.1981 (0.2068) loss: 0.7411 (0.7490) time: 0.1673 data: 0.0873 max mem: 9377 +Train: [75] [3500/6250] eta: 0:07:48 lr: 0.000019 grad: 0.1881 (0.2064) loss: 0.7533 (0.7490) time: 0.1739 data: 0.0848 max mem: 9377 +Train: [75] [3600/6250] eta: 0:07:30 lr: 0.000019 grad: 0.1825 (0.2062) loss: 0.7478 (0.7490) time: 0.1578 data: 0.0745 max mem: 9377 +Train: [75] [3700/6250] eta: 0:07:13 lr: 0.000019 grad: 0.1869 (0.2059) loss: 0.7528 (0.7491) time: 0.1698 data: 0.0818 max mem: 9377 +Train: [75] [3800/6250] eta: 0:06:55 lr: 0.000019 grad: 0.1795 (0.2055) loss: 0.7505 (0.7492) time: 0.1741 data: 0.0773 max mem: 9377 +Train: [75] [3900/6250] eta: 0:06:38 lr: 0.000019 grad: 0.1935 (0.2051) loss: 0.7544 (0.7493) time: 0.1428 data: 0.0574 max mem: 9377 +Train: [75] [4000/6250] eta: 0:06:21 lr: 0.000019 grad: 0.1912 (0.2048) loss: 0.7485 (0.7494) time: 0.1763 data: 0.0819 max mem: 9377 +Train: [75] [4100/6250] eta: 0:06:04 lr: 0.000019 grad: 0.1916 (0.2044) loss: 0.7577 (0.7495) time: 0.1855 data: 0.0889 max mem: 9377 +Train: [75] [4200/6250] eta: 0:05:46 lr: 0.000019 grad: 0.1821 (0.2040) loss: 0.7568 (0.7496) time: 0.1368 data: 0.0445 max mem: 9377 +Train: [75] [4300/6250] eta: 0:05:29 lr: 0.000019 grad: 0.1807 (0.2037) loss: 0.7603 (0.7497) time: 0.1811 data: 0.0993 max mem: 9377 +Train: [75] [4400/6250] eta: 0:05:11 lr: 0.000019 grad: 0.1862 (0.2035) loss: 0.7552 (0.7498) time: 0.1555 data: 0.0616 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:54 lr: 0.000019 grad: 0.1827 (0.2031) loss: 0.7573 (0.7500) time: 0.1904 data: 0.1112 max mem: 9377 +Train: [75] [4600/6250] eta: 0:04:37 lr: 0.000019 grad: 0.1958 (0.2029) loss: 0.7422 (0.7499) time: 0.1839 data: 0.1076 max mem: 9377 +Train: [75] [4700/6250] eta: 0:04:21 lr: 0.000019 grad: 0.1936 (0.2026) loss: 0.7498 (0.7499) time: 0.1736 data: 0.0835 max mem: 9377 +Train: [75] [4800/6250] eta: 0:04:04 lr: 0.000019 grad: 0.1768 (0.2024) loss: 0.7536 (0.7499) time: 0.1732 data: 0.0836 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:47 lr: 0.000019 grad: 0.1917 (0.2021) loss: 0.7460 (0.7500) time: 0.1443 data: 0.0533 max mem: 9377 +Train: [75] [5000/6250] eta: 0:03:29 lr: 0.000019 grad: 0.1832 (0.2018) loss: 0.7550 (0.7501) time: 0.1705 data: 0.0760 max mem: 9377 +Train: [75] [5100/6250] eta: 0:03:13 lr: 0.000019 grad: 0.1801 (0.2016) loss: 0.7616 (0.7502) time: 0.1681 data: 0.0845 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:56 lr: 0.000019 grad: 0.1836 (0.2013) loss: 0.7652 (0.7503) time: 0.1438 data: 0.0612 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:39 lr: 0.000019 grad: 0.1910 (0.2011) loss: 0.7498 (0.7504) time: 0.1465 data: 0.0516 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:22 lr: 0.000019 grad: 0.1857 (0.2009) loss: 0.7460 (0.7505) time: 0.1568 data: 0.0729 max mem: 9377 +Train: [75] [5500/6250] eta: 0:02:05 lr: 0.000019 grad: 0.1948 (0.2007) loss: 0.7557 (0.7506) time: 0.1224 data: 0.0322 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:48 lr: 0.000019 grad: 0.1852 (0.2004) loss: 0.7636 (0.7507) time: 0.1820 data: 0.0982 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:31 lr: 0.000019 grad: 0.1946 (0.2002) loss: 0.7545 (0.7508) time: 0.1462 data: 0.0660 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:14 lr: 0.000019 grad: 0.1921 (0.2001) loss: 0.7459 (0.7509) time: 0.1508 data: 0.0601 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:58 lr: 0.000019 grad: 0.1899 (0.1999) loss: 0.7605 (0.7510) time: 0.1498 data: 0.0631 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:41 lr: 0.000019 grad: 0.1952 (0.1998) loss: 0.7566 (0.7511) time: 0.1734 data: 0.0916 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:24 lr: 0.000019 grad: 0.1949 (0.1997) loss: 0.7519 (0.7512) time: 0.1509 data: 0.0614 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:08 lr: 0.000019 grad: 0.1900 (0.1996) loss: 0.7607 (0.7513) time: 0.1255 data: 0.0348 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1918 (0.1995) loss: 0.7422 (0.7513) time: 0.1429 data: 0.0601 max mem: 9377 +Train: [75] Total time: 0:17:22 (0.1667 s / it) +Averaged stats: lr: 0.000019 grad: 0.1918 (0.1995) loss: 0.7422 (0.7513) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:06:01 loss: 0.8501 (0.8501) time: 5.8279 data: 5.7949 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8562 (0.8560) time: 0.1377 data: 0.1110 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (hcp-train-subset): loss: 0.8562 (0.8560) +Eval (hcp-val): [75] [ 0/62] eta: 0:06:27 loss: 0.8501 (0.8501) time: 6.2509 data: 6.2074 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8544 (0.8543) time: 0.1369 data: 0.1118 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:14 (0.2363 s / it) +Averaged stats (hcp-val): loss: 0.8544 (0.8543) +Eval (nsd-val): [75] [ 0/62] eta: 0:05:59 loss: 0.8231 (0.8231) time: 5.7995 data: 5.7682 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8319 (0.8308) time: 0.1359 data: 0.1105 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:14 (0.2291 s / it) +Averaged stats (nsd-val): loss: 0.8319 (0.8308) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 10:05:05 lr: 0.000019 grad: 0.2571 (0.2571) loss: 0.7964 (0.7964) time: 5.8089 data: 5.5769 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:23:01 lr: 0.000019 grad: 0.2024 (0.2766) loss: 0.7820 (0.7812) time: 0.1841 data: 0.0813 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:19:36 lr: 0.000019 grad: 0.2260 (0.2559) loss: 0.7744 (0.7770) time: 0.1572 data: 0.0504 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:18:29 lr: 0.000019 grad: 0.2116 (0.2452) loss: 0.7602 (0.7725) time: 0.1745 data: 0.0761 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:17:45 lr: 0.000019 grad: 0.2086 (0.2382) loss: 0.7598 (0.7689) time: 0.1679 data: 0.0757 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:17:05 lr: 0.000019 grad: 0.2109 (0.2323) loss: 0.7518 (0.7655) time: 0.1811 data: 0.0864 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:16:28 lr: 0.000019 grad: 0.1935 (0.2271) loss: 0.7537 (0.7636) time: 0.1540 data: 0.0609 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:16:00 lr: 0.000019 grad: 0.1911 (0.2232) loss: 0.7549 (0.7620) time: 0.1566 data: 0.0596 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:15:33 lr: 0.000018 grad: 0.1963 (0.2207) loss: 0.7488 (0.7605) time: 0.1555 data: 0.0611 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:15:09 lr: 0.000018 grad: 0.1950 (0.2181) loss: 0.7516 (0.7596) time: 0.1464 data: 0.0481 max mem: 9377 +Train: [76] [1000/6250] eta: 0:14:49 lr: 0.000018 grad: 0.1870 (0.2156) loss: 0.7575 (0.7589) time: 0.1412 data: 0.0593 max mem: 9377 +Train: [76] [1100/6250] eta: 0:14:22 lr: 0.000018 grad: 0.1921 (0.2137) loss: 0.7559 (0.7582) time: 0.1450 data: 0.0533 max mem: 9377 +Train: [76] [1200/6250] eta: 0:14:02 lr: 0.000018 grad: 0.1890 (0.2122) loss: 0.7496 (0.7575) time: 0.1795 data: 0.0921 max mem: 9377 +Train: [76] [1300/6250] eta: 0:13:40 lr: 0.000018 grad: 0.1928 (0.2106) loss: 0.7538 (0.7569) time: 0.1562 data: 0.0682 max mem: 9377 +Train: [76] [1400/6250] eta: 0:13:19 lr: 0.000018 grad: 0.1883 (0.2095) loss: 0.7585 (0.7562) time: 0.1583 data: 0.0727 max mem: 9377 +Train: [76] [1500/6250] eta: 0:13:07 lr: 0.000018 grad: 0.1914 (0.2085) loss: 0.7452 (0.7558) time: 0.1658 data: 0.0785 max mem: 9377 +Train: [76] [1600/6250] eta: 0:12:55 lr: 0.000018 grad: 0.1867 (0.2072) loss: 0.7548 (0.7557) time: 0.1761 data: 0.0871 max mem: 9377 +Train: [76] [1700/6250] eta: 0:12:37 lr: 0.000018 grad: 0.1922 (0.2066) loss: 0.7440 (0.7553) time: 0.1751 data: 0.0888 max mem: 9377 +Train: [76] [1800/6250] eta: 0:12:18 lr: 0.000018 grad: 0.1977 (0.2061) loss: 0.7438 (0.7550) time: 0.1787 data: 0.0805 max mem: 9377 +Train: [76] [1900/6250] eta: 0:12:02 lr: 0.000018 grad: 0.1928 (0.2056) loss: 0.7562 (0.7547) time: 0.1660 data: 0.0762 max mem: 9377 +Train: [76] [2000/6250] eta: 0:11:44 lr: 0.000018 grad: 0.1939 (0.2052) loss: 0.7421 (0.7544) time: 0.1650 data: 0.0777 max mem: 9377 +Train: [76] [2100/6250] eta: 0:11:26 lr: 0.000018 grad: 0.1902 (0.2047) loss: 0.7474 (0.7540) time: 0.1401 data: 0.0513 max mem: 9377 +Train: [76] [2200/6250] eta: 0:11:08 lr: 0.000018 grad: 0.1888 (0.2042) loss: 0.7527 (0.7539) time: 0.1523 data: 0.0583 max mem: 9377 +Train: [76] [2300/6250] eta: 0:10:49 lr: 0.000018 grad: 0.2020 (0.2038) loss: 0.7452 (0.7536) time: 0.1608 data: 0.0675 max mem: 9377 +Train: [76] [2400/6250] eta: 0:10:31 lr: 0.000018 grad: 0.1886 (0.2037) loss: 0.7537 (0.7533) time: 0.1545 data: 0.0625 max mem: 9377 +Train: [76] [2500/6250] eta: 0:10:12 lr: 0.000018 grad: 0.1967 (0.2034) loss: 0.7365 (0.7530) time: 0.1412 data: 0.0506 max mem: 9377 +Train: [76] [2600/6250] eta: 0:09:55 lr: 0.000018 grad: 0.1986 (0.2032) loss: 0.7465 (0.7527) time: 0.1901 data: 0.1109 max mem: 9377 +Train: [76] [2700/6250] eta: 0:09:40 lr: 0.000018 grad: 0.1924 (0.2031) loss: 0.7450 (0.7525) time: 0.1589 data: 0.0741 max mem: 9377 +Train: [76] [2800/6250] eta: 0:09:23 lr: 0.000018 grad: 0.1928 (0.2030) loss: 0.7461 (0.7521) time: 0.1605 data: 0.0746 max mem: 9377 +Train: [76] [2900/6250] eta: 0:09:06 lr: 0.000018 grad: 0.1977 (0.2031) loss: 0.7367 (0.7518) time: 0.1697 data: 0.0863 max mem: 9377 +Train: [76] [3000/6250] eta: 0:08:50 lr: 0.000018 grad: 0.2024 (0.2032) loss: 0.7481 (0.7517) time: 0.1257 data: 0.0402 max mem: 9377 +Train: [76] [3100/6250] eta: 0:08:33 lr: 0.000018 grad: 0.1964 (0.2029) loss: 0.7513 (0.7516) time: 0.1305 data: 0.0403 max mem: 9377 +Train: [76] [3200/6250] eta: 0:08:16 lr: 0.000018 grad: 0.1937 (0.2028) loss: 0.7348 (0.7515) time: 0.1416 data: 0.0531 max mem: 9377 +Train: [76] [3300/6250] eta: 0:07:59 lr: 0.000018 grad: 0.1939 (0.2026) loss: 0.7460 (0.7514) time: 0.1471 data: 0.0587 max mem: 9377 +Train: [76] [3400/6250] eta: 0:07:44 lr: 0.000018 grad: 0.1883 (0.2024) loss: 0.7516 (0.7513) time: 0.2348 data: 0.1570 max mem: 9377 +Train: [76] [3500/6250] eta: 0:07:28 lr: 0.000018 grad: 0.1947 (0.2022) loss: 0.7422 (0.7513) time: 0.1600 data: 0.0760 max mem: 9377 +Train: [76] [3600/6250] eta: 0:07:13 lr: 0.000018 grad: 0.1899 (0.2019) loss: 0.7529 (0.7513) time: 0.1679 data: 0.0871 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:58 lr: 0.000018 grad: 0.1980 (0.2016) loss: 0.7455 (0.7514) time: 0.1886 data: 0.1056 max mem: 9377 +Train: [76] [3800/6250] eta: 0:06:44 lr: 0.000018 grad: 0.1919 (0.2015) loss: 0.7563 (0.7514) time: 0.1744 data: 0.0829 max mem: 9377 +Train: [76] [3900/6250] eta: 0:06:27 lr: 0.000018 grad: 0.1949 (0.2014) loss: 0.7450 (0.7513) time: 0.1628 data: 0.0771 max mem: 9377 +Train: [76] [4000/6250] eta: 0:06:10 lr: 0.000018 grad: 0.1965 (0.2013) loss: 0.7509 (0.7513) time: 0.1411 data: 0.0590 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:53 lr: 0.000018 grad: 0.1917 (0.2012) loss: 0.7493 (0.7513) time: 0.1499 data: 0.0528 max mem: 9377 +Train: [76] [4200/6250] eta: 0:05:36 lr: 0.000018 grad: 0.1851 (0.2011) loss: 0.7594 (0.7512) time: 0.1626 data: 0.0700 max mem: 9377 +Train: [76] [4300/6250] eta: 0:05:19 lr: 0.000018 grad: 0.1927 (0.2010) loss: 0.7408 (0.7512) time: 0.1520 data: 0.0634 max mem: 9377 +Train: [76] [4400/6250] eta: 0:05:02 lr: 0.000018 grad: 0.1950 (0.2009) loss: 0.7437 (0.7511) time: 0.1641 data: 0.0758 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:45 lr: 0.000018 grad: 0.1883 (0.2008) loss: 0.7565 (0.7510) time: 0.1708 data: 0.0862 max mem: 9377 +Train: [76] [4600/6250] eta: 0:04:29 lr: 0.000018 grad: 0.1910 (0.2008) loss: 0.7435 (0.7510) time: 0.2295 data: 0.1558 max mem: 9377 +Train: [76] [4700/6250] eta: 0:04:13 lr: 0.000018 grad: 0.1931 (0.2006) loss: 0.7432 (0.7510) time: 0.1658 data: 0.0845 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:58 lr: 0.000018 grad: 0.1929 (0.2006) loss: 0.7396 (0.7509) time: 0.1790 data: 0.0962 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:41 lr: 0.000018 grad: 0.1946 (0.2006) loss: 0.7414 (0.7509) time: 0.1627 data: 0.0741 max mem: 9377 +Train: [76] [5000/6250] eta: 0:03:25 lr: 0.000018 grad: 0.2008 (0.2004) loss: 0.7559 (0.7509) time: 0.2018 data: 0.1144 max mem: 9377 +Train: [76] [5100/6250] eta: 0:03:09 lr: 0.000017 grad: 0.1976 (0.2003) loss: 0.7459 (0.7509) time: 0.1878 data: 0.0924 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:53 lr: 0.000017 grad: 0.1857 (0.2003) loss: 0.7581 (0.7509) time: 0.1918 data: 0.0914 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:36 lr: 0.000017 grad: 0.1950 (0.2002) loss: 0.7529 (0.7509) time: 0.1693 data: 0.0824 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:20 lr: 0.000017 grad: 0.1946 (0.2002) loss: 0.7457 (0.7508) time: 0.1896 data: 0.1038 max mem: 9377 +Train: [76] [5500/6250] eta: 0:02:03 lr: 0.000017 grad: 0.1967 (0.2001) loss: 0.7431 (0.7508) time: 0.1439 data: 0.0551 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:47 lr: 0.000017 grad: 0.2034 (0.2000) loss: 0.7447 (0.7508) time: 0.1598 data: 0.0651 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:30 lr: 0.000017 grad: 0.1898 (0.1999) loss: 0.7434 (0.7509) time: 0.1605 data: 0.0648 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:14 lr: 0.000017 grad: 0.1935 (0.1997) loss: 0.7524 (0.7509) time: 0.1613 data: 0.0722 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:57 lr: 0.000017 grad: 0.1911 (0.1997) loss: 0.7531 (0.7509) time: 0.1827 data: 0.0919 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:41 lr: 0.000017 grad: 0.1864 (0.1996) loss: 0.7529 (0.7509) time: 0.1401 data: 0.0518 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:24 lr: 0.000017 grad: 0.1905 (0.1995) loss: 0.7607 (0.7509) time: 0.1626 data: 0.0740 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:08 lr: 0.000017 grad: 0.1829 (0.1994) loss: 0.7623 (0.7510) time: 0.1734 data: 0.0879 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1903 (0.1993) loss: 0.7527 (0.7510) time: 0.1704 data: 0.0874 max mem: 9377 +Train: [76] Total time: 0:17:16 (0.1658 s / it) +Averaged stats: lr: 0.000017 grad: 0.1903 (0.1993) loss: 0.7527 (0.7510) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:06:01 loss: 0.8484 (0.8484) time: 5.8229 data: 5.7887 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8572 (0.8572) time: 0.1268 data: 0.1017 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:15 (0.2431 s / it) +Averaged stats (hcp-train-subset): loss: 0.8572 (0.8572) +Eval (hcp-val): [76] [ 0/62] eta: 0:06:49 loss: 0.8541 (0.8541) time: 6.5978 data: 6.5672 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8548 (0.8554) time: 0.1399 data: 0.1121 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:15 (0.2421 s / it) +Averaged stats (hcp-val): loss: 0.8548 (0.8554) +Eval (nsd-val): [76] [ 0/62] eta: 0:05:33 loss: 0.8226 (0.8226) time: 5.3745 data: 5.3429 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8311 (0.8333) time: 0.1548 data: 0.1293 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:15 (0.2445 s / it) +Averaged stats (nsd-val): loss: 0.8311 (0.8333) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 11:59:05 lr: 0.000017 grad: 0.2155 (0.2155) loss: 0.7976 (0.7976) time: 6.9034 data: 6.7975 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:23:58 lr: 0.000017 grad: 0.2743 (0.3026) loss: 0.7699 (0.7693) time: 0.1568 data: 0.0504 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:20:28 lr: 0.000017 grad: 0.2582 (0.2818) loss: 0.7329 (0.7573) time: 0.1636 data: 0.0677 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:19:22 lr: 0.000017 grad: 0.2254 (0.2651) loss: 0.7407 (0.7530) time: 0.1502 data: 0.0501 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:18:32 lr: 0.000017 grad: 0.2103 (0.2511) loss: 0.7526 (0.7536) time: 0.1679 data: 0.0562 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:17:54 lr: 0.000017 grad: 0.2061 (0.2434) loss: 0.7522 (0.7534) time: 0.1641 data: 0.0647 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:17:15 lr: 0.000017 grad: 0.1995 (0.2372) loss: 0.7589 (0.7534) time: 0.1433 data: 0.0393 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:16:40 lr: 0.000017 grad: 0.2069 (0.2321) loss: 0.7506 (0.7534) time: 0.1699 data: 0.0667 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:16:09 lr: 0.000017 grad: 0.2004 (0.2283) loss: 0.7482 (0.7531) time: 0.1539 data: 0.0449 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:15:41 lr: 0.000017 grad: 0.2001 (0.2250) loss: 0.7549 (0.7536) time: 0.1717 data: 0.0772 max mem: 9377 +Train: [77] [1000/6250] eta: 0:15:09 lr: 0.000017 grad: 0.1887 (0.2219) loss: 0.7583 (0.7539) time: 0.1442 data: 0.0537 max mem: 9377 +Train: [77] [1100/6250] eta: 0:14:43 lr: 0.000017 grad: 0.1841 (0.2193) loss: 0.7534 (0.7541) time: 0.1434 data: 0.0484 max mem: 9377 +Train: [77] [1200/6250] eta: 0:14:19 lr: 0.000017 grad: 0.1876 (0.2173) loss: 0.7434 (0.7542) time: 0.1535 data: 0.0698 max mem: 9377 +Train: [77] [1300/6250] eta: 0:13:55 lr: 0.000017 grad: 0.1844 (0.2153) loss: 0.7578 (0.7546) time: 0.1509 data: 0.0610 max mem: 9377 +Train: [77] [1400/6250] eta: 0:13:34 lr: 0.000017 grad: 0.1938 (0.2142) loss: 0.7555 (0.7547) time: 0.1661 data: 0.0798 max mem: 9377 +Train: [77] [1500/6250] eta: 0:13:18 lr: 0.000017 grad: 0.1900 (0.2125) loss: 0.7510 (0.7549) time: 0.1551 data: 0.0583 max mem: 9377 +Train: [77] [1600/6250] eta: 0:13:00 lr: 0.000017 grad: 0.1883 (0.2110) loss: 0.7537 (0.7550) time: 0.1489 data: 0.0625 max mem: 9377 +Train: [77] [1700/6250] eta: 0:12:41 lr: 0.000017 grad: 0.1865 (0.2099) loss: 0.7566 (0.7552) time: 0.1574 data: 0.0667 max mem: 9377 +Train: [77] [1800/6250] eta: 0:12:22 lr: 0.000017 grad: 0.1920 (0.2088) loss: 0.7480 (0.7553) time: 0.1470 data: 0.0627 max mem: 9377 +Train: [77] [1900/6250] eta: 0:12:04 lr: 0.000017 grad: 0.1911 (0.2080) loss: 0.7529 (0.7553) time: 0.1412 data: 0.0563 max mem: 9377 +Train: [77] [2000/6250] eta: 0:11:46 lr: 0.000017 grad: 0.1886 (0.2073) loss: 0.7445 (0.7552) time: 0.1637 data: 0.0831 max mem: 9377 +Train: [77] [2100/6250] eta: 0:11:28 lr: 0.000017 grad: 0.1978 (0.2068) loss: 0.7549 (0.7550) time: 0.1609 data: 0.0764 max mem: 9377 +Train: [77] [2200/6250] eta: 0:11:09 lr: 0.000017 grad: 0.1954 (0.2062) loss: 0.7411 (0.7547) time: 0.1497 data: 0.0555 max mem: 9377 +Train: [77] [2300/6250] eta: 0:10:50 lr: 0.000017 grad: 0.1836 (0.2058) loss: 0.7521 (0.7544) time: 0.1606 data: 0.0706 max mem: 9377 +Train: [77] [2400/6250] eta: 0:10:31 lr: 0.000017 grad: 0.1951 (0.2055) loss: 0.7490 (0.7542) time: 0.1319 data: 0.0376 max mem: 9377 +Train: [77] [2500/6250] eta: 0:10:12 lr: 0.000017 grad: 0.1993 (0.2053) loss: 0.7428 (0.7538) time: 0.1597 data: 0.0695 max mem: 9377 +Train: [77] [2600/6250] eta: 0:09:54 lr: 0.000017 grad: 0.1903 (0.2051) loss: 0.7571 (0.7536) time: 0.1523 data: 0.0570 max mem: 9377 +Train: [77] [2700/6250] eta: 0:09:36 lr: 0.000017 grad: 0.1961 (0.2050) loss: 0.7519 (0.7534) time: 0.1366 data: 0.0448 max mem: 9377 +Train: [77] [2800/6250] eta: 0:09:18 lr: 0.000017 grad: 0.1955 (0.2046) loss: 0.7457 (0.7533) time: 0.1669 data: 0.0694 max mem: 9377 +Train: [77] [2900/6250] eta: 0:09:01 lr: 0.000017 grad: 0.1872 (0.2043) loss: 0.7517 (0.7533) time: 0.1752 data: 0.0842 max mem: 9377 +Train: [77] [3000/6250] eta: 0:08:46 lr: 0.000017 grad: 0.2004 (0.2041) loss: 0.7475 (0.7532) time: 0.1808 data: 0.0946 max mem: 9377 +Train: [77] [3100/6250] eta: 0:08:28 lr: 0.000017 grad: 0.1952 (0.2039) loss: 0.7422 (0.7531) time: 0.1514 data: 0.0657 max mem: 9377 +Train: [77] [3200/6250] eta: 0:08:13 lr: 0.000017 grad: 0.1962 (0.2038) loss: 0.7459 (0.7529) time: 0.1922 data: 0.1048 max mem: 9377 +Train: [77] [3300/6250] eta: 0:07:55 lr: 0.000016 grad: 0.1999 (0.2036) loss: 0.7481 (0.7529) time: 0.1462 data: 0.0566 max mem: 9377 +Train: [77] [3400/6250] eta: 0:07:40 lr: 0.000016 grad: 0.2033 (0.2035) loss: 0.7384 (0.7528) time: 0.2223 data: 0.1479 max mem: 9377 +Train: [77] [3500/6250] eta: 0:07:24 lr: 0.000016 grad: 0.1975 (0.2032) loss: 0.7550 (0.7528) time: 0.1386 data: 0.0514 max mem: 9377 +Train: [77] [3600/6250] eta: 0:07:08 lr: 0.000016 grad: 0.2002 (0.2032) loss: 0.7636 (0.7530) time: 0.1416 data: 0.0613 max mem: 9377 +Train: [77] [3700/6250] eta: 0:06:52 lr: 0.000016 grad: 0.1945 (0.2030) loss: 0.7527 (0.7531) time: 0.1605 data: 0.0795 max mem: 9377 +Train: [77] [3800/6250] eta: 0:06:36 lr: 0.000016 grad: 0.1890 (0.2028) loss: 0.7593 (0.7532) time: 0.1531 data: 0.0645 max mem: 9377 +Train: [77] [3900/6250] eta: 0:06:20 lr: 0.000016 grad: 0.1907 (0.2027) loss: 0.7587 (0.7533) time: 0.1687 data: 0.0819 max mem: 9377 +Train: [77] [4000/6250] eta: 0:06:05 lr: 0.000016 grad: 0.1830 (0.2025) loss: 0.7727 (0.7535) time: 0.1840 data: 0.1005 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:49 lr: 0.000016 grad: 0.2006 (0.2023) loss: 0.7669 (0.7537) time: 0.1679 data: 0.0804 max mem: 9377 +Train: [77] [4200/6250] eta: 0:05:33 lr: 0.000016 grad: 0.1999 (0.2021) loss: 0.7520 (0.7538) time: 0.1789 data: 0.0829 max mem: 9377 +Train: [77] [4300/6250] eta: 0:05:17 lr: 0.000016 grad: 0.1841 (0.2019) loss: 0.7735 (0.7540) time: 0.1892 data: 0.0988 max mem: 9377 +Train: [77] [4400/6250] eta: 0:05:01 lr: 0.000016 grad: 0.1987 (0.2017) loss: 0.7504 (0.7541) time: 0.1596 data: 0.0678 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:44 lr: 0.000016 grad: 0.1954 (0.2016) loss: 0.7587 (0.7542) time: 0.1532 data: 0.0666 max mem: 9377 +Train: [77] [4600/6250] eta: 0:04:28 lr: 0.000016 grad: 0.1870 (0.2014) loss: 0.7650 (0.7544) time: 0.2170 data: 0.1409 max mem: 9377 +Train: [77] [4700/6250] eta: 0:04:12 lr: 0.000016 grad: 0.1951 (0.2012) loss: 0.7747 (0.7545) time: 0.1585 data: 0.0815 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:56 lr: 0.000016 grad: 0.1823 (0.2011) loss: 0.7657 (0.7546) time: 0.1653 data: 0.0749 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:39 lr: 0.000016 grad: 0.1889 (0.2010) loss: 0.7594 (0.7547) time: 0.1536 data: 0.0673 max mem: 9377 +Train: [77] [5000/6250] eta: 0:03:23 lr: 0.000016 grad: 0.1938 (0.2009) loss: 0.7509 (0.7547) time: 0.1817 data: 0.0931 max mem: 9377 +Train: [77] [5100/6250] eta: 0:03:07 lr: 0.000016 grad: 0.1943 (0.2008) loss: 0.7488 (0.7547) time: 0.1550 data: 0.0727 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:51 lr: 0.000016 grad: 0.1921 (0.2007) loss: 0.7620 (0.7547) time: 0.1668 data: 0.0763 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:35 lr: 0.000016 grad: 0.1972 (0.2007) loss: 0.7412 (0.7546) time: 0.1811 data: 0.0924 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:18 lr: 0.000016 grad: 0.1972 (0.2007) loss: 0.7425 (0.7546) time: 0.1583 data: 0.0715 max mem: 9377 +Train: [77] [5500/6250] eta: 0:02:02 lr: 0.000016 grad: 0.1913 (0.2006) loss: 0.7639 (0.7546) time: 0.1547 data: 0.0711 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:45 lr: 0.000016 grad: 0.1917 (0.2006) loss: 0.7548 (0.7546) time: 0.1596 data: 0.0749 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:29 lr: 0.000016 grad: 0.1946 (0.2006) loss: 0.7553 (0.7545) time: 0.1797 data: 0.0868 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:13 lr: 0.000016 grad: 0.1891 (0.2005) loss: 0.7587 (0.7545) time: 0.1637 data: 0.0764 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:56 lr: 0.000016 grad: 0.1925 (0.2005) loss: 0.7496 (0.7544) time: 0.1442 data: 0.0540 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:40 lr: 0.000016 grad: 0.1819 (0.2003) loss: 0.7630 (0.7545) time: 0.1612 data: 0.0581 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:24 lr: 0.000016 grad: 0.1909 (0.2001) loss: 0.7525 (0.7545) time: 0.1270 data: 0.0297 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:08 lr: 0.000016 grad: 0.1925 (0.2001) loss: 0.7447 (0.7544) time: 0.1458 data: 0.0639 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1979 (0.2000) loss: 0.7415 (0.7544) time: 0.1481 data: 0.0559 max mem: 9377 +Train: [77] Total time: 0:17:00 (0.1632 s / it) +Averaged stats: lr: 0.000016 grad: 0.1979 (0.2000) loss: 0.7415 (0.7544) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:05:29 loss: 0.8468 (0.8468) time: 5.3105 data: 5.2787 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8539 (0.8561) time: 0.1472 data: 0.1205 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:14 (0.2418 s / it) +Averaged stats (hcp-train-subset): loss: 0.8539 (0.8561) +Eval (hcp-val): [77] [ 0/62] eta: 0:05:38 loss: 0.8545 (0.8545) time: 5.4565 data: 5.4200 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8533 (0.8555) time: 0.1162 data: 0.0888 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:15 (0.2465 s / it) +Averaged stats (hcp-val): loss: 0.8533 (0.8555) +Eval (nsd-val): [77] [ 0/62] eta: 0:04:57 loss: 0.8232 (0.8232) time: 4.8054 data: 4.7437 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8295 (0.8312) time: 0.1431 data: 0.1178 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:14 (0.2348 s / it) +Averaged stats (nsd-val): loss: 0.8295 (0.8312) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 8:30:45 lr: 0.000016 grad: 0.2858 (0.2858) loss: 0.8316 (0.8316) time: 4.9032 data: 4.6733 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:22:23 lr: 0.000016 grad: 0.2222 (0.2768) loss: 0.7732 (0.7793) time: 0.1627 data: 0.0534 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:20:18 lr: 0.000016 grad: 0.2257 (0.2480) loss: 0.7566 (0.7710) time: 0.1959 data: 0.0905 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:18:40 lr: 0.000016 grad: 0.2550 (0.2469) loss: 0.7352 (0.7604) time: 0.1714 data: 0.0608 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:17:50 lr: 0.000016 grad: 0.2207 (0.2429) loss: 0.7384 (0.7556) time: 0.1658 data: 0.0514 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:17:08 lr: 0.000016 grad: 0.2131 (0.2385) loss: 0.7395 (0.7530) time: 0.1480 data: 0.0504 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:16:24 lr: 0.000016 grad: 0.2076 (0.2349) loss: 0.7446 (0.7514) time: 0.1298 data: 0.0208 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:15:50 lr: 0.000016 grad: 0.2088 (0.2312) loss: 0.7333 (0.7509) time: 0.1447 data: 0.0491 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:15:23 lr: 0.000016 grad: 0.2015 (0.2282) loss: 0.7625 (0.7512) time: 0.1646 data: 0.0700 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:14:59 lr: 0.000016 grad: 0.1972 (0.2251) loss: 0.7525 (0.7518) time: 0.1637 data: 0.0640 max mem: 9377 +Train: [78] [1000/6250] eta: 0:14:32 lr: 0.000016 grad: 0.1898 (0.2225) loss: 0.7536 (0.7522) time: 0.1476 data: 0.0577 max mem: 9377 +Train: [78] [1100/6250] eta: 0:14:09 lr: 0.000016 grad: 0.1916 (0.2200) loss: 0.7511 (0.7527) time: 0.1586 data: 0.0732 max mem: 9377 +Train: [78] [1200/6250] eta: 0:13:51 lr: 0.000016 grad: 0.1917 (0.2181) loss: 0.7610 (0.7527) time: 0.1505 data: 0.0570 max mem: 9377 +Train: [78] [1300/6250] eta: 0:13:37 lr: 0.000016 grad: 0.1930 (0.2165) loss: 0.7541 (0.7530) time: 0.1677 data: 0.0752 max mem: 9377 +Train: [78] [1400/6250] eta: 0:13:23 lr: 0.000016 grad: 0.1889 (0.2150) loss: 0.7470 (0.7528) time: 0.1891 data: 0.1050 max mem: 9377 +Train: [78] [1500/6250] eta: 0:13:01 lr: 0.000015 grad: 0.1998 (0.2140) loss: 0.7402 (0.7523) time: 0.1528 data: 0.0551 max mem: 9377 +Train: [78] [1600/6250] eta: 0:12:48 lr: 0.000015 grad: 0.1933 (0.2131) loss: 0.7433 (0.7520) time: 0.1569 data: 0.0700 max mem: 9377 +Train: [78] [1700/6250] eta: 0:12:31 lr: 0.000015 grad: 0.1994 (0.2123) loss: 0.7322 (0.7516) time: 0.1698 data: 0.0834 max mem: 9377 +Train: [78] [1800/6250] eta: 0:12:12 lr: 0.000015 grad: 0.1946 (0.2117) loss: 0.7455 (0.7511) time: 0.1499 data: 0.0653 max mem: 9377 +Train: [78] [1900/6250] eta: 0:11:56 lr: 0.000015 grad: 0.1841 (0.2109) loss: 0.7582 (0.7508) time: 0.1751 data: 0.0871 max mem: 9377 +Train: [78] [2000/6250] eta: 0:11:41 lr: 0.000015 grad: 0.1979 (0.2104) loss: 0.7384 (0.7501) time: 0.1593 data: 0.0660 max mem: 9377 +Train: [78] [2100/6250] eta: 0:11:23 lr: 0.000015 grad: 0.1955 (0.2099) loss: 0.7388 (0.7497) time: 0.1510 data: 0.0614 max mem: 9377 +Train: [78] [2200/6250] eta: 0:11:06 lr: 0.000015 grad: 0.1956 (0.2095) loss: 0.7443 (0.7495) time: 0.1546 data: 0.0648 max mem: 9377 +Train: [78] [2300/6250] eta: 0:10:47 lr: 0.000015 grad: 0.1908 (0.2089) loss: 0.7517 (0.7493) time: 0.1364 data: 0.0489 max mem: 9377 +Train: [78] [2400/6250] eta: 0:10:29 lr: 0.000015 grad: 0.2094 (0.2084) loss: 0.7356 (0.7491) time: 0.1546 data: 0.0665 max mem: 9377 +Train: [78] [2500/6250] eta: 0:10:10 lr: 0.000015 grad: 0.1877 (0.2081) loss: 0.7413 (0.7489) time: 0.1534 data: 0.0688 max mem: 9377 +Train: [78] [2600/6250] eta: 0:09:52 lr: 0.000015 grad: 0.1842 (0.2078) loss: 0.7517 (0.7487) time: 0.1561 data: 0.0668 max mem: 9377 +Train: [78] [2700/6250] eta: 0:09:34 lr: 0.000015 grad: 0.1957 (0.2074) loss: 0.7376 (0.7485) time: 0.1526 data: 0.0738 max mem: 9377 +Train: [78] [2800/6250] eta: 0:09:16 lr: 0.000015 grad: 0.1967 (0.2071) loss: 0.7386 (0.7484) time: 0.1852 data: 0.0968 max mem: 9377 +Train: [78] [2900/6250] eta: 0:08:59 lr: 0.000015 grad: 0.1915 (0.2068) loss: 0.7569 (0.7483) time: 0.1758 data: 0.0833 max mem: 9377 +Train: [78] [3000/6250] eta: 0:08:42 lr: 0.000015 grad: 0.1837 (0.2064) loss: 0.7541 (0.7484) time: 0.1292 data: 0.0376 max mem: 9377 +Train: [78] [3100/6250] eta: 0:08:26 lr: 0.000015 grad: 0.1870 (0.2061) loss: 0.7563 (0.7484) time: 0.1660 data: 0.0708 max mem: 9377 +Train: [78] [3200/6250] eta: 0:08:10 lr: 0.000015 grad: 0.1916 (0.2057) loss: 0.7440 (0.7485) time: 0.1639 data: 0.0690 max mem: 9377 +Train: [78] [3300/6250] eta: 0:07:54 lr: 0.000015 grad: 0.1957 (0.2054) loss: 0.7526 (0.7486) time: 0.1630 data: 0.0778 max mem: 9377 +Train: [78] [3400/6250] eta: 0:07:40 lr: 0.000015 grad: 0.1858 (0.2051) loss: 0.7603 (0.7489) time: 0.2405 data: 0.1661 max mem: 9377 +Train: [78] [3500/6250] eta: 0:07:25 lr: 0.000015 grad: 0.1885 (0.2048) loss: 0.7519 (0.7489) time: 0.1710 data: 0.0932 max mem: 9377 +Train: [78] [3600/6250] eta: 0:07:11 lr: 0.000015 grad: 0.1979 (0.2047) loss: 0.7414 (0.7490) time: 0.2148 data: 0.1337 max mem: 9377 +Train: [78] [3700/6250] eta: 0:06:57 lr: 0.000015 grad: 0.1924 (0.2046) loss: 0.7296 (0.7490) time: 0.1248 data: 0.0315 max mem: 9377 +Train: [78] [3800/6250] eta: 0:06:42 lr: 0.000015 grad: 0.1936 (0.2045) loss: 0.7510 (0.7490) time: 0.1636 data: 0.0825 max mem: 9377 +Train: [78] [3900/6250] eta: 0:06:27 lr: 0.000015 grad: 0.2028 (0.2045) loss: 0.7561 (0.7490) time: 0.1887 data: 0.1013 max mem: 9377 +Train: [78] [4000/6250] eta: 0:06:11 lr: 0.000015 grad: 0.1962 (0.2044) loss: 0.7459 (0.7490) time: 0.1738 data: 0.0811 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:55 lr: 0.000015 grad: 0.2000 (0.2044) loss: 0.7436 (0.7489) time: 0.1586 data: 0.0587 max mem: 9377 +Train: [78] [4200/6250] eta: 0:05:39 lr: 0.000015 grad: 0.1934 (0.2042) loss: 0.7469 (0.7489) time: 0.1588 data: 0.0546 max mem: 9377 +Train: [78] [4300/6250] eta: 0:05:22 lr: 0.000015 grad: 0.1922 (0.2040) loss: 0.7516 (0.7490) time: 0.1832 data: 0.0902 max mem: 9377 +Train: [78] [4400/6250] eta: 0:05:06 lr: 0.000015 grad: 0.1949 (0.2041) loss: 0.7443 (0.7490) time: 0.1579 data: 0.0684 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:49 lr: 0.000015 grad: 0.2101 (0.2041) loss: 0.7441 (0.7489) time: 0.1777 data: 0.0885 max mem: 9377 +Train: [78] [4600/6250] eta: 0:04:33 lr: 0.000015 grad: 0.1966 (0.2039) loss: 0.7531 (0.7489) time: 0.1618 data: 0.0747 max mem: 9377 +Train: [78] [4700/6250] eta: 0:04:16 lr: 0.000015 grad: 0.1934 (0.2039) loss: 0.7470 (0.7488) time: 0.1598 data: 0.0510 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:59 lr: 0.000015 grad: 0.2035 (0.2040) loss: 0.7393 (0.7487) time: 0.1581 data: 0.0637 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:42 lr: 0.000015 grad: 0.2023 (0.2040) loss: 0.7442 (0.7487) time: 0.1641 data: 0.0769 max mem: 9377 +Train: [78] [5000/6250] eta: 0:03:26 lr: 0.000015 grad: 0.1961 (0.2039) loss: 0.7473 (0.7486) time: 0.1656 data: 0.0585 max mem: 9377 +Train: [78] [5100/6250] eta: 0:03:10 lr: 0.000015 grad: 0.2072 (0.2039) loss: 0.7444 (0.7486) time: 0.1644 data: 0.0634 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:53 lr: 0.000015 grad: 0.2184 (0.2040) loss: 0.7373 (0.7484) time: 0.1469 data: 0.0468 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:36 lr: 0.000015 grad: 0.2003 (0.2041) loss: 0.7448 (0.7482) time: 0.1753 data: 0.0834 max mem: 9377 +Train: [78] [5400/6250] eta: 0:02:20 lr: 0.000015 grad: 0.2018 (0.2041) loss: 0.7415 (0.7481) time: 0.1535 data: 0.0692 max mem: 9377 +Train: [78] [5500/6250] eta: 0:02:03 lr: 0.000015 grad: 0.1928 (0.2041) loss: 0.7474 (0.7479) time: 0.1475 data: 0.0618 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:47 lr: 0.000015 grad: 0.2028 (0.2041) loss: 0.7395 (0.7478) time: 0.1658 data: 0.0720 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:30 lr: 0.000015 grad: 0.2024 (0.2040) loss: 0.7484 (0.7478) time: 0.1253 data: 0.0332 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:14 lr: 0.000015 grad: 0.1903 (0.2040) loss: 0.7495 (0.7478) time: 0.1405 data: 0.0514 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:57 lr: 0.000015 grad: 0.1945 (0.2040) loss: 0.7480 (0.7477) time: 0.1750 data: 0.0784 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:41 lr: 0.000015 grad: 0.1965 (0.2041) loss: 0.7450 (0.7476) time: 0.1472 data: 0.0568 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:24 lr: 0.000015 grad: 0.2034 (0.2040) loss: 0.7451 (0.7476) time: 0.1431 data: 0.0560 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:08 lr: 0.000014 grad: 0.1970 (0.2040) loss: 0.7447 (0.7475) time: 0.1682 data: 0.0790 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1999 (0.2041) loss: 0.7420 (0.7474) time: 0.1456 data: 0.0544 max mem: 9377 +Train: [78] Total time: 0:17:11 (0.1650 s / it) +Averaged stats: lr: 0.000014 grad: 0.1999 (0.2041) loss: 0.7420 (0.7474) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:05:45 loss: 0.8474 (0.8474) time: 5.5648 data: 5.5322 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8550 (0.8557) time: 0.1390 data: 0.1135 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:14 (0.2358 s / it) +Averaged stats (hcp-train-subset): loss: 0.8550 (0.8557) +Eval (hcp-val): [78] [ 0/62] eta: 0:06:07 loss: 0.8596 (0.8596) time: 5.9281 data: 5.8979 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8525 (0.8543) time: 0.1470 data: 0.1217 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:14 (0.2354 s / it) +Averaged stats (hcp-val): loss: 0.8525 (0.8543) +Eval (nsd-val): [78] [ 0/62] eta: 0:04:40 loss: 0.8227 (0.8227) time: 4.5248 data: 4.4328 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8284 (0.8305) time: 0.1387 data: 0.1135 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (nsd-val): loss: 0.8284 (0.8305) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 10:55:32 lr: 0.000014 grad: 0.5214 (0.5214) loss: 0.6917 (0.6917) time: 6.2932 data: 6.1830 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:22:09 lr: 0.000014 grad: 0.2146 (0.3313) loss: 0.7697 (0.7691) time: 0.1594 data: 0.0565 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:19:10 lr: 0.000014 grad: 0.2186 (0.2838) loss: 0.7626 (0.7658) time: 0.1569 data: 0.0547 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:18:18 lr: 0.000014 grad: 0.2099 (0.2639) loss: 0.7430 (0.7632) time: 0.1720 data: 0.0638 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:17:13 lr: 0.000014 grad: 0.2056 (0.2539) loss: 0.7586 (0.7603) time: 0.1571 data: 0.0582 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:16:46 lr: 0.000014 grad: 0.2008 (0.2445) loss: 0.7557 (0.7594) time: 0.1770 data: 0.0611 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:16:21 lr: 0.000014 grad: 0.2051 (0.2379) loss: 0.7473 (0.7581) time: 0.1684 data: 0.0673 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:15:53 lr: 0.000014 grad: 0.1932 (0.2332) loss: 0.7606 (0.7572) time: 0.1694 data: 0.0686 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:15:28 lr: 0.000014 grad: 0.2046 (0.2293) loss: 0.7528 (0.7562) time: 0.1663 data: 0.0606 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:15:03 lr: 0.000014 grad: 0.2028 (0.2260) loss: 0.7472 (0.7556) time: 0.1349 data: 0.0373 max mem: 9377 +Train: [79] [1000/6250] eta: 0:14:46 lr: 0.000014 grad: 0.1947 (0.2235) loss: 0.7555 (0.7555) time: 0.1562 data: 0.0664 max mem: 9377 +Train: [79] [1100/6250] eta: 0:14:23 lr: 0.000014 grad: 0.2018 (0.2215) loss: 0.7475 (0.7551) time: 0.1504 data: 0.0624 max mem: 9377 +Train: [79] [1200/6250] eta: 0:14:03 lr: 0.000014 grad: 0.1959 (0.2198) loss: 0.7537 (0.7547) time: 0.1551 data: 0.0771 max mem: 9377 +Train: [79] [1300/6250] eta: 0:13:45 lr: 0.000014 grad: 0.2000 (0.2187) loss: 0.7441 (0.7541) time: 0.1775 data: 0.0897 max mem: 9377 +Train: [79] [1400/6250] eta: 0:13:23 lr: 0.000014 grad: 0.1960 (0.2174) loss: 0.7512 (0.7536) time: 0.1405 data: 0.0614 max mem: 9377 +Train: [79] [1500/6250] eta: 0:13:03 lr: 0.000014 grad: 0.1962 (0.2164) loss: 0.7503 (0.7532) time: 0.1623 data: 0.0716 max mem: 9377 +Train: [79] [1600/6250] eta: 0:12:54 lr: 0.000014 grad: 0.1934 (0.2157) loss: 0.7511 (0.7529) time: 0.1846 data: 0.0904 max mem: 9377 +Train: [79] [1700/6250] eta: 0:12:36 lr: 0.000014 grad: 0.1975 (0.2149) loss: 0.7494 (0.7526) time: 0.1587 data: 0.0679 max mem: 9377 +Train: [79] [1800/6250] eta: 0:12:20 lr: 0.000014 grad: 0.2045 (0.2144) loss: 0.7427 (0.7522) time: 0.1875 data: 0.0982 max mem: 9377 +Train: [79] [1900/6250] eta: 0:12:00 lr: 0.000014 grad: 0.1981 (0.2138) loss: 0.7388 (0.7519) time: 0.1658 data: 0.0663 max mem: 9377 +Train: [79] [2000/6250] eta: 0:11:43 lr: 0.000014 grad: 0.1888 (0.2132) loss: 0.7547 (0.7517) time: 0.1373 data: 0.0474 max mem: 9377 +Train: [79] [2100/6250] eta: 0:11:25 lr: 0.000014 grad: 0.1931 (0.2126) loss: 0.7465 (0.7516) time: 0.1492 data: 0.0658 max mem: 9377 +Train: [79] [2200/6250] eta: 0:11:07 lr: 0.000014 grad: 0.2026 (0.2120) loss: 0.7496 (0.7516) time: 0.1749 data: 0.0822 max mem: 9377 +Train: [79] [2300/6250] eta: 0:10:48 lr: 0.000014 grad: 0.2023 (0.2115) loss: 0.7455 (0.7515) time: 0.1654 data: 0.0703 max mem: 9377 +Train: [79] [2400/6250] eta: 0:10:29 lr: 0.000014 grad: 0.1890 (0.2110) loss: 0.7489 (0.7514) time: 0.1448 data: 0.0543 max mem: 9377 +Train: [79] [2500/6250] eta: 0:10:12 lr: 0.000014 grad: 0.1921 (0.2106) loss: 0.7442 (0.7513) time: 0.1585 data: 0.0707 max mem: 9377 +Train: [79] [2600/6250] eta: 0:09:55 lr: 0.000014 grad: 0.1954 (0.2103) loss: 0.7476 (0.7511) time: 0.1741 data: 0.0870 max mem: 9377 +Train: [79] [2700/6250] eta: 0:09:38 lr: 0.000014 grad: 0.2021 (0.2098) loss: 0.7486 (0.7511) time: 0.1583 data: 0.0705 max mem: 9377 +Train: [79] [2800/6250] eta: 0:09:21 lr: 0.000014 grad: 0.1937 (0.2094) loss: 0.7563 (0.7511) time: 0.1667 data: 0.0801 max mem: 9377 +Train: [79] [2900/6250] eta: 0:09:03 lr: 0.000014 grad: 0.2058 (0.2092) loss: 0.7452 (0.7510) time: 0.1539 data: 0.0652 max mem: 9377 +Train: [79] [3000/6250] eta: 0:08:47 lr: 0.000014 grad: 0.2041 (0.2090) loss: 0.7397 (0.7508) time: 0.1656 data: 0.0742 max mem: 9377 +Train: [79] [3100/6250] eta: 0:08:31 lr: 0.000014 grad: 0.1986 (0.2089) loss: 0.7450 (0.7506) time: 0.1706 data: 0.0728 max mem: 9377 +Train: [79] [3200/6250] eta: 0:08:13 lr: 0.000014 grad: 0.2032 (0.2088) loss: 0.7431 (0.7504) time: 0.1598 data: 0.0678 max mem: 9377 +Train: [79] [3300/6250] eta: 0:07:56 lr: 0.000014 grad: 0.2238 (0.2089) loss: 0.7382 (0.7501) time: 0.1385 data: 0.0450 max mem: 9377 +Train: [79] [3400/6250] eta: 0:07:41 lr: 0.000014 grad: 0.2071 (0.2088) loss: 0.7399 (0.7498) time: 0.1810 data: 0.1014 max mem: 9377 +Train: [79] [3500/6250] eta: 0:07:25 lr: 0.000014 grad: 0.2049 (0.2088) loss: 0.7383 (0.7494) time: 0.1554 data: 0.0728 max mem: 9377 +Train: [79] [3600/6250] eta: 0:07:08 lr: 0.000014 grad: 0.2015 (0.2088) loss: 0.7376 (0.7490) time: 0.1548 data: 0.0654 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:52 lr: 0.000014 grad: 0.2032 (0.2087) loss: 0.7408 (0.7486) time: 0.1615 data: 0.0805 max mem: 9377 +Train: [79] [3800/6250] eta: 0:06:36 lr: 0.000014 grad: 0.2081 (0.2086) loss: 0.7436 (0.7485) time: 0.1577 data: 0.0719 max mem: 9377 +Train: [79] [3900/6250] eta: 0:06:20 lr: 0.000014 grad: 0.2033 (0.2085) loss: 0.7349 (0.7482) time: 0.1659 data: 0.0694 max mem: 9377 +Train: [79] [4000/6250] eta: 0:06:05 lr: 0.000014 grad: 0.2058 (0.2085) loss: 0.7384 (0.7480) time: 0.1272 data: 0.0194 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:49 lr: 0.000014 grad: 0.1931 (0.2085) loss: 0.7355 (0.7478) time: 0.1683 data: 0.0825 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:33 lr: 0.000014 grad: 0.2072 (0.2084) loss: 0.7353 (0.7476) time: 0.1575 data: 0.0653 max mem: 9377 +Train: [79] [4300/6250] eta: 0:05:16 lr: 0.000014 grad: 0.2037 (0.2083) loss: 0.7442 (0.7475) time: 0.1612 data: 0.0695 max mem: 9377 +Train: [79] [4400/6250] eta: 0:05:00 lr: 0.000014 grad: 0.2008 (0.2082) loss: 0.7424 (0.7473) time: 0.1769 data: 0.0889 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:44 lr: 0.000014 grad: 0.2032 (0.2081) loss: 0.7319 (0.7472) time: 0.1692 data: 0.0673 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:28 lr: 0.000014 grad: 0.2002 (0.2081) loss: 0.7461 (0.7471) time: 0.1829 data: 0.1024 max mem: 9377 +Train: [79] [4700/6250] eta: 0:04:12 lr: 0.000013 grad: 0.1928 (0.2080) loss: 0.7411 (0.7469) time: 0.1567 data: 0.0628 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:56 lr: 0.000013 grad: 0.1984 (0.2079) loss: 0.7344 (0.7469) time: 0.1698 data: 0.0748 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:40 lr: 0.000013 grad: 0.1989 (0.2078) loss: 0.7430 (0.7468) time: 0.1484 data: 0.0615 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:23 lr: 0.000013 grad: 0.1953 (0.2077) loss: 0.7481 (0.7468) time: 0.2052 data: 0.1050 max mem: 9377 +Train: [79] [5100/6250] eta: 0:03:07 lr: 0.000013 grad: 0.2071 (0.2075) loss: 0.7344 (0.7466) time: 0.1944 data: 0.1008 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:51 lr: 0.000013 grad: 0.2130 (0.2074) loss: 0.7301 (0.7465) time: 0.1504 data: 0.0514 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:35 lr: 0.000013 grad: 0.2012 (0.2075) loss: 0.7419 (0.7463) time: 0.1622 data: 0.0682 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:18 lr: 0.000013 grad: 0.1989 (0.2073) loss: 0.7454 (0.7462) time: 0.1511 data: 0.0520 max mem: 9377 +Train: [79] [5500/6250] eta: 0:02:02 lr: 0.000013 grad: 0.1997 (0.2073) loss: 0.7360 (0.7461) time: 0.1455 data: 0.0445 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:45 lr: 0.000013 grad: 0.1968 (0.2071) loss: 0.7473 (0.7460) time: 0.1411 data: 0.0521 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:29 lr: 0.000013 grad: 0.1958 (0.2070) loss: 0.7507 (0.7460) time: 0.1788 data: 0.0835 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:13 lr: 0.000013 grad: 0.1920 (0.2069) loss: 0.7546 (0.7460) time: 0.1518 data: 0.0638 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:56 lr: 0.000013 grad: 0.2011 (0.2068) loss: 0.7520 (0.7460) time: 0.1607 data: 0.0680 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:40 lr: 0.000013 grad: 0.1975 (0.2068) loss: 0.7438 (0.7460) time: 0.1575 data: 0.0724 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:24 lr: 0.000013 grad: 0.2063 (0.2068) loss: 0.7520 (0.7461) time: 0.1559 data: 0.0676 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:08 lr: 0.000013 grad: 0.1983 (0.2067) loss: 0.7538 (0.7461) time: 0.1604 data: 0.0700 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1935 (0.2066) loss: 0.7427 (0.7461) time: 0.1663 data: 0.0813 max mem: 9377 +Train: [79] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000013 grad: 0.1935 (0.2066) loss: 0.7427 (0.7461) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:06:49 loss: 0.8523 (0.8523) time: 6.6085 data: 6.5775 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8573 (0.8568) time: 0.1274 data: 0.1000 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:15 (0.2563 s / it) +Averaged stats (hcp-train-subset): loss: 0.8573 (0.8568) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [79] [ 0/62] eta: 0:06:38 loss: 0.8529 (0.8529) time: 6.4294 data: 6.3965 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8543 (0.8543) time: 0.1686 data: 0.1428 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:16 (0.2611 s / it) +Averaged stats (hcp-val): loss: 0.8543 (0.8543) +Making plots (hcp-val): example=2 +Eval (nsd-val): [79] [ 0/62] eta: 0:07:05 loss: 0.8217 (0.8217) time: 6.8667 data: 6.8352 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8333 (0.8331) time: 0.1538 data: 0.1279 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:16 (0.2646 s / it) +Averaged stats (nsd-val): loss: 0.8333 (0.8331) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 11:49:23 lr: 0.000013 grad: 0.3917 (0.3917) loss: 0.7797 (0.7797) time: 6.8101 data: 6.5885 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:25:42 lr: 0.000013 grad: 0.2045 (0.2790) loss: 0.7628 (0.7714) time: 0.1856 data: 0.0645 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:21:51 lr: 0.000013 grad: 0.2256 (0.2618) loss: 0.7439 (0.7638) time: 0.1849 data: 0.0745 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:20:00 lr: 0.000013 grad: 0.1969 (0.2459) loss: 0.7684 (0.7639) time: 0.1526 data: 0.0512 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:18:46 lr: 0.000013 grad: 0.2081 (0.2382) loss: 0.7440 (0.7621) time: 0.1763 data: 0.0829 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:17:51 lr: 0.000013 grad: 0.2102 (0.2339) loss: 0.7530 (0.7593) time: 0.1504 data: 0.0544 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:17:05 lr: 0.000013 grad: 0.2130 (0.2306) loss: 0.7433 (0.7572) time: 0.1609 data: 0.0743 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:16:54 lr: 0.000013 grad: 0.2103 (0.2272) loss: 0.7395 (0.7557) time: 0.2563 data: 0.1773 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:16:43 lr: 0.000013 grad: 0.2014 (0.2237) loss: 0.7572 (0.7558) time: 0.1271 data: 0.0003 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:16:19 lr: 0.000013 grad: 0.1952 (0.2209) loss: 0.7574 (0.7557) time: 0.1571 data: 0.0634 max mem: 9377 +Train: [80] [1000/6250] eta: 0:15:53 lr: 0.000013 grad: 0.1930 (0.2188) loss: 0.7584 (0.7555) time: 0.1888 data: 0.1060 max mem: 9377 +Train: [80] [1100/6250] eta: 0:15:28 lr: 0.000013 grad: 0.1891 (0.2166) loss: 0.7509 (0.7553) time: 0.1716 data: 0.0743 max mem: 9377 +Train: [80] [1200/6250] eta: 0:15:02 lr: 0.000013 grad: 0.1935 (0.2149) loss: 0.7574 (0.7551) time: 0.1641 data: 0.0723 max mem: 9377 +Train: [80] [1300/6250] eta: 0:14:36 lr: 0.000013 grad: 0.1878 (0.2134) loss: 0.7615 (0.7548) time: 0.1760 data: 0.0884 max mem: 9377 +Train: [80] [1400/6250] eta: 0:14:10 lr: 0.000013 grad: 0.1938 (0.2124) loss: 0.7583 (0.7546) time: 0.1557 data: 0.0642 max mem: 9377 +Train: [80] [1500/6250] eta: 0:13:56 lr: 0.000013 grad: 0.2002 (0.2117) loss: 0.7388 (0.7539) time: 0.1895 data: 0.1052 max mem: 9377 +Train: [80] [1600/6250] eta: 0:13:40 lr: 0.000013 grad: 0.2027 (0.2110) loss: 0.7474 (0.7535) time: 0.1892 data: 0.1074 max mem: 9377 +Train: [80] [1700/6250] eta: 0:13:25 lr: 0.000013 grad: 0.2006 (0.2103) loss: 0.7461 (0.7534) time: 0.1972 data: 0.1121 max mem: 9377 +Train: [80] [1800/6250] eta: 0:13:08 lr: 0.000013 grad: 0.2034 (0.2100) loss: 0.7408 (0.7528) time: 0.1726 data: 0.0862 max mem: 9377 +Train: [80] [1900/6250] eta: 0:12:48 lr: 0.000013 grad: 0.2106 (0.2096) loss: 0.7409 (0.7523) time: 0.1794 data: 0.0956 max mem: 9377 +Train: [80] [2000/6250] eta: 0:12:33 lr: 0.000013 grad: 0.2016 (0.2093) loss: 0.7499 (0.7520) time: 0.1910 data: 0.0914 max mem: 9377 +Train: [80] [2100/6250] eta: 0:12:17 lr: 0.000013 grad: 0.1964 (0.2089) loss: 0.7509 (0.7519) time: 0.1984 data: 0.1125 max mem: 9377 +Train: [80] [2200/6250] eta: 0:11:55 lr: 0.000013 grad: 0.2015 (0.2086) loss: 0.7448 (0.7517) time: 0.1426 data: 0.0429 max mem: 9377 +Train: [80] [2300/6250] eta: 0:11:35 lr: 0.000013 grad: 0.2056 (0.2084) loss: 0.7338 (0.7514) time: 0.1661 data: 0.0750 max mem: 9377 +Train: [80] [2400/6250] eta: 0:11:14 lr: 0.000013 grad: 0.1961 (0.2081) loss: 0.7390 (0.7512) time: 0.1471 data: 0.0454 max mem: 9377 +Train: [80] [2500/6250] eta: 0:10:54 lr: 0.000013 grad: 0.1917 (0.2077) loss: 0.7492 (0.7510) time: 0.1782 data: 0.0867 max mem: 9377 +Train: [80] [2600/6250] eta: 0:10:35 lr: 0.000013 grad: 0.1985 (0.2073) loss: 0.7479 (0.7509) time: 0.1593 data: 0.0604 max mem: 9377 +Train: [80] [2700/6250] eta: 0:10:15 lr: 0.000013 grad: 0.1914 (0.2069) loss: 0.7473 (0.7509) time: 0.1540 data: 0.0696 max mem: 9377 +Train: [80] [2800/6250] eta: 0:09:55 lr: 0.000013 grad: 0.2008 (0.2067) loss: 0.7399 (0.7507) time: 0.1507 data: 0.0511 max mem: 9377 +Train: [80] [2900/6250] eta: 0:09:36 lr: 0.000013 grad: 0.1985 (0.2064) loss: 0.7509 (0.7506) time: 0.1516 data: 0.0646 max mem: 9377 +Train: [80] [3000/6250] eta: 0:09:18 lr: 0.000013 grad: 0.1939 (0.2062) loss: 0.7437 (0.7504) time: 0.1928 data: 0.1088 max mem: 9377 +Train: [80] [3100/6250] eta: 0:09:00 lr: 0.000013 grad: 0.1991 (0.2059) loss: 0.7579 (0.7503) time: 0.1708 data: 0.0723 max mem: 9377 +Train: [80] [3200/6250] eta: 0:08:42 lr: 0.000013 grad: 0.1954 (0.2057) loss: 0.7457 (0.7503) time: 0.1631 data: 0.0753 max mem: 9377 +Train: [80] [3300/6250] eta: 0:08:23 lr: 0.000013 grad: 0.1973 (0.2056) loss: 0.7449 (0.7504) time: 0.1637 data: 0.0781 max mem: 9377 +Train: [80] [3400/6250] eta: 0:08:08 lr: 0.000012 grad: 0.1886 (0.2054) loss: 0.7594 (0.7504) time: 0.1958 data: 0.1115 max mem: 9377 +Train: [80] [3500/6250] eta: 0:07:51 lr: 0.000012 grad: 0.1919 (0.2054) loss: 0.7507 (0.7505) time: 0.1919 data: 0.1079 max mem: 9377 +Train: [80] [3600/6250] eta: 0:07:34 lr: 0.000012 grad: 0.1969 (0.2052) loss: 0.7504 (0.7505) time: 0.1505 data: 0.0647 max mem: 9377 +Train: [80] [3700/6250] eta: 0:07:17 lr: 0.000012 grad: 0.2026 (0.2050) loss: 0.7469 (0.7505) time: 0.1934 data: 0.1019 max mem: 9377 +Train: [80] [3800/6250] eta: 0:07:00 lr: 0.000012 grad: 0.1928 (0.2048) loss: 0.7516 (0.7506) time: 0.1568 data: 0.0731 max mem: 9377 +Train: [80] [3900/6250] eta: 0:06:43 lr: 0.000012 grad: 0.1920 (0.2045) loss: 0.7524 (0.7506) time: 0.1481 data: 0.0553 max mem: 9377 +Train: [80] [4000/6250] eta: 0:06:26 lr: 0.000012 grad: 0.1937 (0.2044) loss: 0.7504 (0.7506) time: 0.1989 data: 0.1088 max mem: 9377 +Train: [80] [4100/6250] eta: 0:06:08 lr: 0.000012 grad: 0.1953 (0.2043) loss: 0.7460 (0.7506) time: 0.1592 data: 0.0588 max mem: 9377 +Train: [80] [4200/6250] eta: 0:05:51 lr: 0.000012 grad: 0.1919 (0.2042) loss: 0.7533 (0.7507) time: 0.1776 data: 0.0885 max mem: 9377 +Train: [80] [4300/6250] eta: 0:05:33 lr: 0.000012 grad: 0.1934 (0.2041) loss: 0.7535 (0.7507) time: 0.1484 data: 0.0473 max mem: 9377 +Train: [80] [4400/6250] eta: 0:05:15 lr: 0.000012 grad: 0.1956 (0.2040) loss: 0.7578 (0.7507) time: 0.1476 data: 0.0592 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:58 lr: 0.000012 grad: 0.2027 (0.2040) loss: 0.7482 (0.7508) time: 0.1674 data: 0.0791 max mem: 9377 +Train: [80] [4600/6250] eta: 0:04:41 lr: 0.000012 grad: 0.1974 (0.2039) loss: 0.7513 (0.7508) time: 0.1543 data: 0.0693 max mem: 9377 +Train: [80] [4700/6250] eta: 0:04:23 lr: 0.000012 grad: 0.1963 (0.2039) loss: 0.7564 (0.7508) time: 0.1761 data: 0.0886 max mem: 9377 +Train: [80] [4800/6250] eta: 0:04:06 lr: 0.000012 grad: 0.1968 (0.2038) loss: 0.7459 (0.7509) time: 0.1590 data: 0.0622 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:49 lr: 0.000012 grad: 0.1914 (0.2036) loss: 0.7537 (0.7510) time: 0.1897 data: 0.0951 max mem: 9377 +Train: [80] [5000/6250] eta: 0:03:31 lr: 0.000012 grad: 0.2055 (0.2036) loss: 0.7488 (0.7511) time: 0.1565 data: 0.0635 max mem: 9377 +Train: [80] [5100/6250] eta: 0:03:14 lr: 0.000012 grad: 0.1992 (0.2036) loss: 0.7516 (0.7511) time: 0.1563 data: 0.0689 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:57 lr: 0.000012 grad: 0.2001 (0.2035) loss: 0.7578 (0.7512) time: 0.1956 data: 0.1077 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:40 lr: 0.000012 grad: 0.1997 (0.2036) loss: 0.7552 (0.7512) time: 0.1387 data: 0.0394 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:23 lr: 0.000012 grad: 0.1962 (0.2036) loss: 0.7470 (0.7511) time: 0.1564 data: 0.0645 max mem: 9377 +Train: [80] [5500/6250] eta: 0:02:06 lr: 0.000012 grad: 0.2113 (0.2037) loss: 0.7430 (0.7510) time: 0.1498 data: 0.0633 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:49 lr: 0.000012 grad: 0.1918 (0.2037) loss: 0.7598 (0.7510) time: 0.1399 data: 0.0473 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:32 lr: 0.000012 grad: 0.2041 (0.2036) loss: 0.7480 (0.7511) time: 0.1516 data: 0.0640 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:15 lr: 0.000012 grad: 0.2029 (0.2037) loss: 0.7475 (0.7510) time: 0.1518 data: 0.0675 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:58 lr: 0.000012 grad: 0.1997 (0.2037) loss: 0.7447 (0.7510) time: 0.1600 data: 0.0696 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:41 lr: 0.000012 grad: 0.2023 (0.2037) loss: 0.7489 (0.7510) time: 0.1503 data: 0.0695 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:25 lr: 0.000012 grad: 0.2087 (0.2036) loss: 0.7406 (0.7510) time: 0.1444 data: 0.0513 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:08 lr: 0.000012 grad: 0.2024 (0.2037) loss: 0.7406 (0.7509) time: 0.1701 data: 0.0732 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.1972 (0.2036) loss: 0.7405 (0.7509) time: 0.1576 data: 0.0694 max mem: 9377 +Train: [80] Total time: 0:17:28 (0.1677 s / it) +Averaged stats: lr: 0.000012 grad: 0.1972 (0.2036) loss: 0.7405 (0.7509) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:06:40 loss: 0.8523 (0.8523) time: 6.4520 data: 6.4169 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8564 (0.8563) time: 0.1412 data: 0.1164 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:14 (0.2371 s / it) +Averaged stats (hcp-train-subset): loss: 0.8564 (0.8563) +Eval (hcp-val): [80] [ 0/62] eta: 0:06:15 loss: 0.8561 (0.8561) time: 6.0545 data: 6.0234 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8525 (0.8547) time: 0.1476 data: 0.1206 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (hcp-val): loss: 0.8525 (0.8547) +Eval (nsd-val): [80] [ 0/62] eta: 0:05:02 loss: 0.8170 (0.8170) time: 4.8838 data: 4.8477 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8304 (0.8315) time: 0.1315 data: 0.1045 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:13 (0.2256 s / it) +Averaged stats (nsd-val): loss: 0.8304 (0.8315) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 11:10:11 lr: 0.000012 grad: 0.4409 (0.4409) loss: 0.7813 (0.7813) time: 6.4338 data: 6.3165 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:23:09 lr: 0.000012 grad: 0.2529 (0.2676) loss: 0.7731 (0.7774) time: 0.1603 data: 0.0483 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:19:52 lr: 0.000012 grad: 0.1890 (0.2455) loss: 0.7875 (0.7756) time: 0.1478 data: 0.0464 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:18:29 lr: 0.000012 grad: 0.2169 (0.2372) loss: 0.7441 (0.7710) time: 0.1660 data: 0.0597 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:17:27 lr: 0.000012 grad: 0.1915 (0.2308) loss: 0.7514 (0.7684) time: 0.1707 data: 0.0585 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:16:47 lr: 0.000012 grad: 0.1910 (0.2267) loss: 0.7571 (0.7658) time: 0.1812 data: 0.0854 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:16:11 lr: 0.000012 grad: 0.2011 (0.2241) loss: 0.7489 (0.7639) time: 0.1506 data: 0.0514 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:15:43 lr: 0.000012 grad: 0.1949 (0.2211) loss: 0.7482 (0.7633) time: 0.1738 data: 0.0782 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:15:13 lr: 0.000012 grad: 0.1911 (0.2186) loss: 0.7582 (0.7627) time: 0.1529 data: 0.0583 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:14:49 lr: 0.000012 grad: 0.1922 (0.2166) loss: 0.7605 (0.7621) time: 0.1442 data: 0.0488 max mem: 9377 +Train: [81] [1000/6250] eta: 0:14:24 lr: 0.000012 grad: 0.1950 (0.2150) loss: 0.7694 (0.7617) time: 0.1467 data: 0.0542 max mem: 9377 +Train: [81] [1100/6250] eta: 0:14:03 lr: 0.000012 grad: 0.1907 (0.2131) loss: 0.7583 (0.7615) time: 0.1666 data: 0.0810 max mem: 9377 +Train: [81] [1200/6250] eta: 0:13:43 lr: 0.000012 grad: 0.1967 (0.2116) loss: 0.7598 (0.7612) time: 0.1431 data: 0.0496 max mem: 9377 +Train: [81] [1300/6250] eta: 0:13:25 lr: 0.000012 grad: 0.2046 (0.2106) loss: 0.7596 (0.7607) time: 0.1540 data: 0.0695 max mem: 9377 +Train: [81] [1400/6250] eta: 0:13:07 lr: 0.000012 grad: 0.2029 (0.2094) loss: 0.7461 (0.7603) time: 0.1579 data: 0.0749 max mem: 9377 +Train: [81] [1500/6250] eta: 0:12:50 lr: 0.000012 grad: 0.1972 (0.2085) loss: 0.7508 (0.7601) time: 0.1534 data: 0.0620 max mem: 9377 +Train: [81] [1600/6250] eta: 0:12:34 lr: 0.000012 grad: 0.1916 (0.2075) loss: 0.7561 (0.7599) time: 0.1956 data: 0.1185 max mem: 9377 +Train: [81] [1700/6250] eta: 0:12:20 lr: 0.000012 grad: 0.1942 (0.2071) loss: 0.7508 (0.7597) time: 0.1630 data: 0.0854 max mem: 9377 +Train: [81] [1800/6250] eta: 0:12:06 lr: 0.000012 grad: 0.1954 (0.2066) loss: 0.7445 (0.7592) time: 0.1298 data: 0.0373 max mem: 9377 +Train: [81] [1900/6250] eta: 0:11:50 lr: 0.000012 grad: 0.1901 (0.2060) loss: 0.7550 (0.7589) time: 0.1749 data: 0.0924 max mem: 9377 +Train: [81] [2000/6250] eta: 0:11:37 lr: 0.000012 grad: 0.1851 (0.2053) loss: 0.7633 (0.7588) time: 0.1739 data: 0.0849 max mem: 9377 +Train: [81] [2100/6250] eta: 0:11:23 lr: 0.000012 grad: 0.1970 (0.2049) loss: 0.7585 (0.7589) time: 0.1977 data: 0.1160 max mem: 9377 +Train: [81] [2200/6250] eta: 0:11:07 lr: 0.000012 grad: 0.2026 (0.2045) loss: 0.7538 (0.7589) time: 0.1950 data: 0.1037 max mem: 9377 +Train: [81] [2300/6250] eta: 0:10:50 lr: 0.000011 grad: 0.1933 (0.2040) loss: 0.7542 (0.7588) time: 0.1764 data: 0.0790 max mem: 9377 +Train: [81] [2400/6250] eta: 0:10:33 lr: 0.000011 grad: 0.1955 (0.2038) loss: 0.7510 (0.7586) time: 0.1584 data: 0.0607 max mem: 9377 +Train: [81] [2500/6250] eta: 0:10:16 lr: 0.000011 grad: 0.1897 (0.2037) loss: 0.7533 (0.7583) time: 0.1598 data: 0.0668 max mem: 9377 +Train: [81] [2600/6250] eta: 0:10:00 lr: 0.000011 grad: 0.2008 (0.2035) loss: 0.7537 (0.7581) time: 0.1307 data: 0.0418 max mem: 9377 +Train: [81] [2700/6250] eta: 0:09:41 lr: 0.000011 grad: 0.1955 (0.2033) loss: 0.7570 (0.7579) time: 0.1503 data: 0.0589 max mem: 9377 +Train: [81] [2800/6250] eta: 0:09:24 lr: 0.000011 grad: 0.1967 (0.2031) loss: 0.7528 (0.7577) time: 0.1731 data: 0.0783 max mem: 9377 +Train: [81] [2900/6250] eta: 0:09:06 lr: 0.000011 grad: 0.2031 (0.2029) loss: 0.7523 (0.7576) time: 0.1594 data: 0.0773 max mem: 9377 +Train: [81] [3000/6250] eta: 0:08:49 lr: 0.000011 grad: 0.1927 (0.2029) loss: 0.7526 (0.7573) time: 0.1551 data: 0.0684 max mem: 9377 +Train: [81] [3100/6250] eta: 0:08:32 lr: 0.000011 grad: 0.1978 (0.2026) loss: 0.7406 (0.7571) time: 0.1591 data: 0.0731 max mem: 9377 +Train: [81] [3200/6250] eta: 0:08:15 lr: 0.000011 grad: 0.1996 (0.2025) loss: 0.7513 (0.7569) time: 0.1691 data: 0.0817 max mem: 9377 +Train: [81] [3300/6250] eta: 0:07:59 lr: 0.000011 grad: 0.1987 (0.2024) loss: 0.7392 (0.7567) time: 0.1817 data: 0.0999 max mem: 9377 +Train: [81] [3400/6250] eta: 0:07:43 lr: 0.000011 grad: 0.2119 (0.2025) loss: 0.7479 (0.7563) time: 0.1608 data: 0.0704 max mem: 9377 +Train: [81] [3500/6250] eta: 0:07:26 lr: 0.000011 grad: 0.2039 (0.2026) loss: 0.7439 (0.7559) time: 0.1544 data: 0.0680 max mem: 9377 +Train: [81] [3600/6250] eta: 0:07:10 lr: 0.000011 grad: 0.1923 (0.2025) loss: 0.7443 (0.7557) time: 0.1718 data: 0.0824 max mem: 9377 +Train: [81] [3700/6250] eta: 0:06:53 lr: 0.000011 grad: 0.1949 (0.2024) loss: 0.7543 (0.7555) time: 0.1716 data: 0.0898 max mem: 9377 +Train: [81] [3800/6250] eta: 0:06:38 lr: 0.000011 grad: 0.1951 (0.2023) loss: 0.7494 (0.7554) time: 0.1790 data: 0.0985 max mem: 9377 +Train: [81] [3900/6250] eta: 0:06:21 lr: 0.000011 grad: 0.1908 (0.2022) loss: 0.7559 (0.7553) time: 0.1747 data: 0.0849 max mem: 9377 +Train: [81] [4000/6250] eta: 0:06:05 lr: 0.000011 grad: 0.1939 (0.2022) loss: 0.7590 (0.7552) time: 0.1312 data: 0.0348 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:48 lr: 0.000011 grad: 0.1866 (0.2021) loss: 0.7322 (0.7550) time: 0.1679 data: 0.0707 max mem: 9377 +Train: [81] [4200/6250] eta: 0:05:32 lr: 0.000011 grad: 0.1971 (0.2021) loss: 0.7459 (0.7549) time: 0.1617 data: 0.0618 max mem: 9377 +Train: [81] [4300/6250] eta: 0:05:15 lr: 0.000011 grad: 0.2020 (0.2020) loss: 0.7488 (0.7548) time: 0.1643 data: 0.0730 max mem: 9377 +Train: [81] [4400/6250] eta: 0:04:58 lr: 0.000011 grad: 0.2063 (0.2020) loss: 0.7469 (0.7546) time: 0.1460 data: 0.0592 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:41 lr: 0.000011 grad: 0.1971 (0.2021) loss: 0.7493 (0.7544) time: 0.1172 data: 0.0162 max mem: 9377 +Train: [81] [4600/6250] eta: 0:04:25 lr: 0.000011 grad: 0.1982 (0.2021) loss: 0.7488 (0.7542) time: 0.2115 data: 0.1316 max mem: 9377 +Train: [81] [4700/6250] eta: 0:04:09 lr: 0.000011 grad: 0.1983 (0.2022) loss: 0.7465 (0.7540) time: 0.1558 data: 0.0713 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:53 lr: 0.000011 grad: 0.2052 (0.2023) loss: 0.7432 (0.7538) time: 0.1710 data: 0.0875 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:37 lr: 0.000011 grad: 0.2029 (0.2024) loss: 0.7375 (0.7536) time: 0.1649 data: 0.0717 max mem: 9377 +Train: [81] [5000/6250] eta: 0:03:22 lr: 0.000011 grad: 0.2045 (0.2026) loss: 0.7480 (0.7534) time: 0.1945 data: 0.1052 max mem: 9377 +Train: [81] [5100/6250] eta: 0:03:06 lr: 0.000011 grad: 0.2088 (0.2026) loss: 0.7443 (0.7532) time: 0.1734 data: 0.0753 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:50 lr: 0.000011 grad: 0.2069 (0.2028) loss: 0.7479 (0.7530) time: 0.1723 data: 0.0757 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:34 lr: 0.000011 grad: 0.1990 (0.2029) loss: 0.7481 (0.7528) time: 0.1480 data: 0.0532 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:18 lr: 0.000011 grad: 0.1967 (0.2029) loss: 0.7482 (0.7526) time: 0.1528 data: 0.0704 max mem: 9377 +Train: [81] [5500/6250] eta: 0:02:01 lr: 0.000011 grad: 0.2128 (0.2031) loss: 0.7414 (0.7524) time: 0.1469 data: 0.0467 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:45 lr: 0.000011 grad: 0.2108 (0.2032) loss: 0.7385 (0.7522) time: 0.1792 data: 0.0913 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:29 lr: 0.000011 grad: 0.2035 (0.2033) loss: 0.7320 (0.7520) time: 0.1567 data: 0.0684 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:13 lr: 0.000011 grad: 0.2053 (0.2034) loss: 0.7454 (0.7519) time: 0.1877 data: 0.0969 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:57 lr: 0.000011 grad: 0.2011 (0.2036) loss: 0.7427 (0.7517) time: 0.2067 data: 0.1322 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:40 lr: 0.000011 grad: 0.2029 (0.2036) loss: 0.7352 (0.7515) time: 0.1215 data: 0.0345 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:24 lr: 0.000011 grad: 0.2035 (0.2036) loss: 0.7414 (0.7514) time: 0.1481 data: 0.0664 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:08 lr: 0.000011 grad: 0.2048 (0.2037) loss: 0.7494 (0.7513) time: 0.1334 data: 0.0442 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.2113 (0.2037) loss: 0.7375 (0.7512) time: 0.1886 data: 0.1124 max mem: 9377 +Train: [81] Total time: 0:17:08 (0.1646 s / it) +Averaged stats: lr: 0.000011 grad: 0.2113 (0.2037) loss: 0.7375 (0.7512) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:06:21 loss: 0.8502 (0.8502) time: 6.1538 data: 6.1229 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8572 (0.8575) time: 0.1228 data: 0.0979 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-train-subset): loss: 0.8572 (0.8575) +Eval (hcp-val): [81] [ 0/62] eta: 0:05:24 loss: 0.8470 (0.8470) time: 5.2383 data: 5.2066 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8540 (0.8553) time: 0.1356 data: 0.1100 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:14 (0.2340 s / it) +Averaged stats (hcp-val): loss: 0.8540 (0.8553) +Eval (nsd-val): [81] [ 0/62] eta: 0:04:37 loss: 0.8223 (0.8223) time: 4.4684 data: 4.3995 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8297 (0.8312) time: 0.1420 data: 0.1169 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (nsd-val): loss: 0.8297 (0.8312) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 10:24:03 lr: 0.000011 grad: 0.2777 (0.2777) loss: 0.8222 (0.8222) time: 5.9910 data: 5.8398 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:22:59 lr: 0.000011 grad: 0.2785 (0.2598) loss: 0.7591 (0.7681) time: 0.1553 data: 0.0447 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:19:45 lr: 0.000011 grad: 0.2142 (0.2478) loss: 0.7502 (0.7627) time: 0.1454 data: 0.0493 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:18:40 lr: 0.000011 grad: 0.2100 (0.2390) loss: 0.7490 (0.7605) time: 0.1862 data: 0.0735 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:18:11 lr: 0.000011 grad: 0.2095 (0.2329) loss: 0.7537 (0.7589) time: 0.2004 data: 0.1037 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:17:18 lr: 0.000011 grad: 0.2107 (0.2295) loss: 0.7590 (0.7578) time: 0.1679 data: 0.0690 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:16:39 lr: 0.000011 grad: 0.2150 (0.2268) loss: 0.7568 (0.7573) time: 0.1439 data: 0.0493 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:16:14 lr: 0.000011 grad: 0.2010 (0.2242) loss: 0.7485 (0.7565) time: 0.1718 data: 0.0881 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:15:44 lr: 0.000011 grad: 0.2071 (0.2218) loss: 0.7597 (0.7562) time: 0.1531 data: 0.0647 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:15:22 lr: 0.000011 grad: 0.2026 (0.2192) loss: 0.7457 (0.7557) time: 0.1621 data: 0.0690 max mem: 9377 +Train: [82] [1000/6250] eta: 0:14:56 lr: 0.000011 grad: 0.1986 (0.2186) loss: 0.7512 (0.7553) time: 0.1700 data: 0.0882 max mem: 9377 +Train: [82] [1100/6250] eta: 0:14:36 lr: 0.000011 grad: 0.1899 (0.2166) loss: 0.7575 (0.7553) time: 0.1545 data: 0.0736 max mem: 9377 +Train: [82] [1200/6250] eta: 0:14:16 lr: 0.000011 grad: 0.1978 (0.2154) loss: 0.7561 (0.7553) time: 0.1548 data: 0.0589 max mem: 9377 +Train: [82] [1300/6250] eta: 0:13:56 lr: 0.000011 grad: 0.1943 (0.2140) loss: 0.7559 (0.7553) time: 0.1591 data: 0.0678 max mem: 9377 +Train: [82] [1400/6250] eta: 0:13:39 lr: 0.000010 grad: 0.1926 (0.2133) loss: 0.7603 (0.7552) time: 0.1626 data: 0.0663 max mem: 9377 +Train: [82] [1500/6250] eta: 0:13:18 lr: 0.000010 grad: 0.1955 (0.2125) loss: 0.7497 (0.7551) time: 0.1673 data: 0.0759 max mem: 9377 +Train: [82] [1600/6250] eta: 0:13:01 lr: 0.000010 grad: 0.1883 (0.2117) loss: 0.7648 (0.7549) time: 0.1575 data: 0.0760 max mem: 9377 +Train: [82] [1700/6250] eta: 0:12:45 lr: 0.000010 grad: 0.2029 (0.2110) loss: 0.7532 (0.7549) time: 0.1850 data: 0.1072 max mem: 9377 +Train: [82] [1800/6250] eta: 0:12:28 lr: 0.000010 grad: 0.1984 (0.2104) loss: 0.7562 (0.7546) time: 0.1651 data: 0.0833 max mem: 9377 +Train: [82] [1900/6250] eta: 0:12:09 lr: 0.000010 grad: 0.1913 (0.2096) loss: 0.7609 (0.7548) time: 0.1679 data: 0.0885 max mem: 9377 +Train: [82] [2000/6250] eta: 0:11:51 lr: 0.000010 grad: 0.1871 (0.2090) loss: 0.7587 (0.7548) time: 0.1439 data: 0.0454 max mem: 9377 +Train: [82] [2100/6250] eta: 0:11:36 lr: 0.000010 grad: 0.1932 (0.2084) loss: 0.7567 (0.7549) time: 0.1913 data: 0.1010 max mem: 9377 +Train: [82] [2200/6250] eta: 0:11:18 lr: 0.000010 grad: 0.1965 (0.2078) loss: 0.7624 (0.7550) time: 0.1430 data: 0.0486 max mem: 9377 +Train: [82] [2300/6250] eta: 0:11:00 lr: 0.000010 grad: 0.1938 (0.2075) loss: 0.7570 (0.7551) time: 0.1800 data: 0.0858 max mem: 9377 +Train: [82] [2400/6250] eta: 0:10:42 lr: 0.000010 grad: 0.1964 (0.2070) loss: 0.7620 (0.7552) time: 0.1658 data: 0.0783 max mem: 9377 +Train: [82] [2500/6250] eta: 0:10:24 lr: 0.000010 grad: 0.1924 (0.2067) loss: 0.7597 (0.7552) time: 0.1600 data: 0.0613 max mem: 9377 +Train: [82] [2600/6250] eta: 0:10:06 lr: 0.000010 grad: 0.1967 (0.2064) loss: 0.7614 (0.7552) time: 0.1533 data: 0.0660 max mem: 9377 +Train: [82] [2700/6250] eta: 0:09:48 lr: 0.000010 grad: 0.2021 (0.2062) loss: 0.7549 (0.7552) time: 0.1615 data: 0.0695 max mem: 9377 +Train: [82] [2800/6250] eta: 0:09:30 lr: 0.000010 grad: 0.1966 (0.2062) loss: 0.7513 (0.7551) time: 0.1992 data: 0.1144 max mem: 9377 +Train: [82] [2900/6250] eta: 0:09:12 lr: 0.000010 grad: 0.1928 (0.2061) loss: 0.7558 (0.7549) time: 0.1479 data: 0.0554 max mem: 9377 +Train: [82] [3000/6250] eta: 0:08:54 lr: 0.000010 grad: 0.1982 (0.2061) loss: 0.7503 (0.7547) time: 0.1495 data: 0.0650 max mem: 9377 +Train: [82] [3100/6250] eta: 0:08:37 lr: 0.000010 grad: 0.1986 (0.2058) loss: 0.7494 (0.7546) time: 0.1652 data: 0.0761 max mem: 9377 +Train: [82] [3200/6250] eta: 0:08:20 lr: 0.000010 grad: 0.1875 (0.2055) loss: 0.7569 (0.7545) time: 0.1532 data: 0.0555 max mem: 9377 +Train: [82] [3300/6250] eta: 0:08:04 lr: 0.000010 grad: 0.1897 (0.2053) loss: 0.7539 (0.7544) time: 0.1589 data: 0.0716 max mem: 9377 +Train: [82] [3400/6250] eta: 0:07:48 lr: 0.000010 grad: 0.1943 (0.2051) loss: 0.7527 (0.7543) time: 0.1603 data: 0.0678 max mem: 9377 +Train: [82] [3500/6250] eta: 0:07:31 lr: 0.000010 grad: 0.1914 (0.2050) loss: 0.7485 (0.7542) time: 0.1580 data: 0.0687 max mem: 9377 +Train: [82] [3600/6250] eta: 0:07:14 lr: 0.000010 grad: 0.1984 (0.2050) loss: 0.7511 (0.7540) time: 0.1868 data: 0.1012 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:58 lr: 0.000010 grad: 0.1992 (0.2049) loss: 0.7461 (0.7538) time: 0.1325 data: 0.0356 max mem: 9377 +Train: [82] [3800/6250] eta: 0:06:41 lr: 0.000010 grad: 0.1927 (0.2048) loss: 0.7467 (0.7537) time: 0.1783 data: 0.0877 max mem: 9377 +Train: [82] [3900/6250] eta: 0:06:24 lr: 0.000010 grad: 0.1962 (0.2047) loss: 0.7516 (0.7536) time: 0.1681 data: 0.0770 max mem: 9377 +Train: [82] [4000/6250] eta: 0:06:07 lr: 0.000010 grad: 0.2000 (0.2046) loss: 0.7436 (0.7536) time: 0.1369 data: 0.0465 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:50 lr: 0.000010 grad: 0.2044 (0.2045) loss: 0.7468 (0.7536) time: 0.1612 data: 0.0715 max mem: 9377 +Train: [82] [4200/6250] eta: 0:05:33 lr: 0.000010 grad: 0.2009 (0.2045) loss: 0.7567 (0.7534) time: 0.1412 data: 0.0397 max mem: 9377 +Train: [82] [4300/6250] eta: 0:05:16 lr: 0.000010 grad: 0.1986 (0.2044) loss: 0.7582 (0.7534) time: 0.1544 data: 0.0595 max mem: 9377 +Train: [82] [4400/6250] eta: 0:04:59 lr: 0.000010 grad: 0.1947 (0.2044) loss: 0.7531 (0.7533) time: 0.1414 data: 0.0506 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:43 lr: 0.000010 grad: 0.1997 (0.2043) loss: 0.7473 (0.7533) time: 0.1591 data: 0.0672 max mem: 9377 +Train: [82] [4600/6250] eta: 0:04:27 lr: 0.000010 grad: 0.1950 (0.2042) loss: 0.7598 (0.7533) time: 0.1509 data: 0.0642 max mem: 9377 +Train: [82] [4700/6250] eta: 0:04:11 lr: 0.000010 grad: 0.2009 (0.2044) loss: 0.7451 (0.7531) time: 0.1460 data: 0.0599 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:54 lr: 0.000010 grad: 0.1988 (0.2044) loss: 0.7493 (0.7530) time: 0.1510 data: 0.0593 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:38 lr: 0.000010 grad: 0.2084 (0.2044) loss: 0.7438 (0.7528) time: 0.1579 data: 0.0668 max mem: 9377 +Train: [82] [5000/6250] eta: 0:03:22 lr: 0.000010 grad: 0.1995 (0.2044) loss: 0.7539 (0.7527) time: 0.1562 data: 0.0698 max mem: 9377 +Train: [82] [5100/6250] eta: 0:03:06 lr: 0.000010 grad: 0.1980 (0.2044) loss: 0.7412 (0.7526) time: 0.1623 data: 0.0680 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:49 lr: 0.000010 grad: 0.2078 (0.2044) loss: 0.7423 (0.7525) time: 0.1406 data: 0.0374 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:33 lr: 0.000010 grad: 0.2089 (0.2045) loss: 0.7492 (0.7524) time: 0.1226 data: 0.0220 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:17 lr: 0.000010 grad: 0.2036 (0.2045) loss: 0.7510 (0.7523) time: 0.1544 data: 0.0637 max mem: 9377 +Train: [82] [5500/6250] eta: 0:02:00 lr: 0.000010 grad: 0.1982 (0.2046) loss: 0.7383 (0.7522) time: 0.1668 data: 0.0811 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:44 lr: 0.000010 grad: 0.2028 (0.2046) loss: 0.7414 (0.7521) time: 0.1589 data: 0.0706 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:28 lr: 0.000010 grad: 0.2085 (0.2047) loss: 0.7416 (0.7520) time: 0.1667 data: 0.0812 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:12 lr: 0.000010 grad: 0.2057 (0.2048) loss: 0.7453 (0.7519) time: 0.1417 data: 0.0419 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:56 lr: 0.000010 grad: 0.2037 (0.2048) loss: 0.7403 (0.7518) time: 0.1500 data: 0.0539 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:40 lr: 0.000010 grad: 0.2024 (0.2048) loss: 0.7453 (0.7517) time: 0.1637 data: 0.0762 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:24 lr: 0.000010 grad: 0.2043 (0.2049) loss: 0.7506 (0.7516) time: 0.1751 data: 0.0941 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:08 lr: 0.000010 grad: 0.2064 (0.2049) loss: 0.7353 (0.7515) time: 0.1454 data: 0.0604 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.2004 (0.2050) loss: 0.7428 (0.7514) time: 0.1533 data: 0.0622 max mem: 9377 +Train: [82] Total time: 0:16:53 (0.1621 s / it) +Averaged stats: lr: 0.000010 grad: 0.2004 (0.2050) loss: 0.7428 (0.7514) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:05:57 loss: 0.8503 (0.8503) time: 5.7641 data: 5.7334 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8560 (0.8579) time: 0.1560 data: 0.1305 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:15 (0.2553 s / it) +Averaged stats (hcp-train-subset): loss: 0.8560 (0.8579) +Eval (hcp-val): [82] [ 0/62] eta: 0:05:49 loss: 0.8512 (0.8512) time: 5.6368 data: 5.6063 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8553 (0.8565) time: 0.1269 data: 0.1000 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:15 (0.2527 s / it) +Averaged stats (hcp-val): loss: 0.8553 (0.8565) +Eval (nsd-val): [82] [ 0/62] eta: 0:05:45 loss: 0.8206 (0.8206) time: 5.5663 data: 5.5339 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8312 (0.8322) time: 0.1560 data: 0.1303 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:15 (0.2527 s / it) +Averaged stats (nsd-val): loss: 0.8312 (0.8322) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 10:07:02 lr: 0.000010 grad: 0.2924 (0.2924) loss: 0.7712 (0.7712) time: 5.8277 data: 5.4266 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:24:33 lr: 0.000010 grad: 0.2188 (0.2772) loss: 0.7559 (0.7596) time: 0.2020 data: 0.0977 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:21:21 lr: 0.000010 grad: 0.1942 (0.2548) loss: 0.7688 (0.7546) time: 0.2014 data: 0.0996 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:19:57 lr: 0.000010 grad: 0.2133 (0.2443) loss: 0.7542 (0.7520) time: 0.1645 data: 0.0697 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:19:02 lr: 0.000010 grad: 0.2230 (0.2386) loss: 0.7272 (0.7491) time: 0.1601 data: 0.0569 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:18:21 lr: 0.000010 grad: 0.2103 (0.2334) loss: 0.7492 (0.7487) time: 0.1628 data: 0.0621 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:17:43 lr: 0.000010 grad: 0.2014 (0.2321) loss: 0.7492 (0.7482) time: 0.1669 data: 0.0655 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:17:03 lr: 0.000009 grad: 0.2290 (0.2312) loss: 0.7350 (0.7468) time: 0.1782 data: 0.0763 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:16:26 lr: 0.000009 grad: 0.2127 (0.2294) loss: 0.7281 (0.7460) time: 0.1558 data: 0.0641 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:15:57 lr: 0.000009 grad: 0.1976 (0.2276) loss: 0.7577 (0.7459) time: 0.1530 data: 0.0628 max mem: 9377 +Train: [83] [1000/6250] eta: 0:15:30 lr: 0.000009 grad: 0.2188 (0.2259) loss: 0.7445 (0.7459) time: 0.1564 data: 0.0609 max mem: 9377 +Train: [83] [1100/6250] eta: 0:15:03 lr: 0.000009 grad: 0.2131 (0.2243) loss: 0.7473 (0.7459) time: 0.1296 data: 0.0311 max mem: 9377 +Train: [83] [1200/6250] eta: 0:14:40 lr: 0.000009 grad: 0.2053 (0.2227) loss: 0.7372 (0.7459) time: 0.1760 data: 0.0919 max mem: 9377 +Train: [83] [1300/6250] eta: 0:14:13 lr: 0.000009 grad: 0.2038 (0.2214) loss: 0.7428 (0.7459) time: 0.1536 data: 0.0616 max mem: 9377 +Train: [83] [1400/6250] eta: 0:13:51 lr: 0.000009 grad: 0.1958 (0.2199) loss: 0.7529 (0.7463) time: 0.1567 data: 0.0547 max mem: 9377 +Train: [83] [1500/6250] eta: 0:13:26 lr: 0.000009 grad: 0.2023 (0.2190) loss: 0.7424 (0.7462) time: 0.1426 data: 0.0549 max mem: 9377 +Train: [83] [1600/6250] eta: 0:13:05 lr: 0.000009 grad: 0.2033 (0.2183) loss: 0.7431 (0.7461) time: 0.1383 data: 0.0344 max mem: 9377 +Train: [83] [1700/6250] eta: 0:12:49 lr: 0.000009 grad: 0.2023 (0.2179) loss: 0.7436 (0.7460) time: 0.1586 data: 0.0739 max mem: 9377 +Train: [83] [1800/6250] eta: 0:12:33 lr: 0.000009 grad: 0.1941 (0.2176) loss: 0.7506 (0.7457) time: 0.1808 data: 0.1032 max mem: 9377 +Train: [83] [1900/6250] eta: 0:12:15 lr: 0.000009 grad: 0.2035 (0.2170) loss: 0.7431 (0.7456) time: 0.1610 data: 0.0769 max mem: 9377 +Train: [83] [2000/6250] eta: 0:11:58 lr: 0.000009 grad: 0.1999 (0.2164) loss: 0.7378 (0.7454) time: 0.1739 data: 0.0925 max mem: 9377 +Train: [83] [2100/6250] eta: 0:11:41 lr: 0.000009 grad: 0.2077 (0.2159) loss: 0.7473 (0.7455) time: 0.1703 data: 0.0836 max mem: 9377 +Train: [83] [2200/6250] eta: 0:11:26 lr: 0.000009 grad: 0.1921 (0.2153) loss: 0.7483 (0.7457) time: 0.1843 data: 0.0982 max mem: 9377 +Train: [83] [2300/6250] eta: 0:11:07 lr: 0.000009 grad: 0.2099 (0.2149) loss: 0.7395 (0.7457) time: 0.1625 data: 0.0670 max mem: 9377 +Train: [83] [2400/6250] eta: 0:10:48 lr: 0.000009 grad: 0.2002 (0.2143) loss: 0.7368 (0.7458) time: 0.1570 data: 0.0623 max mem: 9377 +Train: [83] [2500/6250] eta: 0:10:29 lr: 0.000009 grad: 0.2022 (0.2139) loss: 0.7455 (0.7457) time: 0.1364 data: 0.0376 max mem: 9377 +Train: [83] [2600/6250] eta: 0:10:10 lr: 0.000009 grad: 0.1962 (0.2135) loss: 0.7453 (0.7458) time: 0.1531 data: 0.0665 max mem: 9377 +Train: [83] [2700/6250] eta: 0:09:51 lr: 0.000009 grad: 0.2066 (0.2131) loss: 0.7320 (0.7457) time: 0.1605 data: 0.0795 max mem: 9377 +Train: [83] [2800/6250] eta: 0:09:34 lr: 0.000009 grad: 0.1986 (0.2127) loss: 0.7535 (0.7458) time: 0.2284 data: 0.1333 max mem: 9377 +Train: [83] [2900/6250] eta: 0:09:15 lr: 0.000009 grad: 0.2000 (0.2125) loss: 0.7451 (0.7458) time: 0.1538 data: 0.0638 max mem: 9377 +Train: [83] [3000/6250] eta: 0:08:57 lr: 0.000009 grad: 0.1991 (0.2121) loss: 0.7480 (0.7459) time: 0.1495 data: 0.0578 max mem: 9377 +Train: [83] [3100/6250] eta: 0:08:39 lr: 0.000009 grad: 0.2020 (0.2118) loss: 0.7539 (0.7460) time: 0.1539 data: 0.0578 max mem: 9377 +Train: [83] [3200/6250] eta: 0:08:21 lr: 0.000009 grad: 0.1976 (0.2116) loss: 0.7465 (0.7460) time: 0.1962 data: 0.0963 max mem: 9377 +Train: [83] [3300/6250] eta: 0:08:05 lr: 0.000009 grad: 0.2016 (0.2112) loss: 0.7452 (0.7460) time: 0.1946 data: 0.1111 max mem: 9377 +Train: [83] [3400/6250] eta: 0:07:48 lr: 0.000009 grad: 0.2045 (0.2110) loss: 0.7503 (0.7460) time: 0.1798 data: 0.0918 max mem: 9377 +Train: [83] [3500/6250] eta: 0:07:33 lr: 0.000009 grad: 0.1884 (0.2107) loss: 0.7545 (0.7462) time: 0.1740 data: 0.0879 max mem: 9377 +Train: [83] [3600/6250] eta: 0:07:17 lr: 0.000009 grad: 0.1966 (0.2106) loss: 0.7397 (0.7462) time: 0.1726 data: 0.0863 max mem: 9377 +Train: [83] [3700/6250] eta: 0:07:02 lr: 0.000009 grad: 0.1985 (0.2103) loss: 0.7479 (0.7463) time: 0.1921 data: 0.0997 max mem: 9377 +Train: [83] [3800/6250] eta: 0:06:47 lr: 0.000009 grad: 0.2052 (0.2101) loss: 0.7443 (0.7464) time: 0.1727 data: 0.0753 max mem: 9377 +Train: [83] [3900/6250] eta: 0:06:31 lr: 0.000009 grad: 0.2007 (0.2100) loss: 0.7464 (0.7464) time: 0.1772 data: 0.0964 max mem: 9377 +Train: [83] [4000/6250] eta: 0:06:15 lr: 0.000009 grad: 0.2024 (0.2099) loss: 0.7473 (0.7465) time: 0.1494 data: 0.0541 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:57 lr: 0.000009 grad: 0.1952 (0.2096) loss: 0.7547 (0.7467) time: 0.1591 data: 0.0652 max mem: 9377 +Train: [83] [4200/6250] eta: 0:05:40 lr: 0.000009 grad: 0.2076 (0.2093) loss: 0.7425 (0.7469) time: 0.1406 data: 0.0510 max mem: 9377 +Train: [83] [4300/6250] eta: 0:05:23 lr: 0.000009 grad: 0.1981 (0.2091) loss: 0.7534 (0.7470) time: 0.1741 data: 0.0820 max mem: 9377 +Train: [83] [4400/6250] eta: 0:05:07 lr: 0.000009 grad: 0.2024 (0.2089) loss: 0.7585 (0.7472) time: 0.1815 data: 0.0990 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:50 lr: 0.000009 grad: 0.1981 (0.2088) loss: 0.7474 (0.7474) time: 0.1940 data: 0.1149 max mem: 9377 +Train: [83] [4600/6250] eta: 0:04:34 lr: 0.000009 grad: 0.1900 (0.2085) loss: 0.7603 (0.7476) time: 0.1570 data: 0.0759 max mem: 9377 +Train: [83] [4700/6250] eta: 0:04:17 lr: 0.000009 grad: 0.1975 (0.2083) loss: 0.7503 (0.7478) time: 0.1629 data: 0.0832 max mem: 9377 +Train: [83] [4800/6250] eta: 0:04:00 lr: 0.000009 grad: 0.2049 (0.2082) loss: 0.7454 (0.7478) time: 0.1562 data: 0.0715 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:44 lr: 0.000009 grad: 0.1940 (0.2080) loss: 0.7450 (0.7478) time: 0.1726 data: 0.0787 max mem: 9377 +Train: [83] [5000/6250] eta: 0:03:27 lr: 0.000009 grad: 0.1974 (0.2079) loss: 0.7555 (0.7479) time: 0.1746 data: 0.0903 max mem: 9377 +Train: [83] [5100/6250] eta: 0:03:10 lr: 0.000009 grad: 0.1996 (0.2079) loss: 0.7506 (0.7480) time: 0.1706 data: 0.0854 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:54 lr: 0.000009 grad: 0.1975 (0.2078) loss: 0.7548 (0.7480) time: 0.1481 data: 0.0566 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:37 lr: 0.000009 grad: 0.2036 (0.2078) loss: 0.7461 (0.7479) time: 0.1580 data: 0.0711 max mem: 9377 +Train: [83] [5400/6250] eta: 0:02:20 lr: 0.000009 grad: 0.1978 (0.2077) loss: 0.7492 (0.7479) time: 0.1527 data: 0.0717 max mem: 9377 +Train: [83] [5500/6250] eta: 0:02:03 lr: 0.000009 grad: 0.2041 (0.2077) loss: 0.7551 (0.7479) time: 0.1487 data: 0.0561 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:47 lr: 0.000009 grad: 0.2007 (0.2076) loss: 0.7532 (0.7480) time: 0.1265 data: 0.0318 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:30 lr: 0.000009 grad: 0.2054 (0.2075) loss: 0.7519 (0.7480) time: 0.1668 data: 0.0765 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:13 lr: 0.000009 grad: 0.2021 (0.2074) loss: 0.7396 (0.7480) time: 0.1681 data: 0.0896 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:57 lr: 0.000009 grad: 0.1983 (0.2074) loss: 0.7519 (0.7481) time: 0.1689 data: 0.0759 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:40 lr: 0.000009 grad: 0.1952 (0.2072) loss: 0.7541 (0.7482) time: 0.1594 data: 0.0761 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:24 lr: 0.000009 grad: 0.1932 (0.2070) loss: 0.7561 (0.7484) time: 0.1521 data: 0.0694 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:08 lr: 0.000009 grad: 0.1919 (0.2068) loss: 0.7516 (0.7485) time: 0.1673 data: 0.0788 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1954 (0.2067) loss: 0.7649 (0.7486) time: 0.1540 data: 0.0670 max mem: 9377 +Train: [83] Total time: 0:17:08 (0.1646 s / it) +Averaged stats: lr: 0.000009 grad: 0.1954 (0.2067) loss: 0.7649 (0.7486) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:06:03 loss: 0.8494 (0.8494) time: 5.8660 data: 5.8362 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8573 (0.8575) time: 0.1418 data: 0.1141 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:14 (0.2417 s / it) +Averaged stats (hcp-train-subset): loss: 0.8573 (0.8575) +Eval (hcp-val): [83] [ 0/62] eta: 0:04:46 loss: 0.8515 (0.8515) time: 4.6152 data: 4.5480 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8541 (0.8564) time: 0.1275 data: 0.1027 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:14 (0.2289 s / it) +Averaged stats (hcp-val): loss: 0.8541 (0.8564) +Eval (nsd-val): [83] [ 0/62] eta: 0:05:14 loss: 0.8245 (0.8245) time: 5.0715 data: 5.0415 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8317 (0.8328) time: 0.1321 data: 0.1071 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:14 (0.2365 s / it) +Averaged stats (nsd-val): loss: 0.8317 (0.8328) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 9:52:23 lr: 0.000009 grad: 0.1415 (0.1415) loss: 0.8538 (0.8538) time: 5.6869 data: 5.4388 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:22:30 lr: 0.000009 grad: 0.2262 (0.2583) loss: 0.7607 (0.7652) time: 0.1686 data: 0.0567 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:20:13 lr: 0.000009 grad: 0.2173 (0.2427) loss: 0.7453 (0.7605) time: 0.1752 data: 0.0729 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:18:54 lr: 0.000008 grad: 0.2157 (0.2360) loss: 0.7548 (0.7572) time: 0.1614 data: 0.0507 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:18:06 lr: 0.000008 grad: 0.2081 (0.2330) loss: 0.7564 (0.7560) time: 0.1637 data: 0.0620 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:17:23 lr: 0.000008 grad: 0.2138 (0.2300) loss: 0.7501 (0.7547) time: 0.1531 data: 0.0356 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:16:50 lr: 0.000008 grad: 0.2048 (0.2276) loss: 0.7495 (0.7533) time: 0.1808 data: 0.0827 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:16:17 lr: 0.000008 grad: 0.2144 (0.2248) loss: 0.7455 (0.7527) time: 0.1126 data: 0.0149 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:15:52 lr: 0.000008 grad: 0.1986 (0.2229) loss: 0.7499 (0.7520) time: 0.1630 data: 0.0731 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:15:25 lr: 0.000008 grad: 0.1949 (0.2207) loss: 0.7591 (0.7525) time: 0.1714 data: 0.0752 max mem: 9377 +Train: [84] [1000/6250] eta: 0:14:59 lr: 0.000008 grad: 0.1874 (0.2188) loss: 0.7662 (0.7531) time: 0.1603 data: 0.0681 max mem: 9377 +Train: [84] [1100/6250] eta: 0:14:36 lr: 0.000008 grad: 0.1983 (0.2172) loss: 0.7637 (0.7533) time: 0.1699 data: 0.0844 max mem: 9377 +Train: [84] [1200/6250] eta: 0:14:13 lr: 0.000008 grad: 0.1970 (0.2156) loss: 0.7521 (0.7538) time: 0.1683 data: 0.0817 max mem: 9377 +Train: [84] [1300/6250] eta: 0:13:51 lr: 0.000008 grad: 0.1945 (0.2145) loss: 0.7609 (0.7541) time: 0.1681 data: 0.0756 max mem: 9377 +Train: [84] [1400/6250] eta: 0:13:30 lr: 0.000008 grad: 0.2061 (0.2134) loss: 0.7553 (0.7545) time: 0.1605 data: 0.0698 max mem: 9377 +Train: [84] [1500/6250] eta: 0:13:13 lr: 0.000008 grad: 0.1955 (0.2125) loss: 0.7680 (0.7550) time: 0.1947 data: 0.1065 max mem: 9377 +Train: [84] [1600/6250] eta: 0:12:55 lr: 0.000008 grad: 0.2016 (0.2117) loss: 0.7524 (0.7551) time: 0.1745 data: 0.0882 max mem: 9377 +Train: [84] [1700/6250] eta: 0:12:40 lr: 0.000008 grad: 0.2040 (0.2111) loss: 0.7500 (0.7552) time: 0.1679 data: 0.0768 max mem: 9377 +Train: [84] [1800/6250] eta: 0:12:21 lr: 0.000008 grad: 0.2104 (0.2109) loss: 0.7436 (0.7550) time: 0.1483 data: 0.0606 max mem: 9377 +Train: [84] [1900/6250] eta: 0:12:03 lr: 0.000008 grad: 0.2027 (0.2107) loss: 0.7368 (0.7549) time: 0.1817 data: 0.0979 max mem: 9377 +Train: [84] [2000/6250] eta: 0:11:44 lr: 0.000008 grad: 0.1999 (0.2105) loss: 0.7514 (0.7547) time: 0.1755 data: 0.0848 max mem: 9377 +Train: [84] [2100/6250] eta: 0:11:29 lr: 0.000008 grad: 0.1973 (0.2101) loss: 0.7527 (0.7547) time: 0.1405 data: 0.0435 max mem: 9377 +Train: [84] [2200/6250] eta: 0:11:12 lr: 0.000008 grad: 0.2024 (0.2097) loss: 0.7451 (0.7546) time: 0.1767 data: 0.0945 max mem: 9377 +Train: [84] [2300/6250] eta: 0:10:55 lr: 0.000008 grad: 0.1998 (0.2097) loss: 0.7552 (0.7543) time: 0.1551 data: 0.0668 max mem: 9377 +Train: [84] [2400/6250] eta: 0:10:38 lr: 0.000008 grad: 0.1983 (0.2094) loss: 0.7435 (0.7541) time: 0.1572 data: 0.0548 max mem: 9377 +Train: [84] [2500/6250] eta: 0:10:20 lr: 0.000008 grad: 0.2014 (0.2093) loss: 0.7462 (0.7538) time: 0.1617 data: 0.0662 max mem: 9377 +Train: [84] [2600/6250] eta: 0:10:01 lr: 0.000008 grad: 0.1982 (0.2092) loss: 0.7559 (0.7536) time: 0.1439 data: 0.0409 max mem: 9377 +Train: [84] [2700/6250] eta: 0:09:42 lr: 0.000008 grad: 0.2023 (0.2090) loss: 0.7511 (0.7534) time: 0.1461 data: 0.0417 max mem: 9377 +Train: [84] [2800/6250] eta: 0:09:24 lr: 0.000008 grad: 0.2078 (0.2090) loss: 0.7492 (0.7532) time: 0.1396 data: 0.0513 max mem: 9377 +Train: [84] [2900/6250] eta: 0:09:08 lr: 0.000008 grad: 0.1999 (0.2089) loss: 0.7570 (0.7531) time: 0.1791 data: 0.0914 max mem: 9377 +Train: [84] [3000/6250] eta: 0:08:50 lr: 0.000008 grad: 0.2080 (0.2089) loss: 0.7510 (0.7531) time: 0.1529 data: 0.0656 max mem: 9377 +Train: [84] [3100/6250] eta: 0:08:32 lr: 0.000008 grad: 0.1941 (0.2086) loss: 0.7559 (0.7531) time: 0.1386 data: 0.0447 max mem: 9377 +Train: [84] [3200/6250] eta: 0:08:17 lr: 0.000008 grad: 0.1969 (0.2084) loss: 0.7634 (0.7533) time: 0.1578 data: 0.0768 max mem: 9377 +Train: [84] [3300/6250] eta: 0:08:00 lr: 0.000008 grad: 0.1934 (0.2081) loss: 0.7702 (0.7534) time: 0.1319 data: 0.0401 max mem: 9377 +Train: [84] [3400/6250] eta: 0:07:44 lr: 0.000008 grad: 0.2060 (0.2081) loss: 0.7486 (0.7534) time: 0.1552 data: 0.0690 max mem: 9377 +Train: [84] [3500/6250] eta: 0:07:28 lr: 0.000008 grad: 0.1972 (0.2079) loss: 0.7685 (0.7535) time: 0.1654 data: 0.0804 max mem: 9377 +Train: [84] [3600/6250] eta: 0:07:11 lr: 0.000008 grad: 0.1977 (0.2077) loss: 0.7483 (0.7536) time: 0.1823 data: 0.0922 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:54 lr: 0.000008 grad: 0.1968 (0.2075) loss: 0.7561 (0.7537) time: 0.1545 data: 0.0617 max mem: 9377 +Train: [84] [3800/6250] eta: 0:06:37 lr: 0.000008 grad: 0.1995 (0.2074) loss: 0.7490 (0.7537) time: 0.1407 data: 0.0432 max mem: 9377 +Train: [84] [3900/6250] eta: 0:06:21 lr: 0.000008 grad: 0.2004 (0.2072) loss: 0.7590 (0.7538) time: 0.1454 data: 0.0587 max mem: 9377 +Train: [84] [4000/6250] eta: 0:06:04 lr: 0.000008 grad: 0.2027 (0.2072) loss: 0.7379 (0.7537) time: 0.1468 data: 0.0528 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:47 lr: 0.000008 grad: 0.2073 (0.2072) loss: 0.7486 (0.7536) time: 0.1530 data: 0.0539 max mem: 9377 +Train: [84] [4200/6250] eta: 0:05:31 lr: 0.000008 grad: 0.2100 (0.2071) loss: 0.7406 (0.7535) time: 0.1448 data: 0.0538 max mem: 9377 +Train: [84] [4300/6250] eta: 0:05:14 lr: 0.000008 grad: 0.1984 (0.2071) loss: 0.7501 (0.7535) time: 0.1437 data: 0.0525 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:57 lr: 0.000008 grad: 0.2063 (0.2071) loss: 0.7386 (0.7533) time: 0.1364 data: 0.0464 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:42 lr: 0.000008 grad: 0.1977 (0.2071) loss: 0.7558 (0.7532) time: 0.2089 data: 0.1209 max mem: 9377 +Train: [84] [4600/6250] eta: 0:04:25 lr: 0.000008 grad: 0.2021 (0.2071) loss: 0.7557 (0.7531) time: 0.1512 data: 0.0640 max mem: 9377 +Train: [84] [4700/6250] eta: 0:04:10 lr: 0.000008 grad: 0.2058 (0.2071) loss: 0.7473 (0.7531) time: 0.1887 data: 0.0928 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:53 lr: 0.000008 grad: 0.2087 (0.2070) loss: 0.7402 (0.7530) time: 0.1532 data: 0.0661 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:38 lr: 0.000008 grad: 0.2008 (0.2070) loss: 0.7469 (0.7530) time: 0.2026 data: 0.1148 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:22 lr: 0.000008 grad: 0.1955 (0.2068) loss: 0.7500 (0.7531) time: 0.1848 data: 0.0924 max mem: 9377 +Train: [84] [5100/6250] eta: 0:03:06 lr: 0.000008 grad: 0.1968 (0.2068) loss: 0.7560 (0.7531) time: 0.1803 data: 0.0901 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:50 lr: 0.000008 grad: 0.2105 (0.2067) loss: 0.7430 (0.7531) time: 0.1713 data: 0.0779 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:34 lr: 0.000008 grad: 0.2070 (0.2067) loss: 0.7480 (0.7530) time: 0.1464 data: 0.0498 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:18 lr: 0.000008 grad: 0.2048 (0.2067) loss: 0.7490 (0.7530) time: 0.1599 data: 0.0707 max mem: 9377 +Train: [84] [5500/6250] eta: 0:02:01 lr: 0.000008 grad: 0.2094 (0.2067) loss: 0.7581 (0.7529) time: 0.1581 data: 0.0660 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:45 lr: 0.000008 grad: 0.1977 (0.2068) loss: 0.7463 (0.7528) time: 0.1790 data: 0.0876 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:29 lr: 0.000008 grad: 0.2000 (0.2067) loss: 0.7558 (0.7527) time: 0.1624 data: 0.0690 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:12 lr: 0.000008 grad: 0.2023 (0.2067) loss: 0.7466 (0.7526) time: 0.1604 data: 0.0716 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:56 lr: 0.000008 grad: 0.1885 (0.2067) loss: 0.7544 (0.7525) time: 0.1712 data: 0.0760 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:40 lr: 0.000008 grad: 0.2110 (0.2067) loss: 0.7378 (0.7523) time: 0.1436 data: 0.0566 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:24 lr: 0.000008 grad: 0.2079 (0.2068) loss: 0.7508 (0.7522) time: 0.1124 data: 0.0201 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:08 lr: 0.000008 grad: 0.2023 (0.2067) loss: 0.7454 (0.7521) time: 0.1621 data: 0.0830 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.2103 (0.2067) loss: 0.7379 (0.7521) time: 0.1627 data: 0.0759 max mem: 9377 +Train: [84] Total time: 0:16:55 (0.1626 s / it) +Averaged stats: lr: 0.000008 grad: 0.2103 (0.2067) loss: 0.7379 (0.7521) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:06:46 loss: 0.8482 (0.8482) time: 6.5635 data: 6.5342 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8553 (0.8571) time: 0.1494 data: 0.1227 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:15 (0.2467 s / it) +Averaged stats (hcp-train-subset): loss: 0.8553 (0.8571) +Making plots (hcp-train-subset): example=49 +Eval (hcp-val): [84] [ 0/62] eta: 0:04:00 loss: 0.8522 (0.8522) time: 3.8729 data: 3.7938 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8527 (0.8553) time: 0.1541 data: 0.1270 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:15 (0.2532 s / it) +Averaged stats (hcp-val): loss: 0.8527 (0.8553) +Making plots (hcp-val): example=57 +Eval (nsd-val): [84] [ 0/62] eta: 0:05:16 loss: 0.8208 (0.8208) time: 5.1126 data: 5.0343 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8280 (0.8303) time: 0.1508 data: 0.1228 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:14 (0.2370 s / it) +Averaged stats (nsd-val): loss: 0.8280 (0.8303) +Making plots (nsd-val): example=40 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 11:34:27 lr: 0.000008 grad: 0.5436 (0.5436) loss: 0.6748 (0.6748) time: 6.6668 data: 6.5058 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:23:48 lr: 0.000008 grad: 0.2478 (0.2793) loss: 0.7500 (0.7664) time: 0.1682 data: 0.0551 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:20:17 lr: 0.000008 grad: 0.2218 (0.2569) loss: 0.7421 (0.7598) time: 0.1569 data: 0.0518 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:18:53 lr: 0.000007 grad: 0.2138 (0.2468) loss: 0.7525 (0.7585) time: 0.1865 data: 0.0879 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:17:52 lr: 0.000007 grad: 0.2174 (0.2413) loss: 0.7446 (0.7547) time: 0.1717 data: 0.0716 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:17:11 lr: 0.000007 grad: 0.2201 (0.2375) loss: 0.7384 (0.7522) time: 0.1585 data: 0.0434 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:16:29 lr: 0.000007 grad: 0.2070 (0.2356) loss: 0.7385 (0.7487) time: 0.1485 data: 0.0510 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:15:55 lr: 0.000007 grad: 0.2173 (0.2344) loss: 0.7356 (0.7465) time: 0.1472 data: 0.0483 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:15:29 lr: 0.000007 grad: 0.2284 (0.2329) loss: 0.7348 (0.7450) time: 0.1622 data: 0.0721 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:15:07 lr: 0.000007 grad: 0.1914 (0.2310) loss: 0.7698 (0.7449) time: 0.1963 data: 0.1061 max mem: 9377 +Train: [85] [1000/6250] eta: 0:14:43 lr: 0.000007 grad: 0.2126 (0.2294) loss: 0.7475 (0.7450) time: 0.1687 data: 0.0795 max mem: 9377 +Train: [85] [1100/6250] eta: 0:14:25 lr: 0.000007 grad: 0.2110 (0.2276) loss: 0.7415 (0.7451) time: 0.1888 data: 0.0981 max mem: 9377 +Train: [85] [1200/6250] eta: 0:14:03 lr: 0.000007 grad: 0.1974 (0.2261) loss: 0.7424 (0.7452) time: 0.1642 data: 0.0722 max mem: 9377 +Train: [85] [1300/6250] eta: 0:13:42 lr: 0.000007 grad: 0.2055 (0.2249) loss: 0.7339 (0.7452) time: 0.1443 data: 0.0607 max mem: 9377 +Train: [85] [1400/6250] eta: 0:13:24 lr: 0.000007 grad: 0.2034 (0.2238) loss: 0.7515 (0.7453) time: 0.1614 data: 0.0750 max mem: 9377 +Train: [85] [1500/6250] eta: 0:13:03 lr: 0.000007 grad: 0.2049 (0.2226) loss: 0.7429 (0.7454) time: 0.1445 data: 0.0596 max mem: 9377 +Train: [85] [1600/6250] eta: 0:12:43 lr: 0.000007 grad: 0.2089 (0.2216) loss: 0.7522 (0.7454) time: 0.1653 data: 0.0799 max mem: 9377 +Train: [85] [1700/6250] eta: 0:12:31 lr: 0.000007 grad: 0.2001 (0.2208) loss: 0.7494 (0.7455) time: 0.1672 data: 0.0806 max mem: 9377 +Train: [85] [1800/6250] eta: 0:12:17 lr: 0.000007 grad: 0.2128 (0.2203) loss: 0.7386 (0.7453) time: 0.1685 data: 0.0803 max mem: 9377 +Train: [85] [1900/6250] eta: 0:12:04 lr: 0.000007 grad: 0.2109 (0.2196) loss: 0.7394 (0.7454) time: 0.1834 data: 0.0897 max mem: 9377 +Train: [85] [2000/6250] eta: 0:11:49 lr: 0.000007 grad: 0.2054 (0.2189) loss: 0.7405 (0.7455) time: 0.1681 data: 0.0833 max mem: 9377 +Train: [85] [2100/6250] eta: 0:11:35 lr: 0.000007 grad: 0.1993 (0.2183) loss: 0.7489 (0.7454) time: 0.1910 data: 0.0924 max mem: 9377 +Train: [85] [2200/6250] eta: 0:11:21 lr: 0.000007 grad: 0.1962 (0.2179) loss: 0.7410 (0.7454) time: 0.2086 data: 0.1247 max mem: 9377 +Train: [85] [2300/6250] eta: 0:11:04 lr: 0.000007 grad: 0.2005 (0.2174) loss: 0.7579 (0.7455) time: 0.1708 data: 0.0692 max mem: 9377 +Train: [85] [2400/6250] eta: 0:10:49 lr: 0.000007 grad: 0.1994 (0.2167) loss: 0.7581 (0.7458) time: 0.1405 data: 0.0513 max mem: 9377 +Train: [85] [2500/6250] eta: 0:10:31 lr: 0.000007 grad: 0.1978 (0.2165) loss: 0.7542 (0.7457) time: 0.1609 data: 0.0710 max mem: 9377 +Train: [85] [2600/6250] eta: 0:10:13 lr: 0.000007 grad: 0.2048 (0.2162) loss: 0.7476 (0.7458) time: 0.1591 data: 0.0659 max mem: 9377 +Train: [85] [2700/6250] eta: 0:09:56 lr: 0.000007 grad: 0.2006 (0.2159) loss: 0.7547 (0.7460) time: 0.1561 data: 0.0607 max mem: 9377 +Train: [85] [2800/6250] eta: 0:09:39 lr: 0.000007 grad: 0.2074 (0.2157) loss: 0.7447 (0.7459) time: 0.1522 data: 0.0648 max mem: 9377 +Train: [85] [2900/6250] eta: 0:09:21 lr: 0.000007 grad: 0.2093 (0.2155) loss: 0.7406 (0.7457) time: 0.1430 data: 0.0614 max mem: 9377 +Train: [85] [3000/6250] eta: 0:09:04 lr: 0.000007 grad: 0.2097 (0.2154) loss: 0.7471 (0.7455) time: 0.1467 data: 0.0556 max mem: 9377 +Train: [85] [3100/6250] eta: 0:08:46 lr: 0.000007 grad: 0.2091 (0.2154) loss: 0.7437 (0.7455) time: 0.1430 data: 0.0523 max mem: 9377 +Train: [85] [3200/6250] eta: 0:08:29 lr: 0.000007 grad: 0.1987 (0.2152) loss: 0.7415 (0.7455) time: 0.1700 data: 0.0896 max mem: 9377 +Train: [85] [3300/6250] eta: 0:08:11 lr: 0.000007 grad: 0.2070 (0.2151) loss: 0.7416 (0.7455) time: 0.1547 data: 0.0694 max mem: 9377 +Train: [85] [3400/6250] eta: 0:07:54 lr: 0.000007 grad: 0.2038 (0.2151) loss: 0.7564 (0.7456) time: 0.1559 data: 0.0672 max mem: 9377 +Train: [85] [3500/6250] eta: 0:07:37 lr: 0.000007 grad: 0.2103 (0.2149) loss: 0.7431 (0.7457) time: 0.1636 data: 0.0830 max mem: 9377 +Train: [85] [3600/6250] eta: 0:07:20 lr: 0.000007 grad: 0.2023 (0.2147) loss: 0.7571 (0.7457) time: 0.1806 data: 0.0914 max mem: 9377 +Train: [85] [3700/6250] eta: 0:07:03 lr: 0.000007 grad: 0.2103 (0.2146) loss: 0.7510 (0.7457) time: 0.1554 data: 0.0666 max mem: 9377 +Train: [85] [3800/6250] eta: 0:06:46 lr: 0.000007 grad: 0.2137 (0.2145) loss: 0.7520 (0.7457) time: 0.1556 data: 0.0703 max mem: 9377 +Train: [85] [3900/6250] eta: 0:06:29 lr: 0.000007 grad: 0.2168 (0.2145) loss: 0.7457 (0.7458) time: 0.1735 data: 0.0871 max mem: 9377 +Train: [85] [4000/6250] eta: 0:06:11 lr: 0.000007 grad: 0.2063 (0.2144) loss: 0.7363 (0.7457) time: 0.1670 data: 0.0748 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:54 lr: 0.000007 grad: 0.2007 (0.2143) loss: 0.7600 (0.7458) time: 0.1488 data: 0.0432 max mem: 9377 +Train: [85] [4200/6250] eta: 0:05:37 lr: 0.000007 grad: 0.1990 (0.2141) loss: 0.7484 (0.7458) time: 0.1661 data: 0.0664 max mem: 9377 +Train: [85] [4300/6250] eta: 0:05:20 lr: 0.000007 grad: 0.2030 (0.2139) loss: 0.7541 (0.7459) time: 0.1712 data: 0.0840 max mem: 9377 +Train: [85] [4400/6250] eta: 0:05:03 lr: 0.000007 grad: 0.2038 (0.2138) loss: 0.7543 (0.7460) time: 0.1359 data: 0.0401 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:46 lr: 0.000007 grad: 0.2045 (0.2137) loss: 0.7514 (0.7461) time: 0.1666 data: 0.0886 max mem: 9377 +Train: [85] [4600/6250] eta: 0:04:31 lr: 0.000007 grad: 0.2141 (0.2136) loss: 0.7413 (0.7461) time: 0.1737 data: 0.0911 max mem: 9377 +Train: [85] [4700/6250] eta: 0:04:15 lr: 0.000007 grad: 0.2003 (0.2135) loss: 0.7505 (0.7462) time: 0.1530 data: 0.0671 max mem: 9377 +Train: [85] [4800/6250] eta: 0:03:58 lr: 0.000007 grad: 0.2016 (0.2133) loss: 0.7586 (0.7463) time: 0.1334 data: 0.0433 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:42 lr: 0.000007 grad: 0.2109 (0.2132) loss: 0.7565 (0.7463) time: 0.1659 data: 0.0729 max mem: 9377 +Train: [85] [5000/6250] eta: 0:03:25 lr: 0.000007 grad: 0.2058 (0.2131) loss: 0.7395 (0.7465) time: 0.1846 data: 0.0817 max mem: 9377 +Train: [85] [5100/6250] eta: 0:03:09 lr: 0.000007 grad: 0.2111 (0.2130) loss: 0.7480 (0.7465) time: 0.1789 data: 0.0865 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:53 lr: 0.000007 grad: 0.1992 (0.2129) loss: 0.7511 (0.7465) time: 0.1513 data: 0.0632 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:37 lr: 0.000007 grad: 0.1991 (0.2127) loss: 0.7527 (0.7467) time: 0.1467 data: 0.0553 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:20 lr: 0.000007 grad: 0.2013 (0.2126) loss: 0.7498 (0.7466) time: 0.1573 data: 0.0683 max mem: 9377 +Train: [85] [5500/6250] eta: 0:02:03 lr: 0.000007 grad: 0.2034 (0.2125) loss: 0.7587 (0.7467) time: 0.1655 data: 0.0751 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:47 lr: 0.000007 grad: 0.2145 (0.2125) loss: 0.7406 (0.7467) time: 0.1809 data: 0.0829 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:30 lr: 0.000007 grad: 0.2039 (0.2124) loss: 0.7569 (0.7467) time: 0.1552 data: 0.0701 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:14 lr: 0.000007 grad: 0.2157 (0.2124) loss: 0.7391 (0.7467) time: 0.1445 data: 0.0497 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:57 lr: 0.000007 grad: 0.2062 (0.2124) loss: 0.7402 (0.7466) time: 0.1658 data: 0.0735 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:41 lr: 0.000007 grad: 0.2110 (0.2123) loss: 0.7419 (0.7465) time: 0.1615 data: 0.0792 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:24 lr: 0.000007 grad: 0.1962 (0.2123) loss: 0.7482 (0.7465) time: 0.1636 data: 0.0807 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:08 lr: 0.000007 grad: 0.2002 (0.2122) loss: 0.7403 (0.7465) time: 0.1547 data: 0.0676 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.2045 (0.2121) loss: 0.7465 (0.7465) time: 0.1697 data: 0.0905 max mem: 9377 +Train: [85] Total time: 0:17:12 (0.1652 s / it) +Averaged stats: lr: 0.000007 grad: 0.2045 (0.2121) loss: 0.7465 (0.7465) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:06:43 loss: 0.8457 (0.8457) time: 6.5004 data: 6.4363 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8578 (0.8580) time: 0.1202 data: 0.0953 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:15 (0.2477 s / it) +Averaged stats (hcp-train-subset): loss: 0.8578 (0.8580) +Eval (hcp-val): [85] [ 0/62] eta: 0:05:30 loss: 0.8602 (0.8602) time: 5.3264 data: 5.2963 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8542 (0.8559) time: 0.1338 data: 0.1086 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-val): loss: 0.8542 (0.8559) +Eval (nsd-val): [85] [ 0/62] eta: 0:05:27 loss: 0.8227 (0.8227) time: 5.2779 data: 5.2469 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8292 (0.8320) time: 0.1419 data: 0.1159 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:14 (0.2321 s / it) +Averaged stats (nsd-val): loss: 0.8292 (0.8320) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 11:21:58 lr: 0.000007 grad: 0.1542 (0.1542) loss: 0.8170 (0.8170) time: 6.5469 data: 6.3935 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:23:54 lr: 0.000007 grad: 0.2034 (0.2415) loss: 0.7674 (0.7706) time: 0.1864 data: 0.0798 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:20:02 lr: 0.000007 grad: 0.2150 (0.2329) loss: 0.7450 (0.7621) time: 0.1471 data: 0.0339 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:18:42 lr: 0.000007 grad: 0.2020 (0.2241) loss: 0.7517 (0.7605) time: 0.1686 data: 0.0640 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:17:35 lr: 0.000007 grad: 0.1939 (0.2189) loss: 0.7562 (0.7598) time: 0.1319 data: 0.0292 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:16:46 lr: 0.000007 grad: 0.2025 (0.2156) loss: 0.7596 (0.7590) time: 0.1316 data: 0.0467 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:16:08 lr: 0.000006 grad: 0.1984 (0.2138) loss: 0.7691 (0.7582) time: 0.1359 data: 0.0384 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:15:36 lr: 0.000006 grad: 0.2061 (0.2121) loss: 0.7617 (0.7580) time: 0.1462 data: 0.0528 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:15:11 lr: 0.000006 grad: 0.1984 (0.2106) loss: 0.7570 (0.7584) time: 0.1544 data: 0.0562 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:14:46 lr: 0.000006 grad: 0.2005 (0.2095) loss: 0.7698 (0.7584) time: 0.1580 data: 0.0580 max mem: 9377 +Train: [86] [1000/6250] eta: 0:14:22 lr: 0.000006 grad: 0.1915 (0.2092) loss: 0.7598 (0.7582) time: 0.1551 data: 0.0625 max mem: 9377 +Train: [86] [1100/6250] eta: 0:14:00 lr: 0.000006 grad: 0.2014 (0.2089) loss: 0.7516 (0.7577) time: 0.1422 data: 0.0506 max mem: 9377 +Train: [86] [1200/6250] eta: 0:13:43 lr: 0.000006 grad: 0.2011 (0.2086) loss: 0.7455 (0.7572) time: 0.1584 data: 0.0784 max mem: 9377 +Train: [86] [1300/6250] eta: 0:13:26 lr: 0.000006 grad: 0.2018 (0.2083) loss: 0.7480 (0.7566) time: 0.1357 data: 0.0483 max mem: 9377 +Train: [86] [1400/6250] eta: 0:13:07 lr: 0.000006 grad: 0.2011 (0.2083) loss: 0.7558 (0.7560) time: 0.1652 data: 0.0771 max mem: 9377 +Train: [86] [1500/6250] eta: 0:12:49 lr: 0.000006 grad: 0.1991 (0.2082) loss: 0.7474 (0.7554) time: 0.1550 data: 0.0674 max mem: 9377 +Train: [86] [1600/6250] eta: 0:12:30 lr: 0.000006 grad: 0.1976 (0.2081) loss: 0.7485 (0.7549) time: 0.1536 data: 0.0569 max mem: 9377 +Train: [86] [1700/6250] eta: 0:12:14 lr: 0.000006 grad: 0.2011 (0.2080) loss: 0.7439 (0.7545) time: 0.1563 data: 0.0694 max mem: 9377 +Train: [86] [1800/6250] eta: 0:12:02 lr: 0.000006 grad: 0.2042 (0.2078) loss: 0.7539 (0.7543) time: 0.1661 data: 0.0677 max mem: 9377 +Train: [86] [1900/6250] eta: 0:11:47 lr: 0.000006 grad: 0.2097 (0.2077) loss: 0.7359 (0.7540) time: 0.1870 data: 0.0962 max mem: 9377 +Train: [86] [2000/6250] eta: 0:11:30 lr: 0.000006 grad: 0.2017 (0.2075) loss: 0.7441 (0.7538) time: 0.1573 data: 0.0681 max mem: 9377 +Train: [86] [2100/6250] eta: 0:11:13 lr: 0.000006 grad: 0.1931 (0.2074) loss: 0.7502 (0.7535) time: 0.1622 data: 0.0729 max mem: 9377 +Train: [86] [2200/6250] eta: 0:10:57 lr: 0.000006 grad: 0.1998 (0.2073) loss: 0.7546 (0.7534) time: 0.1559 data: 0.0633 max mem: 9377 +Train: [86] [2300/6250] eta: 0:10:41 lr: 0.000006 grad: 0.2035 (0.2071) loss: 0.7461 (0.7533) time: 0.1739 data: 0.0852 max mem: 9377 +Train: [86] [2400/6250] eta: 0:10:24 lr: 0.000006 grad: 0.1997 (0.2069) loss: 0.7480 (0.7532) time: 0.1651 data: 0.0772 max mem: 9377 +Train: [86] [2500/6250] eta: 0:10:06 lr: 0.000006 grad: 0.1981 (0.2067) loss: 0.7509 (0.7531) time: 0.1581 data: 0.0658 max mem: 9377 +Train: [86] [2600/6250] eta: 0:09:48 lr: 0.000006 grad: 0.1961 (0.2067) loss: 0.7548 (0.7530) time: 0.1309 data: 0.0381 max mem: 9377 +Train: [86] [2700/6250] eta: 0:09:31 lr: 0.000006 grad: 0.2010 (0.2065) loss: 0.7375 (0.7528) time: 0.1441 data: 0.0576 max mem: 9377 +Train: [86] [2800/6250] eta: 0:09:13 lr: 0.000006 grad: 0.2007 (0.2064) loss: 0.7465 (0.7527) time: 0.1437 data: 0.0473 max mem: 9377 +Train: [86] [2900/6250] eta: 0:08:57 lr: 0.000006 grad: 0.2036 (0.2064) loss: 0.7516 (0.7526) time: 0.1518 data: 0.0668 max mem: 9377 +Train: [86] [3000/6250] eta: 0:08:41 lr: 0.000006 grad: 0.2058 (0.2063) loss: 0.7443 (0.7524) time: 0.1869 data: 0.0935 max mem: 9377 +Train: [86] [3100/6250] eta: 0:08:24 lr: 0.000006 grad: 0.2085 (0.2062) loss: 0.7492 (0.7522) time: 0.1457 data: 0.0695 max mem: 9377 +Train: [86] [3200/6250] eta: 0:08:09 lr: 0.000006 grad: 0.2156 (0.2062) loss: 0.7441 (0.7521) time: 0.1791 data: 0.0895 max mem: 9377 +Train: [86] [3300/6250] eta: 0:07:53 lr: 0.000006 grad: 0.2005 (0.2061) loss: 0.7467 (0.7521) time: 0.1580 data: 0.0635 max mem: 9377 +Train: [86] [3400/6250] eta: 0:07:37 lr: 0.000006 grad: 0.2022 (0.2062) loss: 0.7514 (0.7520) time: 0.1730 data: 0.0807 max mem: 9377 +Train: [86] [3500/6250] eta: 0:07:20 lr: 0.000006 grad: 0.2064 (0.2063) loss: 0.7463 (0.7519) time: 0.1733 data: 0.0722 max mem: 9377 +Train: [86] [3600/6250] eta: 0:07:05 lr: 0.000006 grad: 0.2015 (0.2063) loss: 0.7516 (0.7518) time: 0.1746 data: 0.0839 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:49 lr: 0.000006 grad: 0.2014 (0.2064) loss: 0.7520 (0.7516) time: 0.1658 data: 0.0778 max mem: 9377 +Train: [86] [3800/6250] eta: 0:06:34 lr: 0.000006 grad: 0.2062 (0.2064) loss: 0.7379 (0.7514) time: 0.1853 data: 0.0899 max mem: 9377 +Train: [86] [3900/6250] eta: 0:06:18 lr: 0.000006 grad: 0.2051 (0.2064) loss: 0.7453 (0.7512) time: 0.1736 data: 0.0888 max mem: 9377 +Train: [86] [4000/6250] eta: 0:06:02 lr: 0.000006 grad: 0.1994 (0.2065) loss: 0.7522 (0.7510) time: 0.1544 data: 0.0671 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:46 lr: 0.000006 grad: 0.1981 (0.2065) loss: 0.7477 (0.7509) time: 0.1529 data: 0.0624 max mem: 9377 +Train: [86] [4200/6250] eta: 0:05:30 lr: 0.000006 grad: 0.2038 (0.2065) loss: 0.7403 (0.7508) time: 0.1530 data: 0.0624 max mem: 9377 +Train: [86] [4300/6250] eta: 0:05:14 lr: 0.000006 grad: 0.1969 (0.2066) loss: 0.7512 (0.7506) time: 0.1467 data: 0.0540 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:58 lr: 0.000006 grad: 0.2099 (0.2066) loss: 0.7407 (0.7505) time: 0.1559 data: 0.0565 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:42 lr: 0.000006 grad: 0.1985 (0.2066) loss: 0.7544 (0.7505) time: 0.1176 data: 0.0330 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:27 lr: 0.000006 grad: 0.1989 (0.2065) loss: 0.7531 (0.7506) time: 0.1652 data: 0.0728 max mem: 9377 +Train: [86] [4700/6250] eta: 0:04:11 lr: 0.000006 grad: 0.2006 (0.2064) loss: 0.7381 (0.7506) time: 0.1599 data: 0.0793 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:54 lr: 0.000006 grad: 0.2077 (0.2063) loss: 0.7517 (0.7507) time: 0.1424 data: 0.0478 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:38 lr: 0.000006 grad: 0.1955 (0.2063) loss: 0.7547 (0.7508) time: 0.1509 data: 0.0544 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:22 lr: 0.000006 grad: 0.1972 (0.2062) loss: 0.7522 (0.7508) time: 0.1630 data: 0.0684 max mem: 9377 +Train: [86] [5100/6250] eta: 0:03:06 lr: 0.000006 grad: 0.2136 (0.2063) loss: 0.7431 (0.7508) time: 0.1487 data: 0.0565 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:49 lr: 0.000006 grad: 0.2041 (0.2063) loss: 0.7497 (0.7508) time: 0.1695 data: 0.0760 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:33 lr: 0.000006 grad: 0.2083 (0.2063) loss: 0.7466 (0.7508) time: 0.1431 data: 0.0467 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:17 lr: 0.000006 grad: 0.2024 (0.2062) loss: 0.7459 (0.7508) time: 0.1408 data: 0.0458 max mem: 9377 +Train: [86] [5500/6250] eta: 0:02:00 lr: 0.000006 grad: 0.1945 (0.2061) loss: 0.7557 (0.7508) time: 0.1486 data: 0.0388 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:44 lr: 0.000006 grad: 0.2129 (0.2061) loss: 0.7390 (0.7508) time: 0.1660 data: 0.0722 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:28 lr: 0.000006 grad: 0.2018 (0.2060) loss: 0.7416 (0.7507) time: 0.1788 data: 0.0887 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:12 lr: 0.000006 grad: 0.1920 (0.2059) loss: 0.7425 (0.7507) time: 0.1427 data: 0.0597 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:56 lr: 0.000006 grad: 0.1949 (0.2058) loss: 0.7583 (0.7507) time: 0.1573 data: 0.0691 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:40 lr: 0.000006 grad: 0.2044 (0.2057) loss: 0.7535 (0.7507) time: 0.1431 data: 0.0651 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:24 lr: 0.000006 grad: 0.2026 (0.2056) loss: 0.7434 (0.7506) time: 0.1583 data: 0.0711 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:08 lr: 0.000006 grad: 0.1973 (0.2056) loss: 0.7554 (0.7506) time: 0.1531 data: 0.0687 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1980 (0.2056) loss: 0.7438 (0.7505) time: 0.1626 data: 0.0810 max mem: 9377 +Train: [86] Total time: 0:16:49 (0.1615 s / it) +Averaged stats: lr: 0.000006 grad: 0.1980 (0.2056) loss: 0.7438 (0.7505) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:05:52 loss: 0.8478 (0.8478) time: 5.6790 data: 5.6488 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8555 (0.8579) time: 0.1254 data: 0.0985 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:14 (0.2405 s / it) +Averaged stats (hcp-train-subset): loss: 0.8555 (0.8579) +Eval (hcp-val): [86] [ 0/62] eta: 0:05:44 loss: 0.8582 (0.8582) time: 5.5511 data: 5.5211 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8546 (0.8570) time: 0.1454 data: 0.1171 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-val): loss: 0.8546 (0.8570) +Eval (nsd-val): [86] [ 0/62] eta: 0:05:47 loss: 0.8234 (0.8234) time: 5.5990 data: 5.5676 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8297 (0.8330) time: 0.1391 data: 0.1140 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (nsd-val): loss: 0.8297 (0.8330) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [87] [ 0/6250] eta: 9:29:38 lr: 0.000006 grad: 0.2624 (0.2624) loss: 0.7530 (0.7530) time: 5.4685 data: 5.1836 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:22:06 lr: 0.000006 grad: 0.2173 (0.2534) loss: 0.7769 (0.7722) time: 0.1611 data: 0.0484 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:19:06 lr: 0.000006 grad: 0.2167 (0.2382) loss: 0.7529 (0.7641) time: 0.1571 data: 0.0455 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:17:57 lr: 0.000006 grad: 0.2004 (0.2334) loss: 0.7603 (0.7603) time: 0.1649 data: 0.0578 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:17:34 lr: 0.000006 grad: 0.2071 (0.2310) loss: 0.7562 (0.7581) time: 0.1872 data: 0.0926 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:17:00 lr: 0.000006 grad: 0.2173 (0.2283) loss: 0.7553 (0.7567) time: 0.1632 data: 0.0652 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:16:35 lr: 0.000006 grad: 0.2113 (0.2256) loss: 0.7456 (0.7556) time: 0.1753 data: 0.0690 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:15:59 lr: 0.000006 grad: 0.2077 (0.2236) loss: 0.7552 (0.7552) time: 0.1759 data: 0.0787 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:15:33 lr: 0.000006 grad: 0.2023 (0.2222) loss: 0.7563 (0.7553) time: 0.1675 data: 0.0717 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:15:12 lr: 0.000006 grad: 0.2102 (0.2207) loss: 0.7484 (0.7550) time: 0.1809 data: 0.0877 max mem: 9377 +Train: [87] [1000/6250] eta: 0:14:47 lr: 0.000006 grad: 0.2053 (0.2195) loss: 0.7527 (0.7545) time: 0.1662 data: 0.0803 max mem: 9377 +Train: [87] [1100/6250] eta: 0:14:26 lr: 0.000006 grad: 0.2053 (0.2182) loss: 0.7573 (0.7541) time: 0.1857 data: 0.0919 max mem: 9377 +Train: [87] [1200/6250] eta: 0:14:02 lr: 0.000006 grad: 0.1937 (0.2170) loss: 0.7569 (0.7540) time: 0.1532 data: 0.0530 max mem: 9377 +Train: [87] [1300/6250] eta: 0:13:49 lr: 0.000006 grad: 0.2006 (0.2161) loss: 0.7625 (0.7541) time: 0.2455 data: 0.1551 max mem: 9377 +Train: [87] [1400/6250] eta: 0:13:28 lr: 0.000005 grad: 0.1902 (0.2153) loss: 0.7557 (0.7541) time: 0.1900 data: 0.1038 max mem: 9377 +Train: [87] [1500/6250] eta: 0:13:07 lr: 0.000005 grad: 0.2040 (0.2147) loss: 0.7476 (0.7538) time: 0.1639 data: 0.0715 max mem: 9377 +Train: [87] [1600/6250] eta: 0:12:49 lr: 0.000005 grad: 0.1974 (0.2142) loss: 0.7514 (0.7532) time: 0.1440 data: 0.0593 max mem: 9377 +Train: [87] [1700/6250] eta: 0:12:29 lr: 0.000005 grad: 0.2050 (0.2140) loss: 0.7460 (0.7528) time: 0.1456 data: 0.0558 max mem: 9377 +Train: [87] [1800/6250] eta: 0:12:16 lr: 0.000005 grad: 0.2058 (0.2138) loss: 0.7564 (0.7525) time: 0.1524 data: 0.0495 max mem: 9377 +Train: [87] [1900/6250] eta: 0:11:58 lr: 0.000005 grad: 0.2087 (0.2134) loss: 0.7379 (0.7524) time: 0.1763 data: 0.0897 max mem: 9377 +Train: [87] [2000/6250] eta: 0:11:40 lr: 0.000005 grad: 0.2090 (0.2132) loss: 0.7511 (0.7523) time: 0.1434 data: 0.0462 max mem: 9377 +Train: [87] [2100/6250] eta: 0:11:22 lr: 0.000005 grad: 0.2152 (0.2131) loss: 0.7439 (0.7519) time: 0.1673 data: 0.0777 max mem: 9377 +Train: [87] [2200/6250] eta: 0:11:06 lr: 0.000005 grad: 0.2034 (0.2129) loss: 0.7410 (0.7515) time: 0.1726 data: 0.0821 max mem: 9377 +Train: [87] [2300/6250] eta: 0:10:53 lr: 0.000005 grad: 0.2103 (0.2129) loss: 0.7405 (0.7510) time: 0.1848 data: 0.1056 max mem: 9377 +Train: [87] [2400/6250] eta: 0:10:36 lr: 0.000005 grad: 0.2085 (0.2127) loss: 0.7360 (0.7506) time: 0.1567 data: 0.0645 max mem: 9377 +Train: [87] [2500/6250] eta: 0:10:17 lr: 0.000005 grad: 0.2014 (0.2126) loss: 0.7394 (0.7503) time: 0.1540 data: 0.0642 max mem: 9377 +Train: [87] [2600/6250] eta: 0:09:59 lr: 0.000005 grad: 0.2086 (0.2125) loss: 0.7386 (0.7500) time: 0.1436 data: 0.0437 max mem: 9377 +Train: [87] [2700/6250] eta: 0:09:40 lr: 0.000005 grad: 0.2091 (0.2125) loss: 0.7433 (0.7497) time: 0.1483 data: 0.0571 max mem: 9377 +Train: [87] [2800/6250] eta: 0:09:22 lr: 0.000005 grad: 0.2067 (0.2127) loss: 0.7476 (0.7495) time: 0.1487 data: 0.0555 max mem: 9377 +Train: [87] [2900/6250] eta: 0:09:05 lr: 0.000005 grad: 0.2075 (0.2126) loss: 0.7513 (0.7494) time: 0.1485 data: 0.0672 max mem: 9377 +Train: [87] [3000/6250] eta: 0:08:47 lr: 0.000005 grad: 0.2003 (0.2123) loss: 0.7493 (0.7494) time: 0.1869 data: 0.1026 max mem: 9377 +Train: [87] [3100/6250] eta: 0:08:32 lr: 0.000005 grad: 0.2145 (0.2122) loss: 0.7500 (0.7494) time: 0.1850 data: 0.1046 max mem: 9377 +Train: [87] [3200/6250] eta: 0:08:16 lr: 0.000005 grad: 0.1951 (0.2120) loss: 0.7599 (0.7495) time: 0.1772 data: 0.1033 max mem: 9377 +Train: [87] [3300/6250] eta: 0:08:01 lr: 0.000005 grad: 0.2039 (0.2118) loss: 0.7438 (0.7495) time: 0.1513 data: 0.0633 max mem: 9377 +Train: [87] [3400/6250] eta: 0:07:46 lr: 0.000005 grad: 0.2078 (0.2118) loss: 0.7491 (0.7494) time: 0.1425 data: 0.0560 max mem: 9377 +Train: [87] [3500/6250] eta: 0:07:32 lr: 0.000005 grad: 0.2015 (0.2118) loss: 0.7481 (0.7494) time: 0.2055 data: 0.1029 max mem: 9377 +Train: [87] [3600/6250] eta: 0:07:17 lr: 0.000005 grad: 0.2087 (0.2117) loss: 0.7474 (0.7494) time: 0.1566 data: 0.0687 max mem: 9377 +Train: [87] [3700/6250] eta: 0:07:01 lr: 0.000005 grad: 0.2097 (0.2117) loss: 0.7415 (0.7492) time: 0.1631 data: 0.0734 max mem: 9377 +Train: [87] [3800/6250] eta: 0:06:45 lr: 0.000005 grad: 0.2116 (0.2115) loss: 0.7427 (0.7492) time: 0.1647 data: 0.0764 max mem: 9377 +Train: [87] [3900/6250] eta: 0:06:29 lr: 0.000005 grad: 0.2004 (0.2114) loss: 0.7441 (0.7492) time: 0.1832 data: 0.0930 max mem: 9377 +Train: [87] [4000/6250] eta: 0:06:12 lr: 0.000005 grad: 0.2110 (0.2113) loss: 0.7543 (0.7493) time: 0.1879 data: 0.1043 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:55 lr: 0.000005 grad: 0.2070 (0.2113) loss: 0.7430 (0.7492) time: 0.1666 data: 0.0734 max mem: 9377 +Train: [87] [4200/6250] eta: 0:05:39 lr: 0.000005 grad: 0.2096 (0.2112) loss: 0.7371 (0.7490) time: 0.1620 data: 0.0743 max mem: 9377 +Train: [87] [4300/6250] eta: 0:05:22 lr: 0.000005 grad: 0.2077 (0.2112) loss: 0.7454 (0.7490) time: 0.1618 data: 0.0735 max mem: 9377 +Train: [87] [4400/6250] eta: 0:05:05 lr: 0.000005 grad: 0.2071 (0.2111) loss: 0.7496 (0.7490) time: 0.1861 data: 0.1004 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:49 lr: 0.000005 grad: 0.2154 (0.2111) loss: 0.7343 (0.7489) time: 0.1669 data: 0.0777 max mem: 9377 +Train: [87] [4600/6250] eta: 0:04:33 lr: 0.000005 grad: 0.2060 (0.2111) loss: 0.7483 (0.7489) time: 0.1699 data: 0.0788 max mem: 9377 +Train: [87] [4700/6250] eta: 0:04:16 lr: 0.000005 grad: 0.2069 (0.2111) loss: 0.7398 (0.7489) time: 0.1842 data: 0.0981 max mem: 9377 +Train: [87] [4800/6250] eta: 0:04:00 lr: 0.000005 grad: 0.2070 (0.2110) loss: 0.7362 (0.7489) time: 0.1762 data: 0.0945 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:43 lr: 0.000005 grad: 0.2065 (0.2109) loss: 0.7461 (0.7490) time: 0.1631 data: 0.0665 max mem: 9377 +Train: [87] [5000/6250] eta: 0:03:27 lr: 0.000005 grad: 0.2099 (0.2108) loss: 0.7502 (0.7489) time: 0.1663 data: 0.0793 max mem: 9377 +Train: [87] [5100/6250] eta: 0:03:10 lr: 0.000005 grad: 0.2032 (0.2107) loss: 0.7535 (0.7490) time: 0.1586 data: 0.0669 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:53 lr: 0.000005 grad: 0.2051 (0.2107) loss: 0.7447 (0.7491) time: 0.1554 data: 0.0524 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:36 lr: 0.000005 grad: 0.2015 (0.2107) loss: 0.7390 (0.7491) time: 0.1559 data: 0.0678 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:20 lr: 0.000005 grad: 0.2080 (0.2107) loss: 0.7509 (0.7490) time: 0.1557 data: 0.0568 max mem: 9377 +Train: [87] [5500/6250] eta: 0:02:03 lr: 0.000005 grad: 0.1943 (0.2107) loss: 0.7465 (0.7490) time: 0.1532 data: 0.0642 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:46 lr: 0.000005 grad: 0.1976 (0.2106) loss: 0.7501 (0.7491) time: 0.1464 data: 0.0532 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:30 lr: 0.000005 grad: 0.2021 (0.2106) loss: 0.7471 (0.7491) time: 0.1382 data: 0.0456 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:13 lr: 0.000005 grad: 0.2068 (0.2106) loss: 0.7396 (0.7491) time: 0.1572 data: 0.0625 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:57 lr: 0.000005 grad: 0.2092 (0.2106) loss: 0.7430 (0.7490) time: 0.1574 data: 0.0624 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:40 lr: 0.000005 grad: 0.2107 (0.2106) loss: 0.7484 (0.7490) time: 0.1687 data: 0.0771 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:24 lr: 0.000005 grad: 0.2117 (0.2107) loss: 0.7372 (0.7489) time: 0.1721 data: 0.0834 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:08 lr: 0.000005 grad: 0.2082 (0.2107) loss: 0.7361 (0.7488) time: 0.1360 data: 0.0454 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.2024 (0.2107) loss: 0.7447 (0.7488) time: 0.1626 data: 0.0709 max mem: 9377 +Train: [87] Total time: 0:17:08 (0.1646 s / it) +Averaged stats: lr: 0.000005 grad: 0.2024 (0.2107) loss: 0.7447 (0.7488) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:06:47 loss: 0.8470 (0.8470) time: 6.5718 data: 6.5388 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8559 (0.8573) time: 0.1638 data: 0.1388 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:15 (0.2551 s / it) +Averaged stats (hcp-train-subset): loss: 0.8559 (0.8573) +Eval (hcp-val): [87] [ 0/62] eta: 0:04:10 loss: 0.8591 (0.8591) time: 4.0484 data: 3.9592 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8551 (0.8568) time: 0.1227 data: 0.0976 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:15 (0.2506 s / it) +Averaged stats (hcp-val): loss: 0.8551 (0.8568) +Eval (nsd-val): [87] [ 0/62] eta: 0:06:52 loss: 0.8217 (0.8217) time: 6.6519 data: 6.6198 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8309 (0.8317) time: 0.1357 data: 0.1103 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:15 (0.2448 s / it) +Averaged stats (nsd-val): loss: 0.8309 (0.8317) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 9:18:01 lr: 0.000005 grad: 0.1590 (0.1590) loss: 0.8415 (0.8415) time: 5.3571 data: 5.1324 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:24:08 lr: 0.000005 grad: 0.2370 (0.2709) loss: 0.7488 (0.7638) time: 0.1929 data: 0.0780 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:20:54 lr: 0.000005 grad: 0.2382 (0.2579) loss: 0.7363 (0.7548) time: 0.1720 data: 0.0623 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:19:24 lr: 0.000005 grad: 0.2265 (0.2512) loss: 0.7369 (0.7489) time: 0.1620 data: 0.0502 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:18:25 lr: 0.000005 grad: 0.2165 (0.2456) loss: 0.7328 (0.7460) time: 0.1629 data: 0.0502 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:17:33 lr: 0.000005 grad: 0.2113 (0.2407) loss: 0.7316 (0.7445) time: 0.1561 data: 0.0510 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:16:52 lr: 0.000005 grad: 0.2078 (0.2369) loss: 0.7517 (0.7443) time: 0.1410 data: 0.0345 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:16:17 lr: 0.000005 grad: 0.2059 (0.2338) loss: 0.7549 (0.7447) time: 0.1301 data: 0.0386 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:15:48 lr: 0.000005 grad: 0.2135 (0.2316) loss: 0.7398 (0.7448) time: 0.1529 data: 0.0668 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:15:26 lr: 0.000005 grad: 0.2193 (0.2295) loss: 0.7486 (0.7451) time: 0.1429 data: 0.0372 max mem: 9377 +Train: [88] [1000/6250] eta: 0:15:01 lr: 0.000005 grad: 0.2109 (0.2284) loss: 0.7391 (0.7446) time: 0.1795 data: 0.0859 max mem: 9377 +Train: [88] [1100/6250] eta: 0:14:31 lr: 0.000005 grad: 0.2029 (0.2269) loss: 0.7331 (0.7440) time: 0.1464 data: 0.0467 max mem: 9377 +Train: [88] [1200/6250] eta: 0:14:10 lr: 0.000005 grad: 0.2128 (0.2262) loss: 0.7373 (0.7436) time: 0.1809 data: 0.0877 max mem: 9377 +Train: [88] [1300/6250] eta: 0:13:45 lr: 0.000005 grad: 0.2130 (0.2252) loss: 0.7467 (0.7434) time: 0.1260 data: 0.0353 max mem: 9377 +Train: [88] [1400/6250] eta: 0:13:26 lr: 0.000005 grad: 0.2101 (0.2244) loss: 0.7394 (0.7434) time: 0.1405 data: 0.0448 max mem: 9377 +Train: [88] [1500/6250] eta: 0:13:08 lr: 0.000005 grad: 0.2062 (0.2234) loss: 0.7364 (0.7434) time: 0.1599 data: 0.0781 max mem: 9377 +Train: [88] [1600/6250] eta: 0:12:56 lr: 0.000005 grad: 0.2232 (0.2229) loss: 0.7403 (0.7432) time: 0.1909 data: 0.0983 max mem: 9377 +Train: [88] [1700/6250] eta: 0:12:39 lr: 0.000005 grad: 0.2069 (0.2223) loss: 0.7353 (0.7431) time: 0.1816 data: 0.0910 max mem: 9377 +Train: [88] [1800/6250] eta: 0:12:28 lr: 0.000005 grad: 0.2131 (0.2219) loss: 0.7358 (0.7430) time: 0.2060 data: 0.1351 max mem: 9377 +Train: [88] [1900/6250] eta: 0:12:10 lr: 0.000005 grad: 0.2072 (0.2215) loss: 0.7406 (0.7431) time: 0.1292 data: 0.0477 max mem: 9377 +Train: [88] [2000/6250] eta: 0:11:54 lr: 0.000005 grad: 0.2129 (0.2210) loss: 0.7410 (0.7431) time: 0.1855 data: 0.1049 max mem: 9377 +Train: [88] [2100/6250] eta: 0:11:38 lr: 0.000005 grad: 0.2160 (0.2206) loss: 0.7351 (0.7431) time: 0.1649 data: 0.0822 max mem: 9377 +Train: [88] [2200/6250] eta: 0:11:20 lr: 0.000005 grad: 0.2046 (0.2204) loss: 0.7477 (0.7432) time: 0.1589 data: 0.0678 max mem: 9377 +Train: [88] [2300/6250] eta: 0:11:04 lr: 0.000005 grad: 0.2086 (0.2200) loss: 0.7484 (0.7433) time: 0.1744 data: 0.0849 max mem: 9377 +Train: [88] [2400/6250] eta: 0:10:46 lr: 0.000005 grad: 0.2067 (0.2195) loss: 0.7489 (0.7433) time: 0.1633 data: 0.0722 max mem: 9377 +Train: [88] [2500/6250] eta: 0:10:28 lr: 0.000005 grad: 0.1939 (0.2191) loss: 0.7523 (0.7434) time: 0.1533 data: 0.0681 max mem: 9377 +Train: [88] [2600/6250] eta: 0:10:10 lr: 0.000005 grad: 0.1935 (0.2187) loss: 0.7539 (0.7435) time: 0.1539 data: 0.0595 max mem: 9377 +Train: [88] [2700/6250] eta: 0:09:51 lr: 0.000005 grad: 0.2080 (0.2183) loss: 0.7451 (0.7437) time: 0.1427 data: 0.0487 max mem: 9377 +Train: [88] [2800/6250] eta: 0:09:33 lr: 0.000005 grad: 0.2052 (0.2178) loss: 0.7572 (0.7440) time: 0.1413 data: 0.0458 max mem: 9377 +Train: [88] [2900/6250] eta: 0:09:16 lr: 0.000004 grad: 0.1944 (0.2173) loss: 0.7484 (0.7441) time: 0.1504 data: 0.0511 max mem: 9377 +Train: [88] [3000/6250] eta: 0:08:58 lr: 0.000004 grad: 0.2044 (0.2171) loss: 0.7491 (0.7441) time: 0.1527 data: 0.0593 max mem: 9377 +Train: [88] [3100/6250] eta: 0:08:43 lr: 0.000004 grad: 0.1918 (0.2166) loss: 0.7540 (0.7442) time: 0.1873 data: 0.1038 max mem: 9377 +Train: [88] [3200/6250] eta: 0:08:28 lr: 0.000004 grad: 0.1961 (0.2162) loss: 0.7563 (0.7444) time: 0.1878 data: 0.1004 max mem: 9377 +Train: [88] [3300/6250] eta: 0:08:12 lr: 0.000004 grad: 0.1977 (0.2158) loss: 0.7475 (0.7445) time: 0.1877 data: 0.1068 max mem: 9377 +Train: [88] [3400/6250] eta: 0:07:55 lr: 0.000004 grad: 0.2013 (0.2153) loss: 0.7452 (0.7447) time: 0.1701 data: 0.0729 max mem: 9377 +Train: [88] [3500/6250] eta: 0:07:38 lr: 0.000004 grad: 0.2019 (0.2150) loss: 0.7530 (0.7449) time: 0.1628 data: 0.0761 max mem: 9377 +Train: [88] [3600/6250] eta: 0:07:21 lr: 0.000004 grad: 0.1990 (0.2146) loss: 0.7473 (0.7451) time: 0.1504 data: 0.0604 max mem: 9377 +Train: [88] [3700/6250] eta: 0:07:05 lr: 0.000004 grad: 0.2036 (0.2142) loss: 0.7493 (0.7453) time: 0.1710 data: 0.0821 max mem: 9377 +Train: [88] [3800/6250] eta: 0:06:47 lr: 0.000004 grad: 0.2021 (0.2139) loss: 0.7488 (0.7455) time: 0.1529 data: 0.0579 max mem: 9377 +Train: [88] [3900/6250] eta: 0:06:30 lr: 0.000004 grad: 0.2020 (0.2136) loss: 0.7555 (0.7458) time: 0.1705 data: 0.0804 max mem: 9377 +Train: [88] [4000/6250] eta: 0:06:13 lr: 0.000004 grad: 0.1953 (0.2134) loss: 0.7442 (0.7459) time: 0.1512 data: 0.0622 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:56 lr: 0.000004 grad: 0.2064 (0.2133) loss: 0.7457 (0.7460) time: 0.1508 data: 0.0594 max mem: 9377 +Train: [88] [4200/6250] eta: 0:05:39 lr: 0.000004 grad: 0.2164 (0.2133) loss: 0.7465 (0.7460) time: 0.1332 data: 0.0471 max mem: 9377 +Train: [88] [4300/6250] eta: 0:05:22 lr: 0.000004 grad: 0.2076 (0.2131) loss: 0.7413 (0.7459) time: 0.1868 data: 0.0934 max mem: 9377 +Train: [88] [4400/6250] eta: 0:05:06 lr: 0.000004 grad: 0.2052 (0.2131) loss: 0.7362 (0.7459) time: 0.2160 data: 0.1400 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:49 lr: 0.000004 grad: 0.2090 (0.2130) loss: 0.7413 (0.7458) time: 0.1283 data: 0.0438 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:32 lr: 0.000004 grad: 0.1999 (0.2129) loss: 0.7364 (0.7458) time: 0.1487 data: 0.0645 max mem: 9377 +Train: [88] [4700/6250] eta: 0:04:15 lr: 0.000004 grad: 0.2082 (0.2128) loss: 0.7551 (0.7458) time: 0.1698 data: 0.0849 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:59 lr: 0.000004 grad: 0.2031 (0.2127) loss: 0.7490 (0.7458) time: 0.1911 data: 0.1015 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:42 lr: 0.000004 grad: 0.2027 (0.2125) loss: 0.7525 (0.7460) time: 0.1597 data: 0.0730 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:25 lr: 0.000004 grad: 0.2055 (0.2124) loss: 0.7453 (0.7460) time: 0.1474 data: 0.0544 max mem: 9377 +Train: [88] [5100/6250] eta: 0:03:09 lr: 0.000004 grad: 0.2133 (0.2123) loss: 0.7466 (0.7461) time: 0.1290 data: 0.0422 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:52 lr: 0.000004 grad: 0.2121 (0.2123) loss: 0.7515 (0.7460) time: 0.1529 data: 0.0549 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:35 lr: 0.000004 grad: 0.2083 (0.2122) loss: 0.7528 (0.7461) time: 0.1471 data: 0.0591 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:19 lr: 0.000004 grad: 0.2090 (0.2121) loss: 0.7508 (0.7461) time: 0.1662 data: 0.0714 max mem: 9377 +Train: [88] [5500/6250] eta: 0:02:02 lr: 0.000004 grad: 0.2058 (0.2122) loss: 0.7468 (0.7461) time: 0.1466 data: 0.0547 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:46 lr: 0.000004 grad: 0.2090 (0.2121) loss: 0.7527 (0.7462) time: 0.1153 data: 0.0288 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:29 lr: 0.000004 grad: 0.2088 (0.2121) loss: 0.7513 (0.7462) time: 0.1813 data: 0.1021 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:13 lr: 0.000004 grad: 0.2091 (0.2121) loss: 0.7513 (0.7462) time: 0.1523 data: 0.0731 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:56 lr: 0.000004 grad: 0.2012 (0.2121) loss: 0.7537 (0.7462) time: 0.1460 data: 0.0616 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:40 lr: 0.000004 grad: 0.2059 (0.2121) loss: 0.7441 (0.7461) time: 0.1315 data: 0.0443 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:24 lr: 0.000004 grad: 0.2082 (0.2121) loss: 0.7382 (0.7461) time: 0.1854 data: 0.0945 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:08 lr: 0.000004 grad: 0.2058 (0.2122) loss: 0.7406 (0.7459) time: 0.1641 data: 0.0759 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.2048 (0.2122) loss: 0.7454 (0.7459) time: 0.1422 data: 0.0590 max mem: 9377 +Train: [88] Total time: 0:16:59 (0.1632 s / it) +Averaged stats: lr: 0.000004 grad: 0.2048 (0.2122) loss: 0.7454 (0.7459) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:05:22 loss: 0.8458 (0.8458) time: 5.2062 data: 5.1752 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8577 (0.8587) time: 0.1363 data: 0.1114 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (hcp-train-subset): loss: 0.8577 (0.8587) +Eval (hcp-val): [88] [ 0/62] eta: 0:06:07 loss: 0.8564 (0.8564) time: 5.9223 data: 5.8861 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8559 (0.8568) time: 0.1323 data: 0.1072 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:14 (0.2303 s / it) +Averaged stats (hcp-val): loss: 0.8559 (0.8568) +Eval (nsd-val): [88] [ 0/62] eta: 0:05:20 loss: 0.8255 (0.8255) time: 5.1754 data: 5.1441 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8317 (0.8337) time: 0.1043 data: 0.0777 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (nsd-val): loss: 0.8317 (0.8337) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 7:59:44 lr: 0.000004 grad: 0.2527 (0.2527) loss: 0.8094 (0.8094) time: 4.6056 data: 4.3609 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:22:17 lr: 0.000004 grad: 0.2241 (0.2375) loss: 0.7746 (0.7749) time: 0.1538 data: 0.0396 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:19:04 lr: 0.000004 grad: 0.2001 (0.2260) loss: 0.7584 (0.7683) time: 0.1673 data: 0.0646 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:17:34 lr: 0.000004 grad: 0.2187 (0.2226) loss: 0.7482 (0.7645) time: 0.1464 data: 0.0485 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:16:39 lr: 0.000004 grad: 0.2223 (0.2222) loss: 0.7377 (0.7611) time: 0.1421 data: 0.0359 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:16:02 lr: 0.000004 grad: 0.2091 (0.2210) loss: 0.7524 (0.7586) time: 0.1598 data: 0.0709 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:15:40 lr: 0.000004 grad: 0.1992 (0.2205) loss: 0.7536 (0.7567) time: 0.1648 data: 0.0653 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:15:24 lr: 0.000004 grad: 0.2081 (0.2194) loss: 0.7432 (0.7558) time: 0.2043 data: 0.1081 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:14:57 lr: 0.000004 grad: 0.2121 (0.2188) loss: 0.7415 (0.7545) time: 0.1602 data: 0.0675 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:14:41 lr: 0.000004 grad: 0.2149 (0.2181) loss: 0.7434 (0.7538) time: 0.1786 data: 0.0884 max mem: 9377 +Train: [89] [1000/6250] eta: 0:14:27 lr: 0.000004 grad: 0.2048 (0.2172) loss: 0.7504 (0.7535) time: 0.2218 data: 0.1429 max mem: 9377 +Train: [89] [1100/6250] eta: 0:14:04 lr: 0.000004 grad: 0.2060 (0.2168) loss: 0.7480 (0.7532) time: 0.1493 data: 0.0511 max mem: 9377 +Train: [89] [1200/6250] eta: 0:13:45 lr: 0.000004 grad: 0.2034 (0.2167) loss: 0.7464 (0.7528) time: 0.1636 data: 0.0738 max mem: 9377 +Train: [89] [1300/6250] eta: 0:13:36 lr: 0.000004 grad: 0.2041 (0.2162) loss: 0.7517 (0.7526) time: 0.2298 data: 0.1465 max mem: 9377 +Train: [89] [1400/6250] eta: 0:13:22 lr: 0.000004 grad: 0.2008 (0.2157) loss: 0.7557 (0.7523) time: 0.1450 data: 0.0610 max mem: 9377 +Train: [89] [1500/6250] eta: 0:13:06 lr: 0.000004 grad: 0.2094 (0.2152) loss: 0.7466 (0.7523) time: 0.1500 data: 0.0585 max mem: 9377 +Train: [89] [1600/6250] eta: 0:12:50 lr: 0.000004 grad: 0.1993 (0.2148) loss: 0.7571 (0.7523) time: 0.1591 data: 0.0745 max mem: 9377 +Train: [89] [1700/6250] eta: 0:12:31 lr: 0.000004 grad: 0.2040 (0.2141) loss: 0.7587 (0.7524) time: 0.1506 data: 0.0630 max mem: 9377 +Train: [89] [1800/6250] eta: 0:12:15 lr: 0.000004 grad: 0.1941 (0.2138) loss: 0.7547 (0.7525) time: 0.1721 data: 0.0822 max mem: 9377 +Train: [89] [1900/6250] eta: 0:11:57 lr: 0.000004 grad: 0.1959 (0.2133) loss: 0.7580 (0.7526) time: 0.1682 data: 0.0722 max mem: 9377 +Train: [89] [2000/6250] eta: 0:11:42 lr: 0.000004 grad: 0.2146 (0.2131) loss: 0.7446 (0.7524) time: 0.1815 data: 0.0971 max mem: 9377 +Train: [89] [2100/6250] eta: 0:11:23 lr: 0.000004 grad: 0.2054 (0.2129) loss: 0.7503 (0.7523) time: 0.1379 data: 0.0442 max mem: 9377 +Train: [89] [2200/6250] eta: 0:11:06 lr: 0.000004 grad: 0.2107 (0.2126) loss: 0.7491 (0.7521) time: 0.1664 data: 0.0724 max mem: 9377 +Train: [89] [2300/6250] eta: 0:10:49 lr: 0.000004 grad: 0.2035 (0.2123) loss: 0.7363 (0.7519) time: 0.1600 data: 0.0744 max mem: 9377 +Train: [89] [2400/6250] eta: 0:10:29 lr: 0.000004 grad: 0.2040 (0.2122) loss: 0.7361 (0.7517) time: 0.1475 data: 0.0499 max mem: 9377 +Train: [89] [2500/6250] eta: 0:10:11 lr: 0.000004 grad: 0.2147 (0.2123) loss: 0.7502 (0.7516) time: 0.1503 data: 0.0625 max mem: 9377 +Train: [89] [2600/6250] eta: 0:09:53 lr: 0.000004 grad: 0.2040 (0.2122) loss: 0.7477 (0.7515) time: 0.1575 data: 0.0702 max mem: 9377 +Train: [89] [2700/6250] eta: 0:09:34 lr: 0.000004 grad: 0.1995 (0.2120) loss: 0.7579 (0.7514) time: 0.1376 data: 0.0475 max mem: 9377 +Train: [89] [2800/6250] eta: 0:09:16 lr: 0.000004 grad: 0.2150 (0.2119) loss: 0.7424 (0.7514) time: 0.1409 data: 0.0500 max mem: 9377 +Train: [89] [2900/6250] eta: 0:08:59 lr: 0.000004 grad: 0.1993 (0.2117) loss: 0.7534 (0.7514) time: 0.1420 data: 0.0528 max mem: 9377 +Train: [89] [3000/6250] eta: 0:08:42 lr: 0.000004 grad: 0.2016 (0.2117) loss: 0.7610 (0.7515) time: 0.1607 data: 0.0774 max mem: 9377 +Train: [89] [3100/6250] eta: 0:08:27 lr: 0.000004 grad: 0.1961 (0.2116) loss: 0.7608 (0.7515) time: 0.1483 data: 0.0630 max mem: 9377 +Train: [89] [3200/6250] eta: 0:08:10 lr: 0.000004 grad: 0.2037 (0.2114) loss: 0.7587 (0.7516) time: 0.1834 data: 0.0982 max mem: 9377 +Train: [89] [3300/6250] eta: 0:07:54 lr: 0.000004 grad: 0.2034 (0.2111) loss: 0.7579 (0.7517) time: 0.1343 data: 0.0436 max mem: 9377 +Train: [89] [3400/6250] eta: 0:07:36 lr: 0.000004 grad: 0.2092 (0.2110) loss: 0.7530 (0.7517) time: 0.1336 data: 0.0453 max mem: 9377 +Train: [89] [3500/6250] eta: 0:07:20 lr: 0.000004 grad: 0.2047 (0.2109) loss: 0.7583 (0.7519) time: 0.1633 data: 0.0751 max mem: 9377 +Train: [89] [3600/6250] eta: 0:07:03 lr: 0.000004 grad: 0.2028 (0.2109) loss: 0.7609 (0.7520) time: 0.1514 data: 0.0593 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:47 lr: 0.000004 grad: 0.2091 (0.2107) loss: 0.7548 (0.7521) time: 0.1479 data: 0.0645 max mem: 9377 +Train: [89] [3800/6250] eta: 0:06:31 lr: 0.000004 grad: 0.2069 (0.2107) loss: 0.7462 (0.7521) time: 0.1242 data: 0.0301 max mem: 9377 +Train: [89] [3900/6250] eta: 0:06:14 lr: 0.000004 grad: 0.2022 (0.2106) loss: 0.7511 (0.7522) time: 0.1394 data: 0.0474 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:57 lr: 0.000004 grad: 0.2087 (0.2105) loss: 0.7428 (0.7522) time: 0.1296 data: 0.0396 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:40 lr: 0.000004 grad: 0.2038 (0.2104) loss: 0.7557 (0.7522) time: 0.1413 data: 0.0664 max mem: 9377 +Train: [89] [4200/6250] eta: 0:05:24 lr: 0.000004 grad: 0.2070 (0.2102) loss: 0.7473 (0.7523) time: 0.1343 data: 0.0395 max mem: 9377 +Train: [89] [4300/6250] eta: 0:05:08 lr: 0.000004 grad: 0.2074 (0.2101) loss: 0.7501 (0.7523) time: 0.1576 data: 0.0734 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:52 lr: 0.000004 grad: 0.2083 (0.2101) loss: 0.7529 (0.7524) time: 0.1612 data: 0.0838 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:36 lr: 0.000004 grad: 0.2071 (0.2101) loss: 0.7454 (0.7525) time: 0.1548 data: 0.0708 max mem: 9377 +Train: [89] [4600/6250] eta: 0:04:20 lr: 0.000004 grad: 0.2021 (0.2099) loss: 0.7486 (0.7526) time: 0.1420 data: 0.0575 max mem: 9377 +Train: [89] [4700/6250] eta: 0:04:04 lr: 0.000004 grad: 0.2083 (0.2099) loss: 0.7540 (0.7526) time: 0.1891 data: 0.1077 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:48 lr: 0.000004 grad: 0.2070 (0.2099) loss: 0.7625 (0.7526) time: 0.1406 data: 0.0533 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:33 lr: 0.000004 grad: 0.2088 (0.2100) loss: 0.7528 (0.7526) time: 0.1548 data: 0.0636 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:17 lr: 0.000004 grad: 0.2095 (0.2100) loss: 0.7534 (0.7526) time: 0.1739 data: 0.0860 max mem: 9377 +Train: [89] [5100/6250] eta: 0:03:01 lr: 0.000004 grad: 0.2133 (0.2100) loss: 0.7534 (0.7526) time: 0.1555 data: 0.0495 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:45 lr: 0.000003 grad: 0.2094 (0.2100) loss: 0.7519 (0.7526) time: 0.1601 data: 0.0681 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:29 lr: 0.000003 grad: 0.2000 (0.2099) loss: 0.7547 (0.7526) time: 0.1437 data: 0.0526 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:13 lr: 0.000003 grad: 0.2042 (0.2098) loss: 0.7633 (0.7526) time: 0.1827 data: 0.0979 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:57 lr: 0.000003 grad: 0.2014 (0.2098) loss: 0.7446 (0.7526) time: 0.1448 data: 0.0556 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:41 lr: 0.000003 grad: 0.2113 (0.2099) loss: 0.7545 (0.7525) time: 0.1343 data: 0.0578 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:26 lr: 0.000003 grad: 0.2060 (0.2098) loss: 0.7507 (0.7525) time: 0.1496 data: 0.0607 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:10 lr: 0.000003 grad: 0.2104 (0.2098) loss: 0.7456 (0.7524) time: 0.1454 data: 0.0506 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:54 lr: 0.000003 grad: 0.1944 (0.2098) loss: 0.7583 (0.7524) time: 0.1255 data: 0.0258 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:38 lr: 0.000003 grad: 0.2006 (0.2097) loss: 0.7494 (0.7523) time: 0.1356 data: 0.0464 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:23 lr: 0.000003 grad: 0.2067 (0.2098) loss: 0.7454 (0.7523) time: 0.1327 data: 0.0330 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.2110 (0.2097) loss: 0.7438 (0.7522) time: 0.1315 data: 0.0436 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2004 (0.2097) loss: 0.7538 (0.7522) time: 0.1878 data: 0.0956 max mem: 9377 +Train: [89] Total time: 0:16:16 (0.1562 s / it) +Averaged stats: lr: 0.000003 grad: 0.2004 (0.2097) loss: 0.7538 (0.7522) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:03:59 loss: 0.8464 (0.8464) time: 3.8575 data: 3.7943 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8576 (0.8579) time: 0.1338 data: 0.1069 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (hcp-train-subset): loss: 0.8576 (0.8579) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [89] [ 0/62] eta: 0:04:33 loss: 0.8545 (0.8545) time: 4.4094 data: 4.3365 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8559 (0.8570) time: 0.1184 data: 0.0933 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:14 (0.2266 s / it) +Averaged stats (hcp-val): loss: 0.8559 (0.8570) +Making plots (hcp-val): example=26 +Eval (nsd-val): [89] [ 0/62] eta: 0:04:57 loss: 0.8243 (0.8243) time: 4.8062 data: 4.7279 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8308 (0.8334) time: 0.1447 data: 0.1153 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:14 (0.2358 s / it) +Averaged stats (nsd-val): loss: 0.8308 (0.8334) +Making plots (nsd-val): example=61 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 8:42:46 lr: 0.000003 grad: 0.3830 (0.3830) loss: 0.6857 (0.6857) time: 5.0187 data: 4.6090 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:21:40 lr: 0.000003 grad: 0.2355 (0.2454) loss: 0.7635 (0.7645) time: 0.1249 data: 0.0111 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:18:27 lr: 0.000003 grad: 0.2243 (0.2377) loss: 0.7466 (0.7542) time: 0.1592 data: 0.0562 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:17:12 lr: 0.000003 grad: 0.2175 (0.2289) loss: 0.7524 (0.7549) time: 0.1460 data: 0.0485 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:16:24 lr: 0.000003 grad: 0.2141 (0.2249) loss: 0.7420 (0.7559) time: 0.1584 data: 0.0631 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:15:53 lr: 0.000003 grad: 0.2223 (0.2249) loss: 0.7531 (0.7535) time: 0.1821 data: 0.0968 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:15:25 lr: 0.000003 grad: 0.2113 (0.2242) loss: 0.7541 (0.7524) time: 0.1484 data: 0.0632 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:15:00 lr: 0.000003 grad: 0.2073 (0.2227) loss: 0.7463 (0.7519) time: 0.1510 data: 0.0576 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:14:44 lr: 0.000003 grad: 0.2154 (0.2223) loss: 0.7427 (0.7508) time: 0.1842 data: 0.0896 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:14:18 lr: 0.000003 grad: 0.2074 (0.2214) loss: 0.7550 (0.7508) time: 0.1487 data: 0.0565 max mem: 9377 +Train: [90] [1000/6250] eta: 0:13:59 lr: 0.000003 grad: 0.2065 (0.2209) loss: 0.7394 (0.7502) time: 0.1690 data: 0.0816 max mem: 9377 +Train: [90] [1100/6250] eta: 0:13:37 lr: 0.000003 grad: 0.2054 (0.2202) loss: 0.7570 (0.7499) time: 0.1386 data: 0.0366 max mem: 9377 +Train: [90] [1200/6250] eta: 0:13:15 lr: 0.000003 grad: 0.2055 (0.2195) loss: 0.7515 (0.7493) time: 0.1518 data: 0.0551 max mem: 9377 +Train: [90] [1300/6250] eta: 0:12:55 lr: 0.000003 grad: 0.2159 (0.2190) loss: 0.7412 (0.7488) time: 0.1637 data: 0.0807 max mem: 9377 +Train: [90] [1400/6250] eta: 0:12:35 lr: 0.000003 grad: 0.2088 (0.2185) loss: 0.7343 (0.7484) time: 0.1332 data: 0.0408 max mem: 9377 +Train: [90] [1500/6250] eta: 0:12:16 lr: 0.000003 grad: 0.2117 (0.2181) loss: 0.7484 (0.7481) time: 0.1612 data: 0.0791 max mem: 9377 +Train: [90] [1600/6250] eta: 0:11:57 lr: 0.000003 grad: 0.2135 (0.2179) loss: 0.7350 (0.7478) time: 0.1541 data: 0.0682 max mem: 9377 +Train: [90] [1700/6250] eta: 0:11:45 lr: 0.000003 grad: 0.1977 (0.2174) loss: 0.7523 (0.7478) time: 0.1774 data: 0.0984 max mem: 9377 +Train: [90] [1800/6250] eta: 0:11:31 lr: 0.000003 grad: 0.2152 (0.2170) loss: 0.7318 (0.7477) time: 0.1493 data: 0.0611 max mem: 9377 +Train: [90] [1900/6250] eta: 0:11:15 lr: 0.000003 grad: 0.2105 (0.2167) loss: 0.7492 (0.7478) time: 0.1718 data: 0.0793 max mem: 9377 +Train: [90] [2000/6250] eta: 0:10:58 lr: 0.000003 grad: 0.2115 (0.2164) loss: 0.7433 (0.7477) time: 0.1554 data: 0.0737 max mem: 9377 +Train: [90] [2100/6250] eta: 0:10:43 lr: 0.000003 grad: 0.2169 (0.2161) loss: 0.7455 (0.7476) time: 0.1491 data: 0.0585 max mem: 9377 +Train: [90] [2200/6250] eta: 0:10:28 lr: 0.000003 grad: 0.2071 (0.2159) loss: 0.7398 (0.7476) time: 0.1766 data: 0.0984 max mem: 9377 +Train: [90] [2300/6250] eta: 0:10:13 lr: 0.000003 grad: 0.2123 (0.2156) loss: 0.7431 (0.7475) time: 0.1719 data: 0.0839 max mem: 9377 +Train: [90] [2400/6250] eta: 0:09:56 lr: 0.000003 grad: 0.2096 (0.2153) loss: 0.7347 (0.7474) time: 0.1430 data: 0.0450 max mem: 9377 +Train: [90] [2500/6250] eta: 0:09:40 lr: 0.000003 grad: 0.2055 (0.2151) loss: 0.7513 (0.7474) time: 0.1712 data: 0.0842 max mem: 9377 +Train: [90] [2600/6250] eta: 0:09:22 lr: 0.000003 grad: 0.2039 (0.2150) loss: 0.7461 (0.7474) time: 0.1292 data: 0.0312 max mem: 9377 +Train: [90] [2700/6250] eta: 0:09:06 lr: 0.000003 grad: 0.2072 (0.2148) loss: 0.7657 (0.7473) time: 0.1290 data: 0.0408 max mem: 9377 +Train: [90] [2800/6250] eta: 0:08:49 lr: 0.000003 grad: 0.2189 (0.2148) loss: 0.7406 (0.7472) time: 0.1498 data: 0.0633 max mem: 9377 +Train: [90] [2900/6250] eta: 0:08:32 lr: 0.000003 grad: 0.2025 (0.2146) loss: 0.7468 (0.7472) time: 0.1453 data: 0.0523 max mem: 9377 +Train: [90] [3000/6250] eta: 0:08:17 lr: 0.000003 grad: 0.2136 (0.2146) loss: 0.7447 (0.7471) time: 0.1576 data: 0.0774 max mem: 9377 +Train: [90] [3100/6250] eta: 0:08:02 lr: 0.000003 grad: 0.2049 (0.2144) loss: 0.7458 (0.7470) time: 0.1486 data: 0.0672 max mem: 9377 +Train: [90] [3200/6250] eta: 0:07:46 lr: 0.000003 grad: 0.2035 (0.2142) loss: 0.7536 (0.7469) time: 0.1422 data: 0.0486 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:31 lr: 0.000003 grad: 0.2076 (0.2141) loss: 0.7418 (0.7470) time: 0.1524 data: 0.0667 max mem: 9377 +Train: [90] [3400/6250] eta: 0:07:17 lr: 0.000003 grad: 0.2154 (0.2142) loss: 0.7469 (0.7469) time: 0.1849 data: 0.0982 max mem: 9377 +Train: [90] [3500/6250] eta: 0:07:02 lr: 0.000003 grad: 0.1996 (0.2141) loss: 0.7502 (0.7470) time: 0.1485 data: 0.0619 max mem: 9377 +Train: [90] [3600/6250] eta: 0:06:46 lr: 0.000003 grad: 0.2104 (0.2142) loss: 0.7486 (0.7470) time: 0.1584 data: 0.0690 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:30 lr: 0.000003 grad: 0.2070 (0.2142) loss: 0.7481 (0.7469) time: 0.1354 data: 0.0502 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:14 lr: 0.000003 grad: 0.2074 (0.2142) loss: 0.7471 (0.7469) time: 0.1466 data: 0.0493 max mem: 9377 +Train: [90] [3900/6250] eta: 0:05:59 lr: 0.000003 grad: 0.2118 (0.2142) loss: 0.7414 (0.7469) time: 0.1582 data: 0.0651 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:43 lr: 0.000003 grad: 0.2131 (0.2142) loss: 0.7376 (0.7468) time: 0.1471 data: 0.0587 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:27 lr: 0.000003 grad: 0.2071 (0.2142) loss: 0.7512 (0.7469) time: 0.1443 data: 0.0507 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:12 lr: 0.000003 grad: 0.2179 (0.2142) loss: 0.7353 (0.7469) time: 0.1337 data: 0.0436 max mem: 9377 +Train: [90] [4300/6250] eta: 0:04:56 lr: 0.000003 grad: 0.2054 (0.2142) loss: 0.7586 (0.7469) time: 0.1402 data: 0.0499 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:40 lr: 0.000003 grad: 0.2140 (0.2142) loss: 0.7467 (0.7469) time: 0.1719 data: 0.0986 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:25 lr: 0.000003 grad: 0.2078 (0.2142) loss: 0.7432 (0.7468) time: 0.1314 data: 0.0480 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:10 lr: 0.000003 grad: 0.2037 (0.2142) loss: 0.7493 (0.7467) time: 0.1600 data: 0.0719 max mem: 9377 +Train: [90] [4700/6250] eta: 0:03:55 lr: 0.000003 grad: 0.2055 (0.2141) loss: 0.7467 (0.7466) time: 0.1506 data: 0.0630 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:40 lr: 0.000003 grad: 0.2113 (0.2141) loss: 0.7441 (0.7465) time: 0.1444 data: 0.0522 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:25 lr: 0.000003 grad: 0.2151 (0.2141) loss: 0.7504 (0.7464) time: 0.1420 data: 0.0527 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:10 lr: 0.000003 grad: 0.2152 (0.2141) loss: 0.7400 (0.7464) time: 0.1608 data: 0.0752 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:55 lr: 0.000003 grad: 0.2126 (0.2140) loss: 0.7457 (0.7464) time: 0.1600 data: 0.0808 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:39 lr: 0.000003 grad: 0.2001 (0.2138) loss: 0.7513 (0.7464) time: 0.1581 data: 0.0715 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:24 lr: 0.000003 grad: 0.2052 (0.2137) loss: 0.7413 (0.7465) time: 0.1547 data: 0.0698 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:09 lr: 0.000003 grad: 0.2102 (0.2136) loss: 0.7421 (0.7465) time: 0.1701 data: 0.0919 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:54 lr: 0.000003 grad: 0.2025 (0.2135) loss: 0.7523 (0.7465) time: 0.1443 data: 0.0629 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:39 lr: 0.000003 grad: 0.1960 (0.2134) loss: 0.7544 (0.7465) time: 0.1299 data: 0.0430 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:23 lr: 0.000003 grad: 0.1994 (0.2133) loss: 0.7498 (0.7465) time: 0.1234 data: 0.0349 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:08 lr: 0.000003 grad: 0.2077 (0.2133) loss: 0.7523 (0.7466) time: 0.1192 data: 0.0375 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:53 lr: 0.000003 grad: 0.2055 (0.2131) loss: 0.7586 (0.7466) time: 0.1273 data: 0.0346 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:37 lr: 0.000003 grad: 0.2064 (0.2131) loss: 0.7502 (0.7466) time: 0.1529 data: 0.0622 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.1999 (0.2129) loss: 0.7619 (0.7467) time: 0.1395 data: 0.0511 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.2086 (0.2128) loss: 0.7497 (0.7468) time: 0.1645 data: 0.0841 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2093 (0.2129) loss: 0.7497 (0.7468) time: 0.1628 data: 0.0817 max mem: 9377 +Train: [90] Total time: 0:15:53 (0.1526 s / it) +Averaged stats: lr: 0.000003 grad: 0.2093 (0.2129) loss: 0.7497 (0.7468) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:04:28 loss: 0.8471 (0.8471) time: 4.3277 data: 4.2510 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8543 (0.8579) time: 0.1212 data: 0.0962 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:14 (0.2337 s / it) +Averaged stats (hcp-train-subset): loss: 0.8543 (0.8579) +Eval (hcp-val): [90] [ 0/62] eta: 0:05:51 loss: 0.8512 (0.8512) time: 5.6703 data: 5.6384 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8536 (0.8563) time: 0.1328 data: 0.1074 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (hcp-val): loss: 0.8536 (0.8563) +Eval (nsd-val): [90] [ 0/62] eta: 0:04:04 loss: 0.8219 (0.8219) time: 3.9512 data: 3.8541 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8324 (0.8337) time: 0.1211 data: 0.0960 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:14 (0.2305 s / it) +Averaged stats (nsd-val): loss: 0.8324 (0.8337) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 12:04:19 lr: 0.000003 grad: 0.4974 (0.4974) loss: 0.5601 (0.5601) time: 6.9536 data: 6.8198 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:21:43 lr: 0.000003 grad: 0.2237 (0.2849) loss: 0.7411 (0.7317) time: 0.1507 data: 0.0503 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:18:39 lr: 0.000003 grad: 0.2197 (0.2608) loss: 0.7631 (0.7391) time: 0.1470 data: 0.0558 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:17:07 lr: 0.000003 grad: 0.2161 (0.2496) loss: 0.7500 (0.7432) time: 0.1333 data: 0.0320 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:16:20 lr: 0.000003 grad: 0.2062 (0.2436) loss: 0.7503 (0.7443) time: 0.1495 data: 0.0489 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:15:48 lr: 0.000003 grad: 0.2107 (0.2391) loss: 0.7598 (0.7454) time: 0.1526 data: 0.0542 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:15:13 lr: 0.000003 grad: 0.2146 (0.2363) loss: 0.7482 (0.7461) time: 0.1253 data: 0.0123 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:14:53 lr: 0.000003 grad: 0.2316 (0.2338) loss: 0.7447 (0.7465) time: 0.1743 data: 0.0804 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:14:32 lr: 0.000003 grad: 0.2202 (0.2320) loss: 0.7490 (0.7472) time: 0.1461 data: 0.0567 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:14:22 lr: 0.000003 grad: 0.2116 (0.2307) loss: 0.7455 (0.7472) time: 0.1575 data: 0.0518 max mem: 9377 +Train: [91] [1000/6250] eta: 0:14:35 lr: 0.000003 grad: 0.2058 (0.2292) loss: 0.7503 (0.7472) time: 0.1094 data: 0.0005 max mem: 9377 +Train: [91] [1100/6250] eta: 0:14:33 lr: 0.000003 grad: 0.2074 (0.2281) loss: 0.7486 (0.7472) time: 0.1277 data: 0.0005 max mem: 9377 +Train: [91] [1200/6250] eta: 0:14:35 lr: 0.000003 grad: 0.2140 (0.2274) loss: 0.7526 (0.7472) time: 0.2591 data: 0.1582 max mem: 9377 +Train: [91] [1300/6250] eta: 0:14:09 lr: 0.000003 grad: 0.2219 (0.2266) loss: 0.7402 (0.7470) time: 0.1620 data: 0.0729 max mem: 9377 +Train: [91] [1400/6250] eta: 0:13:47 lr: 0.000003 grad: 0.2046 (0.2257) loss: 0.7437 (0.7469) time: 0.1886 data: 0.1002 max mem: 9377 +Train: [91] [1500/6250] eta: 0:13:22 lr: 0.000003 grad: 0.2100 (0.2255) loss: 0.7465 (0.7470) time: 0.1746 data: 0.0681 max mem: 9377 +Train: [91] [1600/6250] eta: 0:13:08 lr: 0.000003 grad: 0.2165 (0.2250) loss: 0.7428 (0.7470) time: 0.1368 data: 0.0472 max mem: 9377 +Train: [91] [1700/6250] eta: 0:13:09 lr: 0.000003 grad: 0.2107 (0.2243) loss: 0.7485 (0.7471) time: 0.2589 data: 0.1608 max mem: 9377 +Train: [91] [1800/6250] eta: 0:12:53 lr: 0.000003 grad: 0.2094 (0.2237) loss: 0.7528 (0.7473) time: 0.1623 data: 0.0748 max mem: 9377 +Train: [91] [1900/6250] eta: 0:12:36 lr: 0.000003 grad: 0.2132 (0.2232) loss: 0.7443 (0.7473) time: 0.1849 data: 0.1000 max mem: 9377 +Train: [91] [2000/6250] eta: 0:12:17 lr: 0.000003 grad: 0.2111 (0.2226) loss: 0.7436 (0.7474) time: 0.1483 data: 0.0725 max mem: 9377 +Train: [91] [2100/6250] eta: 0:11:56 lr: 0.000003 grad: 0.2041 (0.2220) loss: 0.7534 (0.7475) time: 0.1499 data: 0.0595 max mem: 9377 +Train: [91] [2200/6250] eta: 0:11:37 lr: 0.000003 grad: 0.2077 (0.2215) loss: 0.7548 (0.7477) time: 0.1503 data: 0.0584 max mem: 9377 +Train: [91] [2300/6250] eta: 0:11:16 lr: 0.000003 grad: 0.2055 (0.2209) loss: 0.7496 (0.7478) time: 0.1446 data: 0.0523 max mem: 9377 +Train: [91] [2400/6250] eta: 0:10:57 lr: 0.000003 grad: 0.2024 (0.2205) loss: 0.7539 (0.7481) time: 0.1279 data: 0.0462 max mem: 9377 +Train: [91] [2500/6250] eta: 0:10:37 lr: 0.000003 grad: 0.2104 (0.2199) loss: 0.7513 (0.7483) time: 0.1545 data: 0.0611 max mem: 9377 +Train: [91] [2600/6250] eta: 0:10:17 lr: 0.000003 grad: 0.2044 (0.2194) loss: 0.7566 (0.7484) time: 0.1602 data: 0.0676 max mem: 9377 +Train: [91] [2700/6250] eta: 0:09:57 lr: 0.000002 grad: 0.2079 (0.2190) loss: 0.7521 (0.7485) time: 0.1491 data: 0.0532 max mem: 9377 +Train: [91] [2800/6250] eta: 0:09:37 lr: 0.000002 grad: 0.2138 (0.2188) loss: 0.7511 (0.7486) time: 0.1415 data: 0.0516 max mem: 9377 +Train: [91] [2900/6250] eta: 0:09:18 lr: 0.000002 grad: 0.2182 (0.2186) loss: 0.7422 (0.7486) time: 0.1529 data: 0.0596 max mem: 9377 +Train: [91] [3000/6250] eta: 0:09:00 lr: 0.000002 grad: 0.2041 (0.2183) loss: 0.7598 (0.7486) time: 0.1551 data: 0.0649 max mem: 9377 +Train: [91] [3100/6250] eta: 0:08:42 lr: 0.000002 grad: 0.2126 (0.2181) loss: 0.7367 (0.7486) time: 0.1450 data: 0.0612 max mem: 9377 +Train: [91] [3200/6250] eta: 0:08:25 lr: 0.000002 grad: 0.2034 (0.2179) loss: 0.7581 (0.7485) time: 0.1683 data: 0.0700 max mem: 9377 +Train: [91] [3300/6250] eta: 0:08:07 lr: 0.000002 grad: 0.2101 (0.2177) loss: 0.7397 (0.7485) time: 0.1286 data: 0.0339 max mem: 9377 +Train: [91] [3400/6250] eta: 0:07:51 lr: 0.000002 grad: 0.2079 (0.2175) loss: 0.7413 (0.7485) time: 0.1682 data: 0.0763 max mem: 9377 +Train: [91] [3500/6250] eta: 0:07:33 lr: 0.000002 grad: 0.2018 (0.2173) loss: 0.7563 (0.7485) time: 0.1731 data: 0.0881 max mem: 9377 +Train: [91] [3600/6250] eta: 0:07:16 lr: 0.000002 grad: 0.2029 (0.2173) loss: 0.7424 (0.7485) time: 0.1603 data: 0.0709 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:58 lr: 0.000002 grad: 0.2100 (0.2172) loss: 0.7487 (0.7484) time: 0.1377 data: 0.0372 max mem: 9377 +Train: [91] [3800/6250] eta: 0:06:41 lr: 0.000002 grad: 0.2174 (0.2171) loss: 0.7463 (0.7484) time: 0.1447 data: 0.0484 max mem: 9377 +Train: [91] [3900/6250] eta: 0:06:23 lr: 0.000002 grad: 0.2055 (0.2170) loss: 0.7488 (0.7483) time: 0.1528 data: 0.0644 max mem: 9377 +Train: [91] [4000/6250] eta: 0:06:06 lr: 0.000002 grad: 0.2053 (0.2170) loss: 0.7549 (0.7482) time: 0.1485 data: 0.0588 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:49 lr: 0.000002 grad: 0.2181 (0.2170) loss: 0.7408 (0.7481) time: 0.1685 data: 0.0850 max mem: 9377 +Train: [91] [4200/6250] eta: 0:05:33 lr: 0.000002 grad: 0.2088 (0.2169) loss: 0.7513 (0.7480) time: 0.1501 data: 0.0633 max mem: 9377 +Train: [91] [4300/6250] eta: 0:05:16 lr: 0.000002 grad: 0.2061 (0.2167) loss: 0.7473 (0.7481) time: 0.1495 data: 0.0622 max mem: 9377 +Train: [91] [4400/6250] eta: 0:05:00 lr: 0.000002 grad: 0.2117 (0.2166) loss: 0.7507 (0.7482) time: 0.1362 data: 0.0627 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:43 lr: 0.000002 grad: 0.2099 (0.2167) loss: 0.7471 (0.7482) time: 0.1520 data: 0.0719 max mem: 9377 +Train: [91] [4600/6250] eta: 0:04:27 lr: 0.000002 grad: 0.2111 (0.2168) loss: 0.7509 (0.7483) time: 0.1504 data: 0.0455 max mem: 9377 +Train: [91] [4700/6250] eta: 0:04:10 lr: 0.000002 grad: 0.2119 (0.2166) loss: 0.7533 (0.7483) time: 0.1513 data: 0.0688 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:54 lr: 0.000002 grad: 0.2063 (0.2165) loss: 0.7449 (0.7483) time: 0.1797 data: 0.0933 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:38 lr: 0.000002 grad: 0.2006 (0.2163) loss: 0.7477 (0.7485) time: 0.1472 data: 0.0577 max mem: 9377 +Train: [91] [5000/6250] eta: 0:03:22 lr: 0.000002 grad: 0.2009 (0.2161) loss: 0.7523 (0.7486) time: 0.1464 data: 0.0490 max mem: 9377 +Train: [91] [5100/6250] eta: 0:03:06 lr: 0.000002 grad: 0.2102 (0.2160) loss: 0.7438 (0.7486) time: 0.1374 data: 0.0503 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:49 lr: 0.000002 grad: 0.2062 (0.2159) loss: 0.7441 (0.7487) time: 0.1359 data: 0.0556 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:33 lr: 0.000002 grad: 0.2103 (0.2158) loss: 0.7517 (0.7487) time: 0.1528 data: 0.0694 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:16 lr: 0.000002 grad: 0.2014 (0.2156) loss: 0.7521 (0.7488) time: 0.1429 data: 0.0585 max mem: 9377 +Train: [91] [5500/6250] eta: 0:02:00 lr: 0.000002 grad: 0.2026 (0.2155) loss: 0.7412 (0.7488) time: 0.1362 data: 0.0507 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:44 lr: 0.000002 grad: 0.2116 (0.2154) loss: 0.7456 (0.7488) time: 0.1543 data: 0.0663 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:28 lr: 0.000002 grad: 0.2117 (0.2154) loss: 0.7449 (0.7488) time: 0.1683 data: 0.0798 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:12 lr: 0.000002 grad: 0.2091 (0.2153) loss: 0.7427 (0.7487) time: 0.1545 data: 0.0652 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:55 lr: 0.000002 grad: 0.2080 (0.2153) loss: 0.7553 (0.7487) time: 0.1333 data: 0.0433 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:39 lr: 0.000002 grad: 0.2025 (0.2152) loss: 0.7451 (0.7487) time: 0.1690 data: 0.0853 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:23 lr: 0.000002 grad: 0.2041 (0.2151) loss: 0.7526 (0.7487) time: 0.1091 data: 0.0223 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.2019 (0.2150) loss: 0.7590 (0.7488) time: 0.1523 data: 0.0745 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2074 (0.2150) loss: 0.7501 (0.7488) time: 0.1528 data: 0.0699 max mem: 9377 +Train: [91] Total time: 0:16:41 (0.1603 s / it) +Averaged stats: lr: 0.000002 grad: 0.2074 (0.2150) loss: 0.7501 (0.7488) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:05:33 loss: 0.8476 (0.8476) time: 5.3814 data: 5.3514 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8551 (0.8573) time: 0.1003 data: 0.0756 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (hcp-train-subset): loss: 0.8551 (0.8573) +Eval (hcp-val): [91] [ 0/62] eta: 0:05:58 loss: 0.8499 (0.8499) time: 5.7746 data: 5.7433 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8533 (0.8560) time: 0.0987 data: 0.0720 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:14 (0.2306 s / it) +Averaged stats (hcp-val): loss: 0.8533 (0.8560) +Eval (nsd-val): [91] [ 0/62] eta: 0:05:12 loss: 0.8264 (0.8264) time: 5.0450 data: 5.0080 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8331 (0.8328) time: 0.1307 data: 0.1041 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:14 (0.2295 s / it) +Averaged stats (nsd-val): loss: 0.8331 (0.8328) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 8:14:03 lr: 0.000002 grad: 0.3175 (0.3175) loss: 0.7352 (0.7352) time: 4.7430 data: 4.3735 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:22:01 lr: 0.000002 grad: 0.2444 (0.2656) loss: 0.7343 (0.7417) time: 0.1840 data: 0.0821 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:19:02 lr: 0.000002 grad: 0.2490 (0.2564) loss: 0.7296 (0.7398) time: 0.1591 data: 0.0504 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:17:30 lr: 0.000002 grad: 0.2151 (0.2487) loss: 0.7277 (0.7391) time: 0.1530 data: 0.0518 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:16:32 lr: 0.000002 grad: 0.2427 (0.2461) loss: 0.7315 (0.7383) time: 0.1596 data: 0.0669 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:15:49 lr: 0.000002 grad: 0.2188 (0.2425) loss: 0.7518 (0.7397) time: 0.1392 data: 0.0402 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:15:22 lr: 0.000002 grad: 0.2169 (0.2379) loss: 0.7420 (0.7419) time: 0.1374 data: 0.0493 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:14:56 lr: 0.000002 grad: 0.2024 (0.2336) loss: 0.7614 (0.7442) time: 0.1493 data: 0.0514 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:14:34 lr: 0.000002 grad: 0.2086 (0.2309) loss: 0.7515 (0.7458) time: 0.1466 data: 0.0535 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:14:14 lr: 0.000002 grad: 0.2084 (0.2288) loss: 0.7502 (0.7468) time: 0.1728 data: 0.0792 max mem: 9377 +Train: [92] [1000/6250] eta: 0:13:54 lr: 0.000002 grad: 0.2122 (0.2270) loss: 0.7570 (0.7475) time: 0.1702 data: 0.0910 max mem: 9377 +Train: [92] [1100/6250] eta: 0:13:32 lr: 0.000002 grad: 0.2095 (0.2255) loss: 0.7588 (0.7482) time: 0.1410 data: 0.0546 max mem: 9377 +Train: [92] [1200/6250] eta: 0:13:14 lr: 0.000002 grad: 0.2050 (0.2242) loss: 0.7536 (0.7486) time: 0.1417 data: 0.0503 max mem: 9377 +Train: [92] [1300/6250] eta: 0:12:54 lr: 0.000002 grad: 0.2108 (0.2234) loss: 0.7460 (0.7488) time: 0.1451 data: 0.0480 max mem: 9377 +Train: [92] [1400/6250] eta: 0:12:35 lr: 0.000002 grad: 0.2149 (0.2224) loss: 0.7551 (0.7492) time: 0.1364 data: 0.0509 max mem: 9377 +Train: [92] [1500/6250] eta: 0:12:15 lr: 0.000002 grad: 0.2053 (0.2216) loss: 0.7613 (0.7496) time: 0.1461 data: 0.0621 max mem: 9377 +Train: [92] [1600/6250] eta: 0:12:01 lr: 0.000002 grad: 0.2027 (0.2206) loss: 0.7586 (0.7501) time: 0.1528 data: 0.0709 max mem: 9377 +Train: [92] [1700/6250] eta: 0:11:43 lr: 0.000002 grad: 0.2038 (0.2198) loss: 0.7562 (0.7503) time: 0.1620 data: 0.0739 max mem: 9377 +Train: [92] [1800/6250] eta: 0:11:31 lr: 0.000002 grad: 0.2088 (0.2191) loss: 0.7568 (0.7504) time: 0.1739 data: 0.0936 max mem: 9377 +Train: [92] [1900/6250] eta: 0:11:17 lr: 0.000002 grad: 0.1985 (0.2183) loss: 0.7522 (0.7508) time: 0.1678 data: 0.0817 max mem: 9377 +Train: [92] [2000/6250] eta: 0:10:59 lr: 0.000002 grad: 0.2004 (0.2176) loss: 0.7580 (0.7510) time: 0.1359 data: 0.0462 max mem: 9377 +Train: [92] [2100/6250] eta: 0:10:43 lr: 0.000002 grad: 0.2072 (0.2170) loss: 0.7548 (0.7513) time: 0.1391 data: 0.0460 max mem: 9377 +Train: [92] [2200/6250] eta: 0:10:29 lr: 0.000002 grad: 0.2043 (0.2165) loss: 0.7534 (0.7515) time: 0.1566 data: 0.0629 max mem: 9377 +Train: [92] [2300/6250] eta: 0:10:13 lr: 0.000002 grad: 0.1972 (0.2160) loss: 0.7589 (0.7517) time: 0.1437 data: 0.0587 max mem: 9377 +Train: [92] [2400/6250] eta: 0:09:57 lr: 0.000002 grad: 0.2041 (0.2157) loss: 0.7513 (0.7517) time: 0.1649 data: 0.0769 max mem: 9377 +Train: [92] [2500/6250] eta: 0:09:40 lr: 0.000002 grad: 0.2015 (0.2152) loss: 0.7502 (0.7517) time: 0.1285 data: 0.0221 max mem: 9377 +Train: [92] [2600/6250] eta: 0:09:24 lr: 0.000002 grad: 0.2046 (0.2147) loss: 0.7511 (0.7518) time: 0.1514 data: 0.0532 max mem: 9377 +Train: [92] [2700/6250] eta: 0:09:07 lr: 0.000002 grad: 0.2078 (0.2145) loss: 0.7423 (0.7518) time: 0.1471 data: 0.0608 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:51 lr: 0.000002 grad: 0.2027 (0.2143) loss: 0.7385 (0.7517) time: 0.1371 data: 0.0474 max mem: 9377 +Train: [92] [2900/6250] eta: 0:08:35 lr: 0.000002 grad: 0.2055 (0.2140) loss: 0.7471 (0.7517) time: 0.1533 data: 0.0645 max mem: 9377 +Train: [92] [3000/6250] eta: 0:08:20 lr: 0.000002 grad: 0.2109 (0.2138) loss: 0.7462 (0.7516) time: 0.1361 data: 0.0444 max mem: 9377 +Train: [92] [3100/6250] eta: 0:08:05 lr: 0.000002 grad: 0.2101 (0.2137) loss: 0.7469 (0.7514) time: 0.1703 data: 0.0838 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:49 lr: 0.000002 grad: 0.2123 (0.2136) loss: 0.7530 (0.7513) time: 0.1618 data: 0.0670 max mem: 9377 +Train: [92] [3300/6250] eta: 0:07:34 lr: 0.000002 grad: 0.2003 (0.2134) loss: 0.7592 (0.7512) time: 0.1575 data: 0.0722 max mem: 9377 +Train: [92] [3400/6250] eta: 0:07:19 lr: 0.000002 grad: 0.2164 (0.2133) loss: 0.7376 (0.7512) time: 0.1469 data: 0.0592 max mem: 9377 +Train: [92] [3500/6250] eta: 0:07:04 lr: 0.000002 grad: 0.2024 (0.2131) loss: 0.7605 (0.7511) time: 0.1573 data: 0.0616 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:48 lr: 0.000002 grad: 0.2160 (0.2130) loss: 0.7348 (0.7509) time: 0.1372 data: 0.0451 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:32 lr: 0.000002 grad: 0.2118 (0.2130) loss: 0.7552 (0.7509) time: 0.1687 data: 0.0782 max mem: 9377 +Train: [92] [3800/6250] eta: 0:06:16 lr: 0.000002 grad: 0.2059 (0.2130) loss: 0.7476 (0.7507) time: 0.1501 data: 0.0559 max mem: 9377 +Train: [92] [3900/6250] eta: 0:06:00 lr: 0.000002 grad: 0.2127 (0.2129) loss: 0.7456 (0.7507) time: 0.1476 data: 0.0640 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:45 lr: 0.000002 grad: 0.2141 (0.2129) loss: 0.7419 (0.7505) time: 0.1459 data: 0.0644 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:29 lr: 0.000002 grad: 0.2132 (0.2130) loss: 0.7388 (0.7503) time: 0.1466 data: 0.0504 max mem: 9377 +Train: [92] [4200/6250] eta: 0:05:14 lr: 0.000002 grad: 0.2135 (0.2130) loss: 0.7469 (0.7501) time: 0.1372 data: 0.0480 max mem: 9377 +Train: [92] [4300/6250] eta: 0:04:58 lr: 0.000002 grad: 0.2081 (0.2130) loss: 0.7379 (0.7500) time: 0.1458 data: 0.0595 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:43 lr: 0.000002 grad: 0.2199 (0.2130) loss: 0.7407 (0.7498) time: 0.1581 data: 0.0728 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:28 lr: 0.000002 grad: 0.2073 (0.2130) loss: 0.7318 (0.7497) time: 0.1788 data: 0.0876 max mem: 9377 +Train: [92] [4600/6250] eta: 0:04:12 lr: 0.000002 grad: 0.2080 (0.2132) loss: 0.7433 (0.7496) time: 0.1494 data: 0.0616 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:57 lr: 0.000002 grad: 0.2089 (0.2131) loss: 0.7481 (0.7495) time: 0.1768 data: 0.0812 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:42 lr: 0.000002 grad: 0.2088 (0.2131) loss: 0.7306 (0.7493) time: 0.1506 data: 0.0634 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:26 lr: 0.000002 grad: 0.2045 (0.2132) loss: 0.7455 (0.7493) time: 0.1479 data: 0.0642 max mem: 9377 +Train: [92] [5000/6250] eta: 0:03:11 lr: 0.000002 grad: 0.2065 (0.2131) loss: 0.7553 (0.7492) time: 0.1403 data: 0.0497 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:55 lr: 0.000002 grad: 0.2141 (0.2132) loss: 0.7369 (0.7491) time: 0.1520 data: 0.0676 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:40 lr: 0.000002 grad: 0.2092 (0.2131) loss: 0.7458 (0.7492) time: 0.1401 data: 0.0417 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:25 lr: 0.000002 grad: 0.2091 (0.2130) loss: 0.7429 (0.7490) time: 0.1297 data: 0.0360 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:09 lr: 0.000002 grad: 0.2127 (0.2130) loss: 0.7371 (0.7489) time: 0.1500 data: 0.0683 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:54 lr: 0.000002 grad: 0.2064 (0.2129) loss: 0.7497 (0.7490) time: 0.1449 data: 0.0577 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:39 lr: 0.000002 grad: 0.2062 (0.2129) loss: 0.7573 (0.7491) time: 0.1619 data: 0.0789 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:23 lr: 0.000002 grad: 0.2076 (0.2129) loss: 0.7563 (0.7491) time: 0.1650 data: 0.0838 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:08 lr: 0.000002 grad: 0.2168 (0.2129) loss: 0.7456 (0.7491) time: 0.1439 data: 0.0601 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:53 lr: 0.000002 grad: 0.2121 (0.2128) loss: 0.7484 (0.7492) time: 0.1615 data: 0.0769 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:38 lr: 0.000002 grad: 0.2179 (0.2129) loss: 0.7582 (0.7491) time: 0.1444 data: 0.0539 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:22 lr: 0.000002 grad: 0.2170 (0.2129) loss: 0.7567 (0.7492) time: 0.1842 data: 0.1012 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.2119 (0.2130) loss: 0.7462 (0.7491) time: 0.1558 data: 0.0696 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2227 (0.2131) loss: 0.7416 (0.7490) time: 0.1481 data: 0.0705 max mem: 9377 +Train: [92] Total time: 0:15:55 (0.1529 s / it) +Averaged stats: lr: 0.000002 grad: 0.2227 (0.2131) loss: 0.7416 (0.7490) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:04:52 loss: 0.8517 (0.8517) time: 4.7100 data: 4.6390 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8567 (0.8576) time: 0.1261 data: 0.1002 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-train-subset): loss: 0.8567 (0.8576) +Eval (hcp-val): [92] [ 0/62] eta: 0:03:49 loss: 0.8517 (0.8517) time: 3.6973 data: 3.6008 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8548 (0.8557) time: 0.1261 data: 0.1005 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (hcp-val): loss: 0.8548 (0.8557) +Eval (nsd-val): [92] [ 0/62] eta: 0:05:17 loss: 0.8237 (0.8237) time: 5.1135 data: 5.0816 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8326 (0.8336) time: 0.1289 data: 0.1033 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:13 (0.2228 s / it) +Averaged stats (nsd-val): loss: 0.8326 (0.8336) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 11:57:44 lr: 0.000002 grad: 0.2471 (0.2471) loss: 0.7661 (0.7661) time: 6.8904 data: 6.7916 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:21:50 lr: 0.000002 grad: 0.2416 (0.2605) loss: 0.7552 (0.7592) time: 0.1349 data: 0.0277 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:18:42 lr: 0.000002 grad: 0.2264 (0.2483) loss: 0.7603 (0.7575) time: 0.1430 data: 0.0257 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:17:37 lr: 0.000002 grad: 0.2073 (0.2383) loss: 0.7605 (0.7580) time: 0.1815 data: 0.0842 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:16:31 lr: 0.000002 grad: 0.2053 (0.2314) loss: 0.7631 (0.7594) time: 0.1498 data: 0.0465 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:15:48 lr: 0.000002 grad: 0.2079 (0.2282) loss: 0.7614 (0.7587) time: 0.1374 data: 0.0335 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:15:24 lr: 0.000002 grad: 0.2047 (0.2251) loss: 0.7605 (0.7585) time: 0.1856 data: 0.0889 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:15:00 lr: 0.000002 grad: 0.2120 (0.2227) loss: 0.7571 (0.7581) time: 0.1610 data: 0.0724 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:14:36 lr: 0.000002 grad: 0.2145 (0.2214) loss: 0.7557 (0.7584) time: 0.1414 data: 0.0520 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:14:19 lr: 0.000002 grad: 0.1962 (0.2191) loss: 0.7646 (0.7591) time: 0.1774 data: 0.0875 max mem: 9377 +Train: [93] [1000/6250] eta: 0:13:55 lr: 0.000002 grad: 0.2031 (0.2180) loss: 0.7493 (0.7594) time: 0.1432 data: 0.0500 max mem: 9377 +Train: [93] [1100/6250] eta: 0:13:36 lr: 0.000002 grad: 0.2008 (0.2169) loss: 0.7633 (0.7594) time: 0.1568 data: 0.0570 max mem: 9377 +Train: [93] [1200/6250] eta: 0:13:15 lr: 0.000002 grad: 0.1987 (0.2157) loss: 0.7647 (0.7594) time: 0.1371 data: 0.0457 max mem: 9377 +Train: [93] [1300/6250] eta: 0:12:55 lr: 0.000002 grad: 0.2119 (0.2151) loss: 0.7478 (0.7592) time: 0.1472 data: 0.0583 max mem: 9377 +Train: [93] [1400/6250] eta: 0:12:38 lr: 0.000002 grad: 0.2109 (0.2148) loss: 0.7485 (0.7586) time: 0.1492 data: 0.0634 max mem: 9377 +Train: [93] [1500/6250] eta: 0:12:21 lr: 0.000002 grad: 0.2079 (0.2147) loss: 0.7466 (0.7580) time: 0.1477 data: 0.0663 max mem: 9377 +Train: [93] [1600/6250] eta: 0:12:03 lr: 0.000002 grad: 0.2069 (0.2143) loss: 0.7451 (0.7576) time: 0.1785 data: 0.0874 max mem: 9377 +Train: [93] [1700/6250] eta: 0:11:45 lr: 0.000002 grad: 0.2099 (0.2142) loss: 0.7400 (0.7571) time: 0.1336 data: 0.0445 max mem: 9377 +Train: [93] [1800/6250] eta: 0:11:29 lr: 0.000002 grad: 0.2134 (0.2140) loss: 0.7437 (0.7565) time: 0.1566 data: 0.0656 max mem: 9377 +Train: [93] [1900/6250] eta: 0:11:14 lr: 0.000002 grad: 0.2029 (0.2138) loss: 0.7502 (0.7560) time: 0.1463 data: 0.0556 max mem: 9377 +Train: [93] [2000/6250] eta: 0:10:58 lr: 0.000002 grad: 0.2048 (0.2135) loss: 0.7466 (0.7557) time: 0.1661 data: 0.0688 max mem: 9377 +Train: [93] [2100/6250] eta: 0:10:44 lr: 0.000002 grad: 0.2188 (0.2135) loss: 0.7353 (0.7550) time: 0.1877 data: 0.0969 max mem: 9377 +Train: [93] [2200/6250] eta: 0:10:30 lr: 0.000002 grad: 0.2073 (0.2136) loss: 0.7487 (0.7545) time: 0.1754 data: 0.0895 max mem: 9377 +Train: [93] [2300/6250] eta: 0:10:13 lr: 0.000001 grad: 0.2107 (0.2134) loss: 0.7500 (0.7542) time: 0.1311 data: 0.0377 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:56 lr: 0.000001 grad: 0.2125 (0.2135) loss: 0.7481 (0.7539) time: 0.1346 data: 0.0401 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:39 lr: 0.000001 grad: 0.2059 (0.2133) loss: 0.7536 (0.7538) time: 0.1457 data: 0.0516 max mem: 9377 +Train: [93] [2600/6250] eta: 0:09:22 lr: 0.000001 grad: 0.2089 (0.2133) loss: 0.7541 (0.7538) time: 0.1414 data: 0.0466 max mem: 9377 +Train: [93] [2700/6250] eta: 0:09:06 lr: 0.000001 grad: 0.1936 (0.2131) loss: 0.7604 (0.7538) time: 0.1613 data: 0.0723 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:50 lr: 0.000001 grad: 0.2062 (0.2130) loss: 0.7519 (0.7537) time: 0.1389 data: 0.0545 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:33 lr: 0.000001 grad: 0.2202 (0.2130) loss: 0.7517 (0.7536) time: 0.1215 data: 0.0356 max mem: 9377 +Train: [93] [3000/6250] eta: 0:08:17 lr: 0.000001 grad: 0.1996 (0.2129) loss: 0.7585 (0.7536) time: 0.1513 data: 0.0647 max mem: 9377 +Train: [93] [3100/6250] eta: 0:08:02 lr: 0.000001 grad: 0.1994 (0.2125) loss: 0.7539 (0.7538) time: 0.1305 data: 0.0455 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:46 lr: 0.000001 grad: 0.2086 (0.2123) loss: 0.7513 (0.7538) time: 0.1584 data: 0.0711 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:31 lr: 0.000001 grad: 0.2146 (0.2123) loss: 0.7532 (0.7537) time: 0.1566 data: 0.0745 max mem: 9377 +Train: [93] [3400/6250] eta: 0:07:17 lr: 0.000001 grad: 0.2011 (0.2122) loss: 0.7475 (0.7537) time: 0.1200 data: 0.0250 max mem: 9377 +Train: [93] [3500/6250] eta: 0:07:01 lr: 0.000001 grad: 0.1943 (0.2121) loss: 0.7593 (0.7537) time: 0.1591 data: 0.0748 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:46 lr: 0.000001 grad: 0.2118 (0.2120) loss: 0.7432 (0.7537) time: 0.1376 data: 0.0480 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:30 lr: 0.000001 grad: 0.2047 (0.2119) loss: 0.7534 (0.7536) time: 0.1455 data: 0.0542 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:14 lr: 0.000001 grad: 0.2046 (0.2118) loss: 0.7476 (0.7536) time: 0.1321 data: 0.0307 max mem: 9377 +Train: [93] [3900/6250] eta: 0:05:59 lr: 0.000001 grad: 0.2029 (0.2118) loss: 0.7419 (0.7535) time: 0.1456 data: 0.0614 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:43 lr: 0.000001 grad: 0.2042 (0.2117) loss: 0.7582 (0.7536) time: 0.1480 data: 0.0542 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:27 lr: 0.000001 grad: 0.2110 (0.2117) loss: 0.7399 (0.7535) time: 0.1553 data: 0.0640 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:12 lr: 0.000001 grad: 0.2032 (0.2117) loss: 0.7590 (0.7535) time: 0.1391 data: 0.0417 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:57 lr: 0.000001 grad: 0.2052 (0.2117) loss: 0.7469 (0.7535) time: 0.1483 data: 0.0631 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:42 lr: 0.000001 grad: 0.2145 (0.2118) loss: 0.7414 (0.7533) time: 0.1519 data: 0.0641 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:27 lr: 0.000001 grad: 0.2121 (0.2120) loss: 0.7445 (0.7531) time: 0.1645 data: 0.0805 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:12 lr: 0.000001 grad: 0.2092 (0.2121) loss: 0.7554 (0.7530) time: 0.1418 data: 0.0496 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:56 lr: 0.000001 grad: 0.2053 (0.2121) loss: 0.7519 (0.7530) time: 0.1709 data: 0.0824 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:41 lr: 0.000001 grad: 0.2127 (0.2122) loss: 0.7463 (0.7529) time: 0.1435 data: 0.0537 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:26 lr: 0.000001 grad: 0.2059 (0.2122) loss: 0.7550 (0.7529) time: 0.1309 data: 0.0387 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:11 lr: 0.000001 grad: 0.2084 (0.2122) loss: 0.7436 (0.7528) time: 0.1333 data: 0.0454 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:55 lr: 0.000001 grad: 0.2045 (0.2121) loss: 0.7486 (0.7527) time: 0.1685 data: 0.0825 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:40 lr: 0.000001 grad: 0.2091 (0.2121) loss: 0.7493 (0.7527) time: 0.1442 data: 0.0523 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:24 lr: 0.000001 grad: 0.2072 (0.2121) loss: 0.7478 (0.7527) time: 0.1407 data: 0.0547 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:09 lr: 0.000001 grad: 0.2140 (0.2121) loss: 0.7517 (0.7526) time: 0.1420 data: 0.0566 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:54 lr: 0.000001 grad: 0.2089 (0.2120) loss: 0.7427 (0.7527) time: 0.1413 data: 0.0579 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:39 lr: 0.000001 grad: 0.2052 (0.2120) loss: 0.7514 (0.7527) time: 0.1533 data: 0.0634 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:23 lr: 0.000001 grad: 0.2063 (0.2119) loss: 0.7517 (0.7527) time: 0.1563 data: 0.0739 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:08 lr: 0.000001 grad: 0.2013 (0.2119) loss: 0.7599 (0.7528) time: 0.1421 data: 0.0542 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:53 lr: 0.000001 grad: 0.2064 (0.2118) loss: 0.7514 (0.7528) time: 0.1597 data: 0.0732 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:38 lr: 0.000001 grad: 0.2091 (0.2118) loss: 0.7484 (0.7529) time: 0.1331 data: 0.0416 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.2017 (0.2117) loss: 0.7632 (0.7529) time: 0.1478 data: 0.0576 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2019 (0.2116) loss: 0.7541 (0.7530) time: 0.1668 data: 0.0837 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2079 (0.2115) loss: 0.7546 (0.7530) time: 0.1516 data: 0.0702 max mem: 9377 +Train: [93] Total time: 0:15:55 (0.1529 s / it) +Averaged stats: lr: 0.000001 grad: 0.2079 (0.2115) loss: 0.7546 (0.7530) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:06:22 loss: 0.8522 (0.8522) time: 6.1756 data: 6.1447 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8560 (0.8582) time: 0.1371 data: 0.1118 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:14 (0.2349 s / it) +Averaged stats (hcp-train-subset): loss: 0.8560 (0.8582) +Eval (hcp-val): [93] [ 0/62] eta: 0:04:05 loss: 0.8522 (0.8522) time: 3.9584 data: 3.8707 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8551 (0.8566) time: 0.1340 data: 0.1090 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (hcp-val): loss: 0.8551 (0.8566) +Eval (nsd-val): [93] [ 0/62] eta: 0:04:06 loss: 0.8188 (0.8188) time: 3.9808 data: 3.9094 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8322 (0.8329) time: 0.1315 data: 0.1061 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:13 (0.2244 s / it) +Averaged stats (nsd-val): loss: 0.8322 (0.8329) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 11:13:22 lr: 0.000001 grad: 0.1904 (0.1904) loss: 0.7766 (0.7766) time: 6.4644 data: 6.3060 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:21:30 lr: 0.000001 grad: 0.2181 (0.2592) loss: 0.7678 (0.7656) time: 0.1465 data: 0.0307 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:18:43 lr: 0.000001 grad: 0.2165 (0.2397) loss: 0.7520 (0.7625) time: 0.1588 data: 0.0570 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:17:09 lr: 0.000001 grad: 0.2149 (0.2327) loss: 0.7454 (0.7578) time: 0.1385 data: 0.0420 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:16:25 lr: 0.000001 grad: 0.2190 (0.2289) loss: 0.7472 (0.7559) time: 0.1421 data: 0.0471 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:15:49 lr: 0.000001 grad: 0.2184 (0.2279) loss: 0.7364 (0.7533) time: 0.1439 data: 0.0494 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:15:22 lr: 0.000001 grad: 0.2075 (0.2263) loss: 0.7483 (0.7523) time: 0.1477 data: 0.0535 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:14:57 lr: 0.000001 grad: 0.2142 (0.2253) loss: 0.7520 (0.7517) time: 0.1369 data: 0.0417 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:14:35 lr: 0.000001 grad: 0.2168 (0.2244) loss: 0.7592 (0.7518) time: 0.1427 data: 0.0514 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:14:15 lr: 0.000001 grad: 0.2196 (0.2230) loss: 0.7635 (0.7521) time: 0.1203 data: 0.0242 max mem: 9377 +Train: [94] [1000/6250] eta: 0:13:52 lr: 0.000001 grad: 0.2111 (0.2220) loss: 0.7583 (0.7526) time: 0.1283 data: 0.0311 max mem: 9377 +Train: [94] [1100/6250] eta: 0:13:32 lr: 0.000001 grad: 0.2115 (0.2216) loss: 0.7606 (0.7524) time: 0.1606 data: 0.0778 max mem: 9377 +Train: [94] [1200/6250] eta: 0:13:09 lr: 0.000001 grad: 0.2137 (0.2209) loss: 0.7511 (0.7526) time: 0.1395 data: 0.0444 max mem: 9377 +Train: [94] [1300/6250] eta: 0:12:48 lr: 0.000001 grad: 0.2149 (0.2200) loss: 0.7448 (0.7527) time: 0.1432 data: 0.0546 max mem: 9377 +Train: [94] [1400/6250] eta: 0:12:29 lr: 0.000001 grad: 0.2143 (0.2194) loss: 0.7454 (0.7526) time: 0.1503 data: 0.0609 max mem: 9377 +Train: [94] [1500/6250] eta: 0:12:12 lr: 0.000001 grad: 0.2063 (0.2189) loss: 0.7456 (0.7525) time: 0.1540 data: 0.0620 max mem: 9377 +Train: [94] [1600/6250] eta: 0:11:55 lr: 0.000001 grad: 0.2062 (0.2183) loss: 0.7516 (0.7523) time: 0.1585 data: 0.0738 max mem: 9377 +Train: [94] [1700/6250] eta: 0:11:38 lr: 0.000001 grad: 0.2152 (0.2179) loss: 0.7450 (0.7521) time: 0.1431 data: 0.0577 max mem: 9377 +Train: [94] [1800/6250] eta: 0:11:23 lr: 0.000001 grad: 0.2054 (0.2175) loss: 0.7522 (0.7522) time: 0.1573 data: 0.0762 max mem: 9377 +Train: [94] [1900/6250] eta: 0:11:09 lr: 0.000001 grad: 0.2028 (0.2169) loss: 0.7614 (0.7523) time: 0.1415 data: 0.0586 max mem: 9377 +Train: [94] [2000/6250] eta: 0:10:55 lr: 0.000001 grad: 0.2049 (0.2166) loss: 0.7550 (0.7524) time: 0.1780 data: 0.1036 max mem: 9377 +Train: [94] [2100/6250] eta: 0:10:39 lr: 0.000001 grad: 0.1968 (0.2162) loss: 0.7560 (0.7525) time: 0.1525 data: 0.0616 max mem: 9377 +Train: [94] [2200/6250] eta: 0:10:25 lr: 0.000001 grad: 0.2087 (0.2159) loss: 0.7532 (0.7524) time: 0.1791 data: 0.0959 max mem: 9377 +Train: [94] [2300/6250] eta: 0:10:12 lr: 0.000001 grad: 0.2029 (0.2156) loss: 0.7489 (0.7525) time: 0.1668 data: 0.0754 max mem: 9377 +Train: [94] [2400/6250] eta: 0:09:56 lr: 0.000001 grad: 0.2096 (0.2154) loss: 0.7438 (0.7524) time: 0.1584 data: 0.0711 max mem: 9377 +Train: [94] [2500/6250] eta: 0:09:39 lr: 0.000001 grad: 0.1995 (0.2153) loss: 0.7583 (0.7523) time: 0.1321 data: 0.0407 max mem: 9377 +Train: [94] [2600/6250] eta: 0:09:23 lr: 0.000001 grad: 0.2122 (0.2152) loss: 0.7519 (0.7523) time: 0.1396 data: 0.0453 max mem: 9377 +Train: [94] [2700/6250] eta: 0:09:06 lr: 0.000001 grad: 0.2005 (0.2149) loss: 0.7530 (0.7524) time: 0.1270 data: 0.0342 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:51 lr: 0.000001 grad: 0.2078 (0.2148) loss: 0.7525 (0.7524) time: 0.1660 data: 0.0844 max mem: 9377 +Train: [94] [2900/6250] eta: 0:08:35 lr: 0.000001 grad: 0.2043 (0.2148) loss: 0.7593 (0.7526) time: 0.1681 data: 0.0736 max mem: 9377 +Train: [94] [3000/6250] eta: 0:08:20 lr: 0.000001 grad: 0.2182 (0.2145) loss: 0.7518 (0.7527) time: 0.2079 data: 0.1290 max mem: 9377 +Train: [94] [3100/6250] eta: 0:08:05 lr: 0.000001 grad: 0.1991 (0.2142) loss: 0.7620 (0.7527) time: 0.1094 data: 0.0263 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:49 lr: 0.000001 grad: 0.2090 (0.2142) loss: 0.7574 (0.7527) time: 0.1629 data: 0.0842 max mem: 9377 +Train: [94] [3300/6250] eta: 0:07:34 lr: 0.000001 grad: 0.2087 (0.2140) loss: 0.7544 (0.7528) time: 0.1439 data: 0.0630 max mem: 9377 +Train: [94] [3400/6250] eta: 0:07:17 lr: 0.000001 grad: 0.2056 (0.2141) loss: 0.7552 (0.7528) time: 0.1487 data: 0.0646 max mem: 9377 +Train: [94] [3500/6250] eta: 0:07:02 lr: 0.000001 grad: 0.2022 (0.2139) loss: 0.7495 (0.7528) time: 0.1330 data: 0.0416 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:47 lr: 0.000001 grad: 0.2128 (0.2140) loss: 0.7464 (0.7528) time: 0.1518 data: 0.0578 max mem: 9377 +Train: [94] [3700/6250] eta: 0:06:31 lr: 0.000001 grad: 0.2138 (0.2140) loss: 0.7456 (0.7527) time: 0.1334 data: 0.0366 max mem: 9377 +Train: [94] [3800/6250] eta: 0:06:15 lr: 0.000001 grad: 0.2069 (0.2140) loss: 0.7441 (0.7526) time: 0.1399 data: 0.0446 max mem: 9377 +Train: [94] [3900/6250] eta: 0:06:00 lr: 0.000001 grad: 0.2072 (0.2142) loss: 0.7467 (0.7524) time: 0.1562 data: 0.0691 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:44 lr: 0.000001 grad: 0.2274 (0.2143) loss: 0.7354 (0.7523) time: 0.1426 data: 0.0508 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:28 lr: 0.000001 grad: 0.2177 (0.2143) loss: 0.7445 (0.7521) time: 0.1476 data: 0.0617 max mem: 9377 +Train: [94] [4200/6250] eta: 0:05:13 lr: 0.000001 grad: 0.2191 (0.2144) loss: 0.7410 (0.7519) time: 0.1985 data: 0.1168 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:57 lr: 0.000001 grad: 0.2205 (0.2145) loss: 0.7422 (0.7517) time: 0.1798 data: 0.0969 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:42 lr: 0.000001 grad: 0.2113 (0.2146) loss: 0.7429 (0.7515) time: 0.1341 data: 0.0482 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:27 lr: 0.000001 grad: 0.2179 (0.2148) loss: 0.7466 (0.7514) time: 0.1877 data: 0.1003 max mem: 9377 +Train: [94] [4600/6250] eta: 0:04:12 lr: 0.000001 grad: 0.2140 (0.2149) loss: 0.7478 (0.7513) time: 0.1566 data: 0.0723 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:56 lr: 0.000001 grad: 0.2142 (0.2149) loss: 0.7407 (0.7512) time: 0.1357 data: 0.0503 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:41 lr: 0.000001 grad: 0.2158 (0.2150) loss: 0.7503 (0.7511) time: 0.1598 data: 0.0686 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:26 lr: 0.000001 grad: 0.2203 (0.2151) loss: 0.7380 (0.7509) time: 0.1493 data: 0.0663 max mem: 9377 +Train: [94] [5000/6250] eta: 0:03:10 lr: 0.000001 grad: 0.2196 (0.2152) loss: 0.7488 (0.7508) time: 0.1328 data: 0.0439 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:55 lr: 0.000001 grad: 0.2181 (0.2153) loss: 0.7447 (0.7507) time: 0.1518 data: 0.0568 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:40 lr: 0.000001 grad: 0.2191 (0.2154) loss: 0.7449 (0.7507) time: 0.1178 data: 0.0309 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:24 lr: 0.000001 grad: 0.2117 (0.2154) loss: 0.7502 (0.7506) time: 0.1428 data: 0.0599 max mem: 9377 +Train: [94] [5400/6250] eta: 0:02:09 lr: 0.000001 grad: 0.2198 (0.2155) loss: 0.7400 (0.7505) time: 0.1563 data: 0.0764 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:54 lr: 0.000001 grad: 0.2091 (0.2155) loss: 0.7663 (0.7505) time: 0.1619 data: 0.0721 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:38 lr: 0.000001 grad: 0.2085 (0.2156) loss: 0.7552 (0.7504) time: 0.1413 data: 0.0410 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:23 lr: 0.000001 grad: 0.2125 (0.2156) loss: 0.7420 (0.7504) time: 0.1470 data: 0.0549 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:08 lr: 0.000001 grad: 0.2215 (0.2157) loss: 0.7476 (0.7504) time: 0.1467 data: 0.0548 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.2005 (0.2157) loss: 0.7490 (0.7503) time: 0.1473 data: 0.0624 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.2025 (0.2156) loss: 0.7615 (0.7504) time: 0.0986 data: 0.0012 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.2163 (0.2155) loss: 0.7433 (0.7504) time: 0.1499 data: 0.0668 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2073 (0.2155) loss: 0.7453 (0.7504) time: 0.1483 data: 0.0513 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2033 (0.2154) loss: 0.7504 (0.7504) time: 0.1307 data: 0.0491 max mem: 9377 +Train: [94] Total time: 0:15:47 (0.1517 s / it) +Averaged stats: lr: 0.000001 grad: 0.2033 (0.2154) loss: 0.7504 (0.7504) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:04:17 loss: 0.8520 (0.8520) time: 4.1501 data: 4.0963 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8573 (0.8574) time: 0.0987 data: 0.0738 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:14 (0.2327 s / it) +Averaged stats (hcp-train-subset): loss: 0.8573 (0.8574) +Making plots (hcp-train-subset): example=16 +Eval (hcp-val): [94] [ 0/62] eta: 0:04:18 loss: 0.8545 (0.8545) time: 4.1638 data: 4.0888 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8543 (0.8556) time: 0.1403 data: 0.1135 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (hcp-val): loss: 0.8543 (0.8556) +Making plots (hcp-val): example=51 +Eval (nsd-val): [94] [ 0/62] eta: 0:04:24 loss: 0.8185 (0.8185) time: 4.2594 data: 4.1728 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8313 (0.8335) time: 0.1478 data: 0.1223 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (nsd-val): loss: 0.8313 (0.8335) +Making plots (nsd-val): example=41 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 9:58:25 lr: 0.000001 grad: 0.3439 (0.3439) loss: 0.7761 (0.7761) time: 5.7449 data: 5.5000 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:21:26 lr: 0.000001 grad: 0.2419 (0.2492) loss: 0.7381 (0.7595) time: 0.1532 data: 0.0440 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:18:22 lr: 0.000001 grad: 0.2263 (0.2402) loss: 0.7514 (0.7557) time: 0.1426 data: 0.0354 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:17:06 lr: 0.000001 grad: 0.2228 (0.2329) loss: 0.7575 (0.7574) time: 0.1502 data: 0.0461 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:16:18 lr: 0.000001 grad: 0.2049 (0.2302) loss: 0.7646 (0.7569) time: 0.1482 data: 0.0524 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:15:44 lr: 0.000001 grad: 0.2173 (0.2274) loss: 0.7379 (0.7561) time: 0.1667 data: 0.0768 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:15:14 lr: 0.000001 grad: 0.2069 (0.2255) loss: 0.7513 (0.7556) time: 0.1311 data: 0.0335 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:14:51 lr: 0.000001 grad: 0.2126 (0.2238) loss: 0.7563 (0.7554) time: 0.1290 data: 0.0362 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:14:28 lr: 0.000001 grad: 0.1994 (0.2222) loss: 0.7655 (0.7552) time: 0.1212 data: 0.0273 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:14:09 lr: 0.000001 grad: 0.2038 (0.2210) loss: 0.7486 (0.7549) time: 0.1722 data: 0.0776 max mem: 9377 +Train: [95] [1000/6250] eta: 0:13:45 lr: 0.000001 grad: 0.2138 (0.2200) loss: 0.7445 (0.7547) time: 0.1455 data: 0.0563 max mem: 9377 +Train: [95] [1100/6250] eta: 0:13:21 lr: 0.000001 grad: 0.2044 (0.2192) loss: 0.7382 (0.7543) time: 0.1554 data: 0.0574 max mem: 9377 +Train: [95] [1200/6250] eta: 0:13:01 lr: 0.000001 grad: 0.2000 (0.2184) loss: 0.7539 (0.7539) time: 0.1633 data: 0.0787 max mem: 9377 +Train: [95] [1300/6250] eta: 0:12:43 lr: 0.000001 grad: 0.2068 (0.2177) loss: 0.7535 (0.7535) time: 0.1627 data: 0.0831 max mem: 9377 +Train: [95] [1400/6250] eta: 0:12:25 lr: 0.000001 grad: 0.2020 (0.2171) loss: 0.7519 (0.7534) time: 0.1483 data: 0.0623 max mem: 9377 +Train: [95] [1500/6250] eta: 0:12:06 lr: 0.000001 grad: 0.2023 (0.2166) loss: 0.7592 (0.7531) time: 0.1570 data: 0.0683 max mem: 9377 +Train: [95] [1600/6250] eta: 0:11:47 lr: 0.000001 grad: 0.2104 (0.2163) loss: 0.7477 (0.7531) time: 0.1463 data: 0.0539 max mem: 9377 +Train: [95] [1700/6250] eta: 0:11:30 lr: 0.000001 grad: 0.2146 (0.2163) loss: 0.7407 (0.7527) time: 0.1358 data: 0.0498 max mem: 9377 +Train: [95] [1800/6250] eta: 0:11:16 lr: 0.000001 grad: 0.2152 (0.2162) loss: 0.7391 (0.7524) time: 0.1734 data: 0.0902 max mem: 9377 +Train: [95] [1900/6250] eta: 0:11:03 lr: 0.000001 grad: 0.2070 (0.2159) loss: 0.7480 (0.7522) time: 0.1620 data: 0.0748 max mem: 9377 +Train: [95] [2000/6250] eta: 0:10:47 lr: 0.000001 grad: 0.2160 (0.2158) loss: 0.7488 (0.7521) time: 0.1660 data: 0.0787 max mem: 9377 +Train: [95] [2100/6250] eta: 0:10:31 lr: 0.000001 grad: 0.2048 (0.2156) loss: 0.7450 (0.7518) time: 0.1331 data: 0.0549 max mem: 9377 +Train: [95] [2200/6250] eta: 0:10:15 lr: 0.000001 grad: 0.2193 (0.2157) loss: 0.7390 (0.7516) time: 0.1536 data: 0.0685 max mem: 9377 +Train: [95] [2300/6250] eta: 0:10:00 lr: 0.000001 grad: 0.2156 (0.2156) loss: 0.7390 (0.7514) time: 0.1369 data: 0.0447 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:44 lr: 0.000001 grad: 0.2116 (0.2156) loss: 0.7521 (0.7512) time: 0.1470 data: 0.0452 max mem: 9377 +Train: [95] [2500/6250] eta: 0:09:28 lr: 0.000001 grad: 0.2048 (0.2153) loss: 0.7454 (0.7512) time: 0.1611 data: 0.0712 max mem: 9377 +Train: [95] [2600/6250] eta: 0:09:12 lr: 0.000001 grad: 0.2121 (0.2152) loss: 0.7441 (0.7512) time: 0.1397 data: 0.0531 max mem: 9377 +Train: [95] [2700/6250] eta: 0:08:55 lr: 0.000001 grad: 0.2044 (0.2149) loss: 0.7624 (0.7513) time: 0.1384 data: 0.0596 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:40 lr: 0.000001 grad: 0.2045 (0.2147) loss: 0.7588 (0.7514) time: 0.1396 data: 0.0601 max mem: 9377 +Train: [95] [2900/6250] eta: 0:08:26 lr: 0.000001 grad: 0.2055 (0.2146) loss: 0.7653 (0.7515) time: 0.1812 data: 0.0967 max mem: 9377 +Train: [95] [3000/6250] eta: 0:08:12 lr: 0.000001 grad: 0.2122 (0.2148) loss: 0.7511 (0.7517) time: 0.1827 data: 0.1028 max mem: 9377 +Train: [95] [3100/6250] eta: 0:07:57 lr: 0.000001 grad: 0.2007 (0.2148) loss: 0.7541 (0.7519) time: 0.1658 data: 0.0785 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:42 lr: 0.000001 grad: 0.1997 (0.2146) loss: 0.7621 (0.7521) time: 0.1323 data: 0.0385 max mem: 9377 +Train: [95] [3300/6250] eta: 0:07:26 lr: 0.000001 grad: 0.2063 (0.2145) loss: 0.7484 (0.7523) time: 0.1107 data: 0.0182 max mem: 9377 +Train: [95] [3400/6250] eta: 0:07:12 lr: 0.000001 grad: 0.2076 (0.2144) loss: 0.7457 (0.7523) time: 0.1645 data: 0.0825 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:57 lr: 0.000001 grad: 0.2068 (0.2143) loss: 0.7454 (0.7524) time: 0.1303 data: 0.0413 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:41 lr: 0.000001 grad: 0.2067 (0.2142) loss: 0.7571 (0.7525) time: 0.1517 data: 0.0535 max mem: 9377 +Train: [95] [3700/6250] eta: 0:06:26 lr: 0.000001 grad: 0.2147 (0.2142) loss: 0.7540 (0.7525) time: 0.1558 data: 0.0684 max mem: 9377 +Train: [95] [3800/6250] eta: 0:06:10 lr: 0.000001 grad: 0.2169 (0.2144) loss: 0.7409 (0.7525) time: 0.1515 data: 0.0568 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:55 lr: 0.000001 grad: 0.2094 (0.2144) loss: 0.7519 (0.7523) time: 0.1455 data: 0.0612 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:39 lr: 0.000001 grad: 0.2194 (0.2145) loss: 0.7480 (0.7522) time: 0.1351 data: 0.0437 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:24 lr: 0.000001 grad: 0.2143 (0.2146) loss: 0.7515 (0.7522) time: 0.1608 data: 0.0709 max mem: 9377 +Train: [95] [4200/6250] eta: 0:05:08 lr: 0.000001 grad: 0.2123 (0.2148) loss: 0.7415 (0.7521) time: 0.1384 data: 0.0504 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:53 lr: 0.000001 grad: 0.2127 (0.2148) loss: 0.7520 (0.7520) time: 0.1501 data: 0.0758 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:38 lr: 0.000001 grad: 0.2125 (0.2148) loss: 0.7477 (0.7519) time: 0.1539 data: 0.0690 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:23 lr: 0.000001 grad: 0.2100 (0.2148) loss: 0.7495 (0.7519) time: 0.1462 data: 0.0625 max mem: 9377 +Train: [95] [4600/6250] eta: 0:04:08 lr: 0.000001 grad: 0.2131 (0.2148) loss: 0.7467 (0.7517) time: 0.1280 data: 0.0417 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:54 lr: 0.000001 grad: 0.2162 (0.2149) loss: 0.7419 (0.7517) time: 0.1585 data: 0.0661 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:39 lr: 0.000001 grad: 0.2093 (0.2148) loss: 0.7513 (0.7517) time: 0.1415 data: 0.0459 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:23 lr: 0.000001 grad: 0.2111 (0.2148) loss: 0.7482 (0.7516) time: 0.1482 data: 0.0584 max mem: 9377 +Train: [95] [5000/6250] eta: 0:03:08 lr: 0.000001 grad: 0.2001 (0.2146) loss: 0.7502 (0.7516) time: 0.1554 data: 0.0623 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:53 lr: 0.000001 grad: 0.2044 (0.2146) loss: 0.7546 (0.7516) time: 0.1299 data: 0.0466 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:38 lr: 0.000001 grad: 0.2018 (0.2146) loss: 0.7498 (0.7516) time: 0.1339 data: 0.0481 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:22 lr: 0.000001 grad: 0.2176 (0.2146) loss: 0.7461 (0.7515) time: 0.1363 data: 0.0510 max mem: 9377 +Train: [95] [5400/6250] eta: 0:02:07 lr: 0.000001 grad: 0.2176 (0.2146) loss: 0.7476 (0.7515) time: 0.1328 data: 0.0426 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:52 lr: 0.000001 grad: 0.2018 (0.2145) loss: 0.7440 (0.7515) time: 0.1552 data: 0.0717 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:37 lr: 0.000001 grad: 0.2143 (0.2145) loss: 0.7531 (0.7515) time: 0.1324 data: 0.0455 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:22 lr: 0.000001 grad: 0.2121 (0.2144) loss: 0.7407 (0.7514) time: 0.1470 data: 0.0593 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:07 lr: 0.000001 grad: 0.2138 (0.2145) loss: 0.7518 (0.7514) time: 0.1497 data: 0.0566 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.2074 (0.2144) loss: 0.7581 (0.7514) time: 0.1478 data: 0.0514 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.2128 (0.2144) loss: 0.7474 (0.7514) time: 0.1312 data: 0.0401 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.2072 (0.2143) loss: 0.7625 (0.7515) time: 0.1433 data: 0.0550 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2038 (0.2142) loss: 0.7448 (0.7515) time: 0.1299 data: 0.0388 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2095 (0.2141) loss: 0.7499 (0.7516) time: 0.1457 data: 0.0546 max mem: 9377 +Train: [95] Total time: 0:15:34 (0.1496 s / it) +Averaged stats: lr: 0.000001 grad: 0.2095 (0.2141) loss: 0.7499 (0.7516) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:05:27 loss: 0.8495 (0.8495) time: 5.2755 data: 5.2446 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8568 (0.8574) time: 0.1319 data: 0.1065 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-train-subset): loss: 0.8568 (0.8574) +Eval (hcp-val): [95] [ 0/62] eta: 0:04:43 loss: 0.8554 (0.8554) time: 4.5762 data: 4.5044 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8534 (0.8558) time: 0.1320 data: 0.1069 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:13 (0.2247 s / it) +Averaged stats (hcp-val): loss: 0.8534 (0.8558) +Eval (nsd-val): [95] [ 0/62] eta: 0:03:54 loss: 0.8277 (0.8277) time: 3.7752 data: 3.6926 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8313 (0.8328) time: 0.1280 data: 0.1027 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:13 (0.2245 s / it) +Averaged stats (nsd-val): loss: 0.8313 (0.8328) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 9:24:21 lr: 0.000001 grad: 0.3968 (0.3968) loss: 0.7453 (0.7453) time: 5.4178 data: 5.1173 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:22:29 lr: 0.000001 grad: 0.2281 (0.2651) loss: 0.7282 (0.7357) time: 0.1660 data: 0.0591 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:18:45 lr: 0.000001 grad: 0.2204 (0.2485) loss: 0.7565 (0.7409) time: 0.1510 data: 0.0589 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:17:22 lr: 0.000001 grad: 0.2020 (0.2385) loss: 0.7653 (0.7463) time: 0.1437 data: 0.0450 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:16:29 lr: 0.000001 grad: 0.2055 (0.2305) loss: 0.7668 (0.7504) time: 0.1584 data: 0.0584 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:15:42 lr: 0.000001 grad: 0.2033 (0.2256) loss: 0.7624 (0.7531) time: 0.1253 data: 0.0327 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:15:12 lr: 0.000001 grad: 0.2261 (0.2235) loss: 0.7436 (0.7538) time: 0.1517 data: 0.0536 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:14:45 lr: 0.000001 grad: 0.2144 (0.2226) loss: 0.7393 (0.7533) time: 0.1685 data: 0.0605 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:14:17 lr: 0.000001 grad: 0.2114 (0.2221) loss: 0.7427 (0.7525) time: 0.1239 data: 0.0266 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:13:53 lr: 0.000001 grad: 0.2011 (0.2212) loss: 0.7572 (0.7523) time: 0.1516 data: 0.0472 max mem: 9377 +Train: [96] [1000/6250] eta: 0:13:29 lr: 0.000001 grad: 0.1994 (0.2201) loss: 0.7540 (0.7524) time: 0.1360 data: 0.0326 max mem: 9377 +Train: [96] [1100/6250] eta: 0:13:09 lr: 0.000000 grad: 0.1955 (0.2190) loss: 0.7539 (0.7526) time: 0.1372 data: 0.0527 max mem: 9377 +Train: [96] [1200/6250] eta: 0:12:47 lr: 0.000000 grad: 0.2071 (0.2180) loss: 0.7540 (0.7528) time: 0.1357 data: 0.0421 max mem: 9377 +Train: [96] [1300/6250] eta: 0:12:28 lr: 0.000000 grad: 0.2096 (0.2174) loss: 0.7490 (0.7528) time: 0.1210 data: 0.0268 max mem: 9377 +Train: [96] [1400/6250] eta: 0:12:11 lr: 0.000000 grad: 0.2121 (0.2169) loss: 0.7493 (0.7526) time: 0.1339 data: 0.0481 max mem: 9377 +Train: [96] [1500/6250] eta: 0:11:54 lr: 0.000000 grad: 0.2031 (0.2163) loss: 0.7550 (0.7526) time: 0.1513 data: 0.0694 max mem: 9377 +Train: [96] [1600/6250] eta: 0:11:35 lr: 0.000000 grad: 0.2063 (0.2159) loss: 0.7478 (0.7527) time: 0.1067 data: 0.0211 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:20 lr: 0.000000 grad: 0.2077 (0.2155) loss: 0.7429 (0.7527) time: 0.1393 data: 0.0391 max mem: 9377 +Train: [96] [1800/6250] eta: 0:11:02 lr: 0.000000 grad: 0.2097 (0.2153) loss: 0.7465 (0.7525) time: 0.1550 data: 0.0762 max mem: 9377 +Train: [96] [1900/6250] eta: 0:10:50 lr: 0.000000 grad: 0.2063 (0.2152) loss: 0.7458 (0.7523) time: 0.1843 data: 0.1104 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:36 lr: 0.000000 grad: 0.2136 (0.2153) loss: 0.7521 (0.7522) time: 0.1352 data: 0.0473 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:21 lr: 0.000000 grad: 0.2077 (0.2152) loss: 0.7473 (0.7521) time: 0.1392 data: 0.0508 max mem: 9377 +Train: [96] [2200/6250] eta: 0:10:07 lr: 0.000000 grad: 0.2145 (0.2152) loss: 0.7502 (0.7520) time: 0.1527 data: 0.0630 max mem: 9377 +Train: [96] [2300/6250] eta: 0:09:53 lr: 0.000000 grad: 0.2160 (0.2153) loss: 0.7348 (0.7516) time: 0.1696 data: 0.0735 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:38 lr: 0.000000 grad: 0.2156 (0.2154) loss: 0.7432 (0.7513) time: 0.1340 data: 0.0401 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:23 lr: 0.000000 grad: 0.2133 (0.2154) loss: 0.7506 (0.7511) time: 0.1578 data: 0.0652 max mem: 9377 +Train: [96] [2600/6250] eta: 0:09:08 lr: 0.000000 grad: 0.2121 (0.2155) loss: 0.7459 (0.7508) time: 0.1470 data: 0.0565 max mem: 9377 +Train: [96] [2700/6250] eta: 0:08:52 lr: 0.000000 grad: 0.2033 (0.2155) loss: 0.7484 (0.7505) time: 0.1616 data: 0.0790 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:37 lr: 0.000000 grad: 0.2106 (0.2153) loss: 0.7481 (0.7504) time: 0.1468 data: 0.0681 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:22 lr: 0.000000 grad: 0.2138 (0.2154) loss: 0.7384 (0.7502) time: 0.1720 data: 0.0921 max mem: 9377 +Train: [96] [3000/6250] eta: 0:08:07 lr: 0.000000 grad: 0.2133 (0.2154) loss: 0.7493 (0.7501) time: 0.1479 data: 0.0578 max mem: 9377 +Train: [96] [3100/6250] eta: 0:07:53 lr: 0.000000 grad: 0.2069 (0.2154) loss: 0.7467 (0.7499) time: 0.1649 data: 0.0794 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:38 lr: 0.000000 grad: 0.2083 (0.2152) loss: 0.7470 (0.7500) time: 0.1556 data: 0.0650 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:23 lr: 0.000000 grad: 0.2064 (0.2151) loss: 0.7520 (0.7500) time: 0.1813 data: 0.0915 max mem: 9377 +Train: [96] [3400/6250] eta: 0:07:08 lr: 0.000000 grad: 0.2126 (0.2150) loss: 0.7407 (0.7499) time: 0.1583 data: 0.0755 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:53 lr: 0.000000 grad: 0.1966 (0.2151) loss: 0.7511 (0.7497) time: 0.1484 data: 0.0622 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:38 lr: 0.000000 grad: 0.2091 (0.2151) loss: 0.7443 (0.7497) time: 0.1573 data: 0.0707 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:22 lr: 0.000000 grad: 0.2077 (0.2150) loss: 0.7403 (0.7497) time: 0.1312 data: 0.0384 max mem: 9377 +Train: [96] [3800/6250] eta: 0:06:07 lr: 0.000000 grad: 0.2122 (0.2150) loss: 0.7458 (0.7496) time: 0.1477 data: 0.0604 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:52 lr: 0.000000 grad: 0.2127 (0.2151) loss: 0.7438 (0.7494) time: 0.1410 data: 0.0549 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:37 lr: 0.000000 grad: 0.2165 (0.2152) loss: 0.7451 (0.7492) time: 0.1362 data: 0.0454 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:21 lr: 0.000000 grad: 0.2137 (0.2151) loss: 0.7342 (0.7492) time: 0.1399 data: 0.0504 max mem: 9377 +Train: [96] [4200/6250] eta: 0:05:06 lr: 0.000000 grad: 0.2220 (0.2150) loss: 0.7498 (0.7492) time: 0.1330 data: 0.0447 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:51 lr: 0.000000 grad: 0.2098 (0.2149) loss: 0.7507 (0.7492) time: 0.1392 data: 0.0598 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:36 lr: 0.000000 grad: 0.2203 (0.2149) loss: 0.7495 (0.7492) time: 0.1491 data: 0.0655 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:21 lr: 0.000000 grad: 0.2123 (0.2149) loss: 0.7553 (0.7491) time: 0.1378 data: 0.0533 max mem: 9377 +Train: [96] [4600/6250] eta: 0:04:06 lr: 0.000000 grad: 0.2101 (0.2148) loss: 0.7562 (0.7492) time: 0.1325 data: 0.0426 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:51 lr: 0.000000 grad: 0.2086 (0.2147) loss: 0.7613 (0.7493) time: 0.1456 data: 0.0601 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:36 lr: 0.000000 grad: 0.2096 (0.2147) loss: 0.7504 (0.7493) time: 0.1498 data: 0.0626 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:22 lr: 0.000000 grad: 0.2067 (0.2147) loss: 0.7531 (0.7494) time: 0.1318 data: 0.0479 max mem: 9377 +Train: [96] [5000/6250] eta: 0:03:07 lr: 0.000000 grad: 0.2024 (0.2146) loss: 0.7532 (0.7495) time: 0.1634 data: 0.0742 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:51 lr: 0.000000 grad: 0.2061 (0.2146) loss: 0.7501 (0.7495) time: 0.1637 data: 0.0768 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:36 lr: 0.000000 grad: 0.2044 (0.2145) loss: 0.7565 (0.7497) time: 0.1184 data: 0.0311 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:21 lr: 0.000000 grad: 0.2019 (0.2144) loss: 0.7554 (0.7498) time: 0.1591 data: 0.0730 max mem: 9377 +Train: [96] [5400/6250] eta: 0:02:06 lr: 0.000000 grad: 0.2037 (0.2144) loss: 0.7452 (0.7498) time: 0.1522 data: 0.0652 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:51 lr: 0.000000 grad: 0.2020 (0.2143) loss: 0.7548 (0.7499) time: 0.1504 data: 0.0666 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:36 lr: 0.000000 grad: 0.2003 (0.2142) loss: 0.7612 (0.7500) time: 0.1569 data: 0.0697 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:21 lr: 0.000000 grad: 0.2098 (0.2142) loss: 0.7524 (0.7500) time: 0.1250 data: 0.0295 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:06 lr: 0.000000 grad: 0.2029 (0.2142) loss: 0.7537 (0.7500) time: 0.1561 data: 0.0685 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:51 lr: 0.000000 grad: 0.2133 (0.2143) loss: 0.7438 (0.7500) time: 0.1572 data: 0.0716 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.2046 (0.2143) loss: 0.7640 (0.7501) time: 0.1173 data: 0.0300 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.2079 (0.2143) loss: 0.7590 (0.7501) time: 0.1583 data: 0.0729 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2128 (0.2143) loss: 0.7509 (0.7501) time: 0.1412 data: 0.0526 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2088 (0.2143) loss: 0.7492 (0.7501) time: 0.1233 data: 0.0346 max mem: 9377 +Train: [96] Total time: 0:15:29 (0.1488 s / it) +Averaged stats: lr: 0.000000 grad: 0.2088 (0.2143) loss: 0.7492 (0.7501) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:05:01 loss: 0.8500 (0.8500) time: 4.8620 data: 4.7806 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8561 (0.8577) time: 0.1289 data: 0.1026 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (hcp-train-subset): loss: 0.8561 (0.8577) +Eval (hcp-val): [96] [ 0/62] eta: 0:04:43 loss: 0.8517 (0.8517) time: 4.5786 data: 4.4969 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8532 (0.8560) time: 0.1250 data: 0.1000 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:13 (0.2255 s / it) +Averaged stats (hcp-val): loss: 0.8532 (0.8560) +Eval (nsd-val): [96] [ 0/62] eta: 0:06:17 loss: 0.8210 (0.8210) time: 6.0935 data: 6.0605 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8303 (0.8329) time: 0.1327 data: 0.1076 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (nsd-val): loss: 0.8303 (0.8329) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 10:01:10 lr: 0.000000 grad: 0.1766 (0.1766) loss: 0.8230 (0.8230) time: 5.7713 data: 5.4992 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:22:07 lr: 0.000000 grad: 0.2157 (0.2634) loss: 0.7662 (0.7497) time: 0.1442 data: 0.0406 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:18:53 lr: 0.000000 grad: 0.2394 (0.2530) loss: 0.7337 (0.7434) time: 0.1438 data: 0.0385 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:17:28 lr: 0.000000 grad: 0.2190 (0.2462) loss: 0.7501 (0.7421) time: 0.1573 data: 0.0605 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:16:34 lr: 0.000000 grad: 0.2295 (0.2416) loss: 0.7547 (0.7440) time: 0.1623 data: 0.0711 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:15:57 lr: 0.000000 grad: 0.2132 (0.2369) loss: 0.7596 (0.7458) time: 0.1791 data: 0.0942 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:15:23 lr: 0.000000 grad: 0.2175 (0.2336) loss: 0.7516 (0.7466) time: 0.1537 data: 0.0662 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:14:58 lr: 0.000000 grad: 0.2203 (0.2311) loss: 0.7394 (0.7472) time: 0.1516 data: 0.0567 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:14:32 lr: 0.000000 grad: 0.2183 (0.2297) loss: 0.7476 (0.7471) time: 0.1507 data: 0.0467 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:14:09 lr: 0.000000 grad: 0.2335 (0.2290) loss: 0.7426 (0.7464) time: 0.1469 data: 0.0546 max mem: 9377 +Train: [97] [1000/6250] eta: 0:13:47 lr: 0.000000 grad: 0.2238 (0.2284) loss: 0.7311 (0.7464) time: 0.1467 data: 0.0642 max mem: 9377 +Train: [97] [1100/6250] eta: 0:13:26 lr: 0.000000 grad: 0.2262 (0.2280) loss: 0.7409 (0.7464) time: 0.1242 data: 0.0391 max mem: 9377 +Train: [97] [1200/6250] eta: 0:13:09 lr: 0.000000 grad: 0.2219 (0.2276) loss: 0.7422 (0.7463) time: 0.1589 data: 0.0563 max mem: 9377 +Train: [97] [1300/6250] eta: 0:12:51 lr: 0.000000 grad: 0.2161 (0.2272) loss: 0.7464 (0.7462) time: 0.1445 data: 0.0561 max mem: 9377 +Train: [97] [1400/6250] eta: 0:12:34 lr: 0.000000 grad: 0.2147 (0.2268) loss: 0.7455 (0.7460) time: 0.1733 data: 0.0822 max mem: 9377 +Train: [97] [1500/6250] eta: 0:12:18 lr: 0.000000 grad: 0.2156 (0.2265) loss: 0.7476 (0.7458) time: 0.1583 data: 0.0693 max mem: 9377 +Train: [97] [1600/6250] eta: 0:12:01 lr: 0.000000 grad: 0.2111 (0.2259) loss: 0.7512 (0.7459) time: 0.1408 data: 0.0484 max mem: 9377 +Train: [97] [1700/6250] eta: 0:11:45 lr: 0.000000 grad: 0.2293 (0.2255) loss: 0.7439 (0.7461) time: 0.1638 data: 0.0646 max mem: 9377 +Train: [97] [1800/6250] eta: 0:11:29 lr: 0.000000 grad: 0.2241 (0.2255) loss: 0.7450 (0.7457) time: 0.1477 data: 0.0668 max mem: 9377 +Train: [97] [1900/6250] eta: 0:11:17 lr: 0.000000 grad: 0.2175 (0.2252) loss: 0.7404 (0.7455) time: 0.2288 data: 0.1563 max mem: 9377 +Train: [97] [2000/6250] eta: 0:11:02 lr: 0.000000 grad: 0.2091 (0.2247) loss: 0.7455 (0.7455) time: 0.1592 data: 0.0804 max mem: 9377 +Train: [97] [2100/6250] eta: 0:10:48 lr: 0.000000 grad: 0.2157 (0.2243) loss: 0.7505 (0.7455) time: 0.1559 data: 0.0752 max mem: 9377 +Train: [97] [2200/6250] eta: 0:10:34 lr: 0.000000 grad: 0.2176 (0.2241) loss: 0.7471 (0.7456) time: 0.1535 data: 0.0717 max mem: 9377 +Train: [97] [2300/6250] eta: 0:10:17 lr: 0.000000 grad: 0.2179 (0.2238) loss: 0.7436 (0.7456) time: 0.1643 data: 0.0767 max mem: 9377 +Train: [97] [2400/6250] eta: 0:10:02 lr: 0.000000 grad: 0.2094 (0.2233) loss: 0.7506 (0.7457) time: 0.1556 data: 0.0646 max mem: 9377 +Train: [97] [2500/6250] eta: 0:09:46 lr: 0.000000 grad: 0.2118 (0.2229) loss: 0.7531 (0.7460) time: 0.1664 data: 0.0701 max mem: 9377 +Train: [97] [2600/6250] eta: 0:09:30 lr: 0.000000 grad: 0.2150 (0.2226) loss: 0.7484 (0.7462) time: 0.1511 data: 0.0646 max mem: 9377 +Train: [97] [2700/6250] eta: 0:09:14 lr: 0.000000 grad: 0.2119 (0.2224) loss: 0.7529 (0.7464) time: 0.1506 data: 0.0627 max mem: 9377 +Train: [97] [2800/6250] eta: 0:08:58 lr: 0.000000 grad: 0.2095 (0.2221) loss: 0.7553 (0.7466) time: 0.1713 data: 0.0847 max mem: 9377 +Train: [97] [2900/6250] eta: 0:08:41 lr: 0.000000 grad: 0.2029 (0.2219) loss: 0.7686 (0.7469) time: 0.1984 data: 0.1177 max mem: 9377 +Train: [97] [3000/6250] eta: 0:08:26 lr: 0.000000 grad: 0.2246 (0.2218) loss: 0.7455 (0.7470) time: 0.1335 data: 0.0502 max mem: 9377 +Train: [97] [3100/6250] eta: 0:08:11 lr: 0.000000 grad: 0.2097 (0.2216) loss: 0.7529 (0.7471) time: 0.1618 data: 0.0765 max mem: 9377 +Train: [97] [3200/6250] eta: 0:07:55 lr: 0.000000 grad: 0.2118 (0.2212) loss: 0.7491 (0.7473) time: 0.1497 data: 0.0638 max mem: 9377 +Train: [97] [3300/6250] eta: 0:07:39 lr: 0.000000 grad: 0.2118 (0.2209) loss: 0.7507 (0.7474) time: 0.1670 data: 0.0761 max mem: 9377 +Train: [97] [3400/6250] eta: 0:07:22 lr: 0.000000 grad: 0.2178 (0.2207) loss: 0.7403 (0.7474) time: 0.1495 data: 0.0652 max mem: 9377 +Train: [97] [3500/6250] eta: 0:07:06 lr: 0.000000 grad: 0.2059 (0.2205) loss: 0.7465 (0.7474) time: 0.1631 data: 0.0808 max mem: 9377 +Train: [97] [3600/6250] eta: 0:06:50 lr: 0.000000 grad: 0.2081 (0.2203) loss: 0.7513 (0.7475) time: 0.1351 data: 0.0409 max mem: 9377 +Train: [97] [3700/6250] eta: 0:06:34 lr: 0.000000 grad: 0.2062 (0.2201) loss: 0.7458 (0.7475) time: 0.1472 data: 0.0616 max mem: 9377 +Train: [97] [3800/6250] eta: 0:06:18 lr: 0.000000 grad: 0.2120 (0.2199) loss: 0.7563 (0.7477) time: 0.1432 data: 0.0576 max mem: 9377 +Train: [97] [3900/6250] eta: 0:06:02 lr: 0.000000 grad: 0.2094 (0.2197) loss: 0.7511 (0.7478) time: 0.1401 data: 0.0513 max mem: 9377 +Train: [97] [4000/6250] eta: 0:05:46 lr: 0.000000 grad: 0.2065 (0.2195) loss: 0.7621 (0.7479) time: 0.1564 data: 0.0702 max mem: 9377 +Train: [97] [4100/6250] eta: 0:05:30 lr: 0.000000 grad: 0.2068 (0.2193) loss: 0.7583 (0.7480) time: 0.1420 data: 0.0592 max mem: 9377 +Train: [97] [4200/6250] eta: 0:05:14 lr: 0.000000 grad: 0.2126 (0.2191) loss: 0.7512 (0.7482) time: 0.1435 data: 0.0606 max mem: 9377 +Train: [97] [4300/6250] eta: 0:04:58 lr: 0.000000 grad: 0.2082 (0.2189) loss: 0.7565 (0.7484) time: 0.1248 data: 0.0326 max mem: 9377 +Train: [97] [4400/6250] eta: 0:04:43 lr: 0.000000 grad: 0.2071 (0.2187) loss: 0.7528 (0.7485) time: 0.1519 data: 0.0686 max mem: 9377 +Train: [97] [4500/6250] eta: 0:04:28 lr: 0.000000 grad: 0.2106 (0.2185) loss: 0.7477 (0.7486) time: 0.1667 data: 0.0825 max mem: 9377 +Train: [97] [4600/6250] eta: 0:04:12 lr: 0.000000 grad: 0.2060 (0.2183) loss: 0.7498 (0.7488) time: 0.1450 data: 0.0579 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:57 lr: 0.000000 grad: 0.2119 (0.2181) loss: 0.7502 (0.7489) time: 0.1320 data: 0.0497 max mem: 9377 +Train: [97] [4800/6250] eta: 0:03:42 lr: 0.000000 grad: 0.2109 (0.2179) loss: 0.7515 (0.7490) time: 0.1614 data: 0.0779 max mem: 9377 +Train: [97] [4900/6250] eta: 0:03:26 lr: 0.000000 grad: 0.2074 (0.2178) loss: 0.7581 (0.7491) time: 0.1422 data: 0.0589 max mem: 9377 +Train: [97] [5000/6250] eta: 0:03:11 lr: 0.000000 grad: 0.2039 (0.2177) loss: 0.7565 (0.7492) time: 0.2102 data: 0.1280 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:56 lr: 0.000000 grad: 0.2051 (0.2176) loss: 0.7548 (0.7493) time: 0.1421 data: 0.0486 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:40 lr: 0.000000 grad: 0.2160 (0.2175) loss: 0.7489 (0.7493) time: 0.1221 data: 0.0185 max mem: 9377 +Train: [97] [5300/6250] eta: 0:02:25 lr: 0.000000 grad: 0.2115 (0.2173) loss: 0.7556 (0.7494) time: 0.1575 data: 0.0672 max mem: 9377 +Train: [97] [5400/6250] eta: 0:02:09 lr: 0.000000 grad: 0.2098 (0.2174) loss: 0.7530 (0.7494) time: 0.1830 data: 0.1010 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:54 lr: 0.000000 grad: 0.2117 (0.2173) loss: 0.7514 (0.7494) time: 0.1317 data: 0.0463 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:39 lr: 0.000000 grad: 0.2053 (0.2172) loss: 0.7550 (0.7495) time: 0.1526 data: 0.0647 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:23 lr: 0.000000 grad: 0.2004 (0.2171) loss: 0.7544 (0.7496) time: 0.1544 data: 0.0673 max mem: 9377 +Train: [97] [5800/6250] eta: 0:01:08 lr: 0.000000 grad: 0.2140 (0.2170) loss: 0.7559 (0.7497) time: 0.1499 data: 0.0700 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:53 lr: 0.000000 grad: 0.2088 (0.2170) loss: 0.7508 (0.7498) time: 0.1351 data: 0.0403 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.2096 (0.2169) loss: 0.7508 (0.7498) time: 0.1145 data: 0.0234 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.2039 (0.2168) loss: 0.7625 (0.7498) time: 0.1587 data: 0.0732 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2087 (0.2167) loss: 0.7508 (0.7498) time: 0.1347 data: 0.0496 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2080 (0.2167) loss: 0.7485 (0.7498) time: 0.1380 data: 0.0508 max mem: 9377 +Train: [97] Total time: 0:15:53 (0.1526 s / it) +Averaged stats: lr: 0.000000 grad: 0.2080 (0.2167) loss: 0.7485 (0.7498) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:05:56 loss: 0.8485 (0.8485) time: 5.7471 data: 5.7148 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8573 (0.8574) time: 0.1287 data: 0.1039 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:14 (0.2307 s / it) +Averaged stats (hcp-train-subset): loss: 0.8573 (0.8574) +Eval (hcp-val): [97] [ 0/62] eta: 0:06:30 loss: 0.8587 (0.8587) time: 6.2909 data: 6.2577 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8537 (0.8566) time: 0.1320 data: 0.1069 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:14 (0.2326 s / it) +Averaged stats (hcp-val): loss: 0.8537 (0.8566) +Eval (nsd-val): [97] [ 0/62] eta: 0:06:17 loss: 0.8147 (0.8147) time: 6.0910 data: 6.0606 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8316 (0.8325) time: 0.1336 data: 0.1066 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:14 (0.2282 s / it) +Averaged stats (nsd-val): loss: 0.8316 (0.8325) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 10:19:00 lr: 0.000000 grad: 0.2493 (0.2493) loss: 0.7758 (0.7758) time: 5.9425 data: 5.8351 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:21:42 lr: 0.000000 grad: 0.2151 (0.2277) loss: 0.7827 (0.7708) time: 0.1327 data: 0.0207 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:18:39 lr: 0.000000 grad: 0.2299 (0.2255) loss: 0.7393 (0.7605) time: 0.1615 data: 0.0562 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:17:17 lr: 0.000000 grad: 0.2155 (0.2223) loss: 0.7553 (0.7585) time: 0.1479 data: 0.0497 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:16:25 lr: 0.000000 grad: 0.2247 (0.2193) loss: 0.7494 (0.7588) time: 0.1648 data: 0.0655 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:15:43 lr: 0.000000 grad: 0.2037 (0.2185) loss: 0.7553 (0.7582) time: 0.1469 data: 0.0565 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:15:17 lr: 0.000000 grad: 0.2129 (0.2167) loss: 0.7665 (0.7591) time: 0.1446 data: 0.0425 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:14:57 lr: 0.000000 grad: 0.2083 (0.2156) loss: 0.7575 (0.7593) time: 0.1392 data: 0.0444 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:14:31 lr: 0.000000 grad: 0.2075 (0.2151) loss: 0.7556 (0.7591) time: 0.1635 data: 0.0659 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:14:11 lr: 0.000000 grad: 0.2083 (0.2152) loss: 0.7505 (0.7587) time: 0.1702 data: 0.0797 max mem: 9377 +Train: [98] [1000/6250] eta: 0:13:45 lr: 0.000000 grad: 0.2107 (0.2149) loss: 0.7452 (0.7578) time: 0.1546 data: 0.0644 max mem: 9377 +Train: [98] [1100/6250] eta: 0:13:23 lr: 0.000000 grad: 0.2138 (0.2148) loss: 0.7541 (0.7571) time: 0.1442 data: 0.0519 max mem: 9377 +Train: [98] [1200/6250] eta: 0:13:03 lr: 0.000000 grad: 0.2098 (0.2149) loss: 0.7521 (0.7564) time: 0.1572 data: 0.0672 max mem: 9377 +Train: [98] [1300/6250] eta: 0:12:42 lr: 0.000000 grad: 0.2085 (0.2148) loss: 0.7551 (0.7558) time: 0.1235 data: 0.0302 max mem: 9377 +Train: [98] [1400/6250] eta: 0:12:23 lr: 0.000000 grad: 0.2186 (0.2152) loss: 0.7381 (0.7548) time: 0.1038 data: 0.0142 max mem: 9377 +Train: [98] [1500/6250] eta: 0:12:04 lr: 0.000000 grad: 0.2093 (0.2153) loss: 0.7423 (0.7541) time: 0.1423 data: 0.0515 max mem: 9377 +Train: [98] [1600/6250] eta: 0:11:47 lr: 0.000000 grad: 0.2091 (0.2154) loss: 0.7372 (0.7537) time: 0.1570 data: 0.0798 max mem: 9377 +Train: [98] [1700/6250] eta: 0:11:30 lr: 0.000000 grad: 0.2169 (0.2156) loss: 0.7364 (0.7530) time: 0.1303 data: 0.0482 max mem: 9377 +Train: [98] [1800/6250] eta: 0:11:14 lr: 0.000000 grad: 0.2159 (0.2157) loss: 0.7364 (0.7524) time: 0.1425 data: 0.0508 max mem: 9377 +Train: [98] [1900/6250] eta: 0:10:57 lr: 0.000000 grad: 0.2114 (0.2158) loss: 0.7371 (0.7518) time: 0.1426 data: 0.0570 max mem: 9377 +Train: [98] [2000/6250] eta: 0:10:44 lr: 0.000000 grad: 0.2118 (0.2159) loss: 0.7520 (0.7512) time: 0.1600 data: 0.0734 max mem: 9377 +Train: [98] [2100/6250] eta: 0:10:31 lr: 0.000000 grad: 0.2168 (0.2160) loss: 0.7341 (0.7509) time: 0.1736 data: 0.0930 max mem: 9377 +Train: [98] [2200/6250] eta: 0:10:16 lr: 0.000000 grad: 0.2215 (0.2161) loss: 0.7295 (0.7505) time: 0.1432 data: 0.0595 max mem: 9377 +Train: [98] [2300/6250] eta: 0:10:01 lr: 0.000000 grad: 0.2082 (0.2160) loss: 0.7484 (0.7501) time: 0.1650 data: 0.0750 max mem: 9377 +Train: [98] [2400/6250] eta: 0:09:48 lr: 0.000000 grad: 0.2071 (0.2160) loss: 0.7423 (0.7498) time: 0.1584 data: 0.0755 max mem: 9377 +Train: [98] [2500/6250] eta: 0:09:33 lr: 0.000000 grad: 0.2145 (0.2162) loss: 0.7355 (0.7494) time: 0.1682 data: 0.0895 max mem: 9377 +Train: [98] [2600/6250] eta: 0:09:17 lr: 0.000000 grad: 0.2070 (0.2162) loss: 0.7390 (0.7490) time: 0.1475 data: 0.0577 max mem: 9377 +Train: [98] [2700/6250] eta: 0:09:01 lr: 0.000000 grad: 0.2132 (0.2162) loss: 0.7440 (0.7488) time: 0.1433 data: 0.0588 max mem: 9377 +Train: [98] [2800/6250] eta: 0:08:45 lr: 0.000000 grad: 0.2086 (0.2162) loss: 0.7500 (0.7485) time: 0.1227 data: 0.0280 max mem: 9377 +Train: [98] [2900/6250] eta: 0:08:30 lr: 0.000000 grad: 0.2178 (0.2163) loss: 0.7385 (0.7484) time: 0.1770 data: 0.0950 max mem: 9377 +Train: [98] [3000/6250] eta: 0:08:16 lr: 0.000000 grad: 0.2150 (0.2163) loss: 0.7494 (0.7482) time: 0.1397 data: 0.0567 max mem: 9377 +Train: [98] [3100/6250] eta: 0:08:01 lr: 0.000000 grad: 0.2080 (0.2164) loss: 0.7589 (0.7482) time: 0.1586 data: 0.0589 max mem: 9377 +Train: [98] [3200/6250] eta: 0:07:46 lr: 0.000000 grad: 0.2160 (0.2163) loss: 0.7508 (0.7482) time: 0.1316 data: 0.0449 max mem: 9377 +Train: [98] [3300/6250] eta: 0:07:31 lr: 0.000000 grad: 0.2180 (0.2163) loss: 0.7391 (0.7481) time: 0.1647 data: 0.0702 max mem: 9377 +Train: [98] [3400/6250] eta: 0:07:16 lr: 0.000000 grad: 0.2131 (0.2162) loss: 0.7503 (0.7481) time: 0.1556 data: 0.0627 max mem: 9377 +Train: [98] [3500/6250] eta: 0:06:59 lr: 0.000000 grad: 0.2041 (0.2161) loss: 0.7470 (0.7481) time: 0.1357 data: 0.0425 max mem: 9377 +Train: [98] [3600/6250] eta: 0:06:43 lr: 0.000000 grad: 0.2149 (0.2161) loss: 0.7476 (0.7480) time: 0.1274 data: 0.0401 max mem: 9377 +Train: [98] [3700/6250] eta: 0:06:27 lr: 0.000000 grad: 0.2129 (0.2161) loss: 0.7496 (0.7479) time: 0.1389 data: 0.0500 max mem: 9377 +Train: [98] [3800/6250] eta: 0:06:12 lr: 0.000000 grad: 0.2097 (0.2160) loss: 0.7588 (0.7479) time: 0.1534 data: 0.0634 max mem: 9377 +Train: [98] [3900/6250] eta: 0:05:56 lr: 0.000000 grad: 0.2120 (0.2159) loss: 0.7473 (0.7479) time: 0.1395 data: 0.0517 max mem: 9377 +Train: [98] [4000/6250] eta: 0:05:40 lr: 0.000000 grad: 0.2164 (0.2158) loss: 0.7540 (0.7478) time: 0.1454 data: 0.0622 max mem: 9377 +Train: [98] [4100/6250] eta: 0:05:25 lr: 0.000000 grad: 0.2118 (0.2158) loss: 0.7518 (0.7478) time: 0.1592 data: 0.0711 max mem: 9377 +Train: [98] [4200/6250] eta: 0:05:10 lr: 0.000000 grad: 0.2112 (0.2158) loss: 0.7339 (0.7476) time: 0.1298 data: 0.0412 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:54 lr: 0.000000 grad: 0.2138 (0.2157) loss: 0.7422 (0.7476) time: 0.1405 data: 0.0591 max mem: 9377 +Train: [98] [4400/6250] eta: 0:04:40 lr: 0.000000 grad: 0.2029 (0.2156) loss: 0.7495 (0.7476) time: 0.1523 data: 0.0694 max mem: 9377 +Train: [98] [4500/6250] eta: 0:04:24 lr: 0.000000 grad: 0.2138 (0.2156) loss: 0.7389 (0.7476) time: 0.1427 data: 0.0561 max mem: 9377 +Train: [98] [4600/6250] eta: 0:04:09 lr: 0.000000 grad: 0.2086 (0.2155) loss: 0.7484 (0.7476) time: 0.1610 data: 0.0716 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:54 lr: 0.000000 grad: 0.2123 (0.2155) loss: 0.7406 (0.7475) time: 0.1429 data: 0.0591 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:39 lr: 0.000000 grad: 0.2164 (0.2155) loss: 0.7509 (0.7475) time: 0.1720 data: 0.0820 max mem: 9377 +Train: [98] [4900/6250] eta: 0:03:25 lr: 0.000000 grad: 0.2169 (0.2154) loss: 0.7399 (0.7476) time: 0.1683 data: 0.0798 max mem: 9377 +Train: [98] [5000/6250] eta: 0:03:09 lr: 0.000000 grad: 0.2055 (0.2153) loss: 0.7450 (0.7476) time: 0.1471 data: 0.0654 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:54 lr: 0.000000 grad: 0.2081 (0.2153) loss: 0.7577 (0.7476) time: 0.1588 data: 0.0708 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:39 lr: 0.000000 grad: 0.2078 (0.2152) loss: 0.7536 (0.7476) time: 0.1503 data: 0.0645 max mem: 9377 +Train: [98] [5300/6250] eta: 0:02:24 lr: 0.000000 grad: 0.2037 (0.2151) loss: 0.7428 (0.7477) time: 0.1556 data: 0.0787 max mem: 9377 +Train: [98] [5400/6250] eta: 0:02:08 lr: 0.000000 grad: 0.2065 (0.2149) loss: 0.7480 (0.7477) time: 0.1419 data: 0.0570 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:53 lr: 0.000000 grad: 0.2113 (0.2149) loss: 0.7481 (0.7477) time: 0.1377 data: 0.0560 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:38 lr: 0.000000 grad: 0.2005 (0.2147) loss: 0.7479 (0.7477) time: 0.1388 data: 0.0589 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:23 lr: 0.000000 grad: 0.2101 (0.2147) loss: 0.7407 (0.7477) time: 0.1545 data: 0.0628 max mem: 9377 +Train: [98] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.2081 (0.2147) loss: 0.7438 (0.7477) time: 0.1348 data: 0.0471 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.2102 (0.2147) loss: 0.7501 (0.7477) time: 0.1499 data: 0.0676 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.2110 (0.2146) loss: 0.7396 (0.7476) time: 0.1518 data: 0.0643 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.2014 (0.2146) loss: 0.7498 (0.7477) time: 0.1416 data: 0.0529 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2100 (0.2146) loss: 0.7346 (0.7476) time: 0.1584 data: 0.0689 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2071 (0.2146) loss: 0.7303 (0.7476) time: 0.1361 data: 0.0605 max mem: 9377 +Train: [98] Total time: 0:15:46 (0.1514 s / it) +Averaged stats: lr: 0.000000 grad: 0.2071 (0.2146) loss: 0.7303 (0.7476) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:06:01 loss: 0.8509 (0.8509) time: 5.8382 data: 5.8057 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8562 (0.8581) time: 0.1304 data: 0.1033 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.8562 (0.8581) +Eval (hcp-val): [98] [ 0/62] eta: 0:06:04 loss: 0.8560 (0.8560) time: 5.8787 data: 5.8472 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8525 (0.8564) time: 0.1378 data: 0.1111 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:14 (0.2301 s / it) +Averaged stats (hcp-val): loss: 0.8525 (0.8564) +Eval (nsd-val): [98] [ 0/62] eta: 0:06:14 loss: 0.8203 (0.8203) time: 6.0443 data: 6.0040 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8314 (0.8331) time: 0.1233 data: 0.0981 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (nsd-val): loss: 0.8314 (0.8331) +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 9:59:24 lr: 0.000000 grad: 0.4222 (0.4222) loss: 0.6483 (0.6483) time: 5.7543 data: 5.4660 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:21:49 lr: 0.000000 grad: 0.2357 (0.2622) loss: 0.7591 (0.7541) time: 0.1543 data: 0.0345 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:18:45 lr: 0.000000 grad: 0.2342 (0.2513) loss: 0.7506 (0.7531) time: 0.1569 data: 0.0553 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:17:28 lr: 0.000000 grad: 0.2239 (0.2469) loss: 0.7228 (0.7475) time: 0.1660 data: 0.0659 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:16:37 lr: 0.000000 grad: 0.2288 (0.2428) loss: 0.7470 (0.7463) time: 0.1645 data: 0.0739 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:16:02 lr: 0.000000 grad: 0.2249 (0.2399) loss: 0.7347 (0.7457) time: 0.1521 data: 0.0568 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:15:29 lr: 0.000000 grad: 0.2244 (0.2371) loss: 0.7630 (0.7461) time: 0.1409 data: 0.0484 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:15:10 lr: 0.000000 grad: 0.2146 (0.2349) loss: 0.7512 (0.7459) time: 0.1457 data: 0.0499 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:14:40 lr: 0.000000 grad: 0.2152 (0.2327) loss: 0.7558 (0.7465) time: 0.1218 data: 0.0051 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:14:13 lr: 0.000000 grad: 0.2167 (0.2305) loss: 0.7457 (0.7469) time: 0.1426 data: 0.0517 max mem: 9377 +Train: [99] [1000/6250] eta: 0:13:50 lr: 0.000000 grad: 0.2021 (0.2300) loss: 0.7511 (0.7472) time: 0.1456 data: 0.0470 max mem: 9377 +Train: [99] [1100/6250] eta: 0:13:26 lr: 0.000000 grad: 0.2075 (0.2287) loss: 0.7465 (0.7473) time: 0.1536 data: 0.0619 max mem: 9377 +Train: [99] [1200/6250] eta: 0:13:05 lr: 0.000000 grad: 0.2066 (0.2275) loss: 0.7427 (0.7477) time: 0.1354 data: 0.0416 max mem: 9377 +Train: [99] [1300/6250] eta: 0:12:45 lr: 0.000000 grad: 0.2145 (0.2267) loss: 0.7425 (0.7478) time: 0.1333 data: 0.0369 max mem: 9377 +Train: [99] [1400/6250] eta: 0:12:23 lr: 0.000000 grad: 0.2224 (0.2257) loss: 0.7504 (0.7479) time: 0.1254 data: 0.0302 max mem: 9377 +Train: [99] [1500/6250] eta: 0:12:03 lr: 0.000000 grad: 0.2066 (0.2247) loss: 0.7487 (0.7480) time: 0.1328 data: 0.0500 max mem: 9377 +Train: [99] [1600/6250] eta: 0:11:45 lr: 0.000000 grad: 0.2134 (0.2241) loss: 0.7424 (0.7482) time: 0.1404 data: 0.0399 max mem: 9377 +Train: [99] [1700/6250] eta: 0:11:28 lr: 0.000000 grad: 0.2122 (0.2230) loss: 0.7462 (0.7485) time: 0.1578 data: 0.0635 max mem: 9377 +Train: [99] [1800/6250] eta: 0:11:12 lr: 0.000000 grad: 0.2097 (0.2222) loss: 0.7447 (0.7488) time: 0.1445 data: 0.0424 max mem: 9377 +Train: [99] [1900/6250] eta: 0:10:57 lr: 0.000000 grad: 0.1986 (0.2216) loss: 0.7546 (0.7490) time: 0.1648 data: 0.0808 max mem: 9377 +Train: [99] [2000/6250] eta: 0:10:45 lr: 0.000000 grad: 0.2069 (0.2209) loss: 0.7533 (0.7491) time: 0.1330 data: 0.0520 max mem: 9377 +Train: [99] [2100/6250] eta: 0:10:30 lr: 0.000000 grad: 0.2109 (0.2205) loss: 0.7520 (0.7492) time: 0.1498 data: 0.0525 max mem: 9377 +Train: [99] [2200/6250] eta: 0:10:17 lr: 0.000000 grad: 0.2062 (0.2201) loss: 0.7543 (0.7495) time: 0.1457 data: 0.0644 max mem: 9377 +Train: [99] [2300/6250] eta: 0:10:03 lr: 0.000000 grad: 0.2078 (0.2196) loss: 0.7456 (0.7497) time: 0.1612 data: 0.0749 max mem: 9377 +Train: [99] [2400/6250] eta: 0:09:48 lr: 0.000000 grad: 0.2065 (0.2194) loss: 0.7564 (0.7496) time: 0.1525 data: 0.0665 max mem: 9377 +Train: [99] [2500/6250] eta: 0:09:33 lr: 0.000000 grad: 0.1995 (0.2191) loss: 0.7574 (0.7498) time: 0.1416 data: 0.0486 max mem: 9377 +Train: [99] [2600/6250] eta: 0:09:16 lr: 0.000000 grad: 0.2088 (0.2186) loss: 0.7509 (0.7499) time: 0.1352 data: 0.0410 max mem: 9377 +Train: [99] [2700/6250] eta: 0:09:01 lr: 0.000000 grad: 0.2106 (0.2184) loss: 0.7511 (0.7499) time: 0.1555 data: 0.0694 max mem: 9377 +Train: [99] [2800/6250] eta: 0:08:45 lr: 0.000000 grad: 0.2127 (0.2182) loss: 0.7497 (0.7499) time: 0.1445 data: 0.0524 max mem: 9377 +Train: [99] [2900/6250] eta: 0:08:30 lr: 0.000000 grad: 0.2074 (0.2179) loss: 0.7574 (0.7501) time: 0.1643 data: 0.0773 max mem: 9377 +Train: [99] [3000/6250] eta: 0:08:15 lr: 0.000000 grad: 0.2043 (0.2178) loss: 0.7541 (0.7501) time: 0.1598 data: 0.0717 max mem: 9377 +Train: [99] [3100/6250] eta: 0:08:00 lr: 0.000000 grad: 0.2040 (0.2177) loss: 0.7540 (0.7501) time: 0.1534 data: 0.0579 max mem: 9377 +Train: [99] [3200/6250] eta: 0:07:45 lr: 0.000000 grad: 0.2048 (0.2176) loss: 0.7587 (0.7502) time: 0.1282 data: 0.0388 max mem: 9377 +Train: [99] [3300/6250] eta: 0:07:29 lr: 0.000000 grad: 0.2063 (0.2174) loss: 0.7541 (0.7502) time: 0.1251 data: 0.0424 max mem: 9377 +Train: [99] [3400/6250] eta: 0:07:13 lr: 0.000000 grad: 0.2074 (0.2174) loss: 0.7397 (0.7501) time: 0.1448 data: 0.0650 max mem: 9377 +Train: [99] [3500/6250] eta: 0:06:55 lr: 0.000000 grad: 0.2074 (0.2173) loss: 0.7590 (0.7501) time: 0.1320 data: 0.0500 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:39 lr: 0.000000 grad: 0.2170 (0.2173) loss: 0.7491 (0.7500) time: 0.1575 data: 0.0773 max mem: 9377 +Train: [99] [3700/6250] eta: 0:06:23 lr: 0.000000 grad: 0.2115 (0.2173) loss: 0.7383 (0.7500) time: 0.1144 data: 0.0348 max mem: 9377 +Train: [99] [3800/6250] eta: 0:06:07 lr: 0.000000 grad: 0.2114 (0.2172) loss: 0.7439 (0.7499) time: 0.1186 data: 0.0374 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:51 lr: 0.000000 grad: 0.2044 (0.2171) loss: 0.7403 (0.7498) time: 0.1484 data: 0.0675 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:35 lr: 0.000000 grad: 0.2027 (0.2169) loss: 0.7556 (0.7497) time: 0.1366 data: 0.0562 max mem: 9377 +Train: [99] [4100/6250] eta: 0:05:20 lr: 0.000000 grad: 0.2131 (0.2169) loss: 0.7535 (0.7497) time: 0.1399 data: 0.0580 max mem: 9377 +Train: [99] [4200/6250] eta: 0:05:04 lr: 0.000000 grad: 0.2062 (0.2168) loss: 0.7432 (0.7497) time: 0.1355 data: 0.0479 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:48 lr: 0.000000 grad: 0.2089 (0.2168) loss: 0.7497 (0.7496) time: 0.1389 data: 0.0571 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:34 lr: 0.000000 grad: 0.2160 (0.2167) loss: 0.7480 (0.7496) time: 0.1348 data: 0.0450 max mem: 9377 +Train: [99] [4500/6250] eta: 0:04:19 lr: 0.000000 grad: 0.2109 (0.2167) loss: 0.7436 (0.7496) time: 0.1667 data: 0.0829 max mem: 9377 +Train: [99] [4600/6250] eta: 0:04:04 lr: 0.000000 grad: 0.2174 (0.2166) loss: 0.7473 (0.7496) time: 0.1486 data: 0.0693 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:49 lr: 0.000000 grad: 0.2116 (0.2165) loss: 0.7510 (0.7496) time: 0.1256 data: 0.0428 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:33 lr: 0.000000 grad: 0.2125 (0.2164) loss: 0.7479 (0.7496) time: 0.1211 data: 0.0383 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:18 lr: 0.000000 grad: 0.2115 (0.2163) loss: 0.7512 (0.7497) time: 0.1179 data: 0.0423 max mem: 9377 +Train: [99] [5000/6250] eta: 0:03:03 lr: 0.000000 grad: 0.2089 (0.2161) loss: 0.7459 (0.7497) time: 0.1267 data: 0.0498 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:48 lr: 0.000000 grad: 0.1989 (0.2160) loss: 0.7503 (0.7498) time: 0.1187 data: 0.0408 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:33 lr: 0.000000 grad: 0.2074 (0.2158) loss: 0.7554 (0.7499) time: 0.1140 data: 0.0374 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:18 lr: 0.000000 grad: 0.1963 (0.2157) loss: 0.7616 (0.7501) time: 0.1225 data: 0.0495 max mem: 9377 +Train: [99] [5400/6250] eta: 0:02:03 lr: 0.000000 grad: 0.2038 (0.2155) loss: 0.7630 (0.7502) time: 0.1287 data: 0.0502 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:48 lr: 0.000000 grad: 0.2011 (0.2153) loss: 0.7591 (0.7503) time: 0.1242 data: 0.0492 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:33 lr: 0.000000 grad: 0.2103 (0.2152) loss: 0.7553 (0.7504) time: 0.1277 data: 0.0512 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:19 lr: 0.000000 grad: 0.2063 (0.2152) loss: 0.7554 (0.7504) time: 0.1307 data: 0.0559 max mem: 9377 +Train: [99] [5800/6250] eta: 0:01:04 lr: 0.000000 grad: 0.2218 (0.2152) loss: 0.7641 (0.7505) time: 0.1196 data: 0.0443 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:50 lr: 0.000000 grad: 0.2038 (0.2151) loss: 0.7505 (0.7506) time: 0.1412 data: 0.0655 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.2005 (0.2151) loss: 0.7477 (0.7507) time: 0.1332 data: 0.0542 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2151 (0.2151) loss: 0.7564 (0.7507) time: 0.1401 data: 0.0665 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2091 (0.2150) loss: 0.7383 (0.7507) time: 0.1336 data: 0.0632 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2101 (0.2150) loss: 0.7484 (0.7507) time: 0.1202 data: 0.0423 max mem: 9377 +Train: [99] Total time: 0:14:57 (0.1436 s / it) +Averaged stats: lr: 0.000000 grad: 0.2101 (0.2150) loss: 0.7484 (0.7507) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:05:13 loss: 0.8502 (0.8502) time: 5.0642 data: 5.0350 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8569 (0.8575) time: 0.0947 data: 0.0700 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:11 (0.1900 s / it) +Averaged stats (hcp-train-subset): loss: 0.8569 (0.8575) +Making plots (hcp-train-subset): example=4 +Eval (hcp-val): [99] [ 0/62] eta: 0:05:14 loss: 0.8549 (0.8549) time: 5.0778 data: 5.0490 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8535 (0.8557) time: 0.1116 data: 0.0867 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:11 (0.1906 s / it) +Averaged stats (hcp-val): loss: 0.8535 (0.8557) +Making plots (hcp-val): example=41 +Eval (nsd-val): [99] [ 0/62] eta: 0:03:13 loss: 0.8215 (0.8215) time: 3.1135 data: 3.0576 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8312 (0.8334) time: 0.1079 data: 0.0808 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:11 (0.1908 s / it) +Averaged stats (nsd-val): loss: 0.8312 (0.8334) +Making plots (nsd-val): example=43 +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n400_2/pretrain/checkpoint-00099.pth +done! training time: 1 day, 5:40:13 diff --git a/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..35a700313889a00de522ebc41c947c43732812ee --- /dev/null +++ b/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..c1a8ebebb4c2861f1428da64819e4cbe94c173d9 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.005994842503189409,train,0.6850393700787402,0.021224671987416004,0.6844195954809441,0.021365492410495956,0.6849265747484758,0.02124289737416035 +flat_mae,patch,logistic,aabc_age,,0.005994842503189409,test,0.3076923076923077,0.05757675607601288,0.2875046347793845,0.05700036520146199,0.301510989010989,0.057403021015216986 +flat_mae,patch,logistic,aabc_age,1,0.000774263682681127,train,0.5570866141732284,0.021190412656840282,0.5514135814735559,0.021876919841948433,0.5577517458174224,0.021207422195337403 +flat_mae,patch,logistic,aabc_age,1,0.000774263682681127,test,0.46153846153846156,0.06247484700962252,0.44600885225885223,0.06256013104795484,0.45650183150183155,0.062089749429509136 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5649606299212598,0.02092486870210325,0.5595246228369275,0.02138896310361961,0.5656986674057182,0.020921191102177314 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.4230769230769231,0.06175615341977754,0.4120063228148235,0.06346255419665894,0.4210164835164836,0.061539195993226964 +flat_mae,patch,logistic,aabc_age,3,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,3,21.54434690031882,test,0.40384615384615385,0.06503184245079005,0.39925925925925926,0.06454900909198748,0.40613553113553114,0.06561036383762181 +flat_mae,patch,logistic,aabc_age,4,0.3593813663804626,train,0.9960629921259843,0.0027374947466168678,0.9960256812008625,0.0027686084708755626,0.9959177552652626,0.0028373223444518466 +flat_mae,patch,logistic,aabc_age,4,0.3593813663804626,test,0.5,0.06772110401487916,0.49155422288855566,0.06907414366075038,0.49954212454212454,0.06764776172948235 +flat_mae,patch,logistic,aabc_age,5,0.000774263682681127,train,0.5610236220472441,0.020486595651820788,0.5564395451496418,0.02105677776163441,0.5606961644760438,0.020449476822917215 +flat_mae,patch,logistic,aabc_age,5,0.000774263682681127,test,0.5,0.06142146329945383,0.4776190476190476,0.06335422454293077,0.4993131868131868,0.06119878303744495 +flat_mae,patch,logistic,aabc_age,6,0.046415888336127774,train,0.844488188976378,0.015053071221179647,0.845212171373394,0.015066140662210046,0.8446977688443753,0.015032356183279588 +flat_mae,patch,logistic,aabc_age,6,0.046415888336127774,test,0.46153846153846156,0.06657401229027762,0.4504148469665711,0.06735864037210726,0.46543040293040294,0.06714973150961155 +flat_mae,patch,logistic,aabc_age,7,0.046415888336127774,train,0.8562992125984252,0.01449414461637721,0.8564883892093578,0.014491833219470989,0.8573296521407241,0.014463607269458735 +flat_mae,patch,logistic,aabc_age,7,0.046415888336127774,test,0.4807692307692308,0.06470232246638738,0.4609649122807017,0.06486746807928159,0.47435897435897434,0.06430503870055063 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,train,0.7066929133858267,0.019007800624821623,0.7051482823729485,0.01919139426228021,0.7060177238108207,0.019002776850955774 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,test,0.4807692307692308,0.06490986012023446,0.479790008841733,0.06279252457740357,0.48466117216117216,0.06532409121851347 +flat_mae,patch,logistic,aabc_age,9,0.000774263682681127,train,0.5531496062992126,0.021012286823008933,0.5519432696570439,0.021392019325659407,0.5539694211040126,0.02099969475487992 +flat_mae,patch,logistic,aabc_age,9,0.000774263682681127,test,0.46153846153846156,0.06463983054052426,0.4481837606837607,0.06502554673919092,0.4551282051282051,0.06399651404904408 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,train,0.5590551181102362,0.020694157895317745,0.5560497981484616,0.021123933690335158,0.5600178082007867,0.020623022044829013 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,test,0.4423076923076923,0.06726059890961776,0.4420335286902004,0.06754222610302825,0.44047619047619047,0.06690628130422376 +flat_mae,patch,logistic,aabc_age,11,0.000774263682681127,train,0.547244094488189,0.021549670328721564,0.5413203419134089,0.021972835709529265,0.5480562497482686,0.021495173740776523 +flat_mae,patch,logistic,aabc_age,11,0.000774263682681127,test,0.5769230769230769,0.06839932350265829,0.5777894491129786,0.07240478580840506,0.57257326007326,0.06889604540377417 +flat_mae,patch,logistic,aabc_age,12,0.046415888336127774,train,0.8740157480314961,0.014523982232573795,0.8740880663405771,0.014528239933929193,0.8747545019169948,0.014444044723455143 +flat_mae,patch,logistic,aabc_age,12,0.046415888336127774,test,0.38461538461538464,0.06150791309076471,0.38741258741258744,0.06106416635066722,0.388507326007326,0.06204579591714843 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,train,0.7047244094488189,0.0188833131404166,0.704121090994902,0.01904688088371253,0.7049218529734997,0.018787948524745066 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,test,0.46153846153846156,0.06348455293643061,0.46835978835978836,0.06189251727726032,0.46108058608058605,0.06385561843909417 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,train,0.8681102362204725,0.01579317545171905,0.867816629683455,0.01590259952757229,0.8692588451233149,0.015649721064066324 +flat_mae,patch,logistic,aabc_age,14,0.046415888336127774,test,0.5384615384615384,0.0680179557741359,0.5424653744556294,0.06834096073823309,0.5428113553113554,0.06822708010809156 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,train,0.8582677165354331,0.016051919817525247,0.8586867040574316,0.016057160267986866,0.8587930508698879,0.01598324314497557 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,test,0.40384615384615385,0.06678521387926678,0.4106990622335891,0.06629143836223779,0.40201465201465203,0.06677506813727727 +flat_mae,patch,logistic,aabc_age,16,0.3593813663804626,train,0.9980314960629921,0.002032136870183823,0.9980665982539895,0.0019986263793594803,0.9979338842975207,0.0021329039877136047 +flat_mae,patch,logistic,aabc_age,16,0.3593813663804626,test,0.46153846153846156,0.06966609924931483,0.4501604717121958,0.07059627780409601,0.45535714285714285,0.06929363548228835 +flat_mae,patch,logistic,aabc_age,17,0.000774263682681127,train,0.5649606299212598,0.02124966948206268,0.5619696694491696,0.02188867362378902,0.565145937102624,0.02116535863480556 +flat_mae,patch,logistic,aabc_age,17,0.000774263682681127,test,0.38461538461538464,0.06202018186509841,0.357683976347989,0.05828643207974376,0.37934981684981683,0.06086836440818583 +flat_mae,patch,logistic,aabc_age,18,0.3593813663804626,train,0.9980314960629921,0.001992826119537746,0.99795904766665,0.002066063203797111,0.997983870967742,0.002041039654687862 +flat_mae,patch,logistic,aabc_age,18,0.3593813663804626,test,0.3269230769230769,0.06632361936148463,0.3315937223695844,0.06480662449753313,0.33058608058608063,0.06717984528226675 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,train,0.8740157480314961,0.014654482045015701,0.8743546718771061,0.014653536506143008,0.8753395976900165,0.014521502171826954 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,test,0.36538461538461536,0.06538194564685305,0.3564696593728852,0.06540993232506881,0.36057692307692313,0.06541127575021219 +flat_mae,patch,logistic,aabc_age,20,9.999999999999999e-05,train,0.49803149606299213,0.020956096399758053,0.4766210382453204,0.021571229282098568,0.49486276081773883,0.02089935200649891 +flat_mae,patch,logistic,aabc_age,20,9.999999999999999e-05,test,0.4807692307692308,0.05510345300019209,0.4546861471861472,0.06199659535324122,0.4771062271062271,0.05470054922058839 +flat_mae,patch,logistic,aabc_age,21,0.046415888336127774,train,0.8582677165354331,0.01509860947027957,0.8582958277084526,0.015240698265925811,0.8585431175187817,0.015128191412029257 +flat_mae,patch,logistic,aabc_age,21,0.046415888336127774,test,0.36538461538461536,0.05920908694053102,0.3425009269558769,0.05782507744213379,0.3658424908424908,0.0595261974214603 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,train,0.8700787401574803,0.014787908890864126,0.8702078805371835,0.014867801782348145,0.870354715960636,0.014877589743995539 +flat_mae,patch,logistic,aabc_age,22,0.046415888336127774,test,0.38461538461538464,0.06467575019063863,0.3847042694868782,0.06486590762277762,0.38278388278388276,0.0647812249931977 +flat_mae,patch,logistic,aabc_age,23,0.046415888336127774,train,0.8562992125984252,0.015076876902211357,0.8569169641431951,0.015047951678488538,0.8568945163783662,0.015008295481546696 +flat_mae,patch,logistic,aabc_age,23,0.046415888336127774,test,0.4230769230769231,0.06074274008778976,0.4137362637362637,0.058110440368564796,0.4207875457875458,0.06033584553589197 +flat_mae,patch,logistic,aabc_age,24,0.000774263682681127,train,0.5669291338582677,0.021610787781853227,0.5623278398080517,0.022156502678025634,0.5673296473458397,0.02156350760606105 +flat_mae,patch,logistic,aabc_age,24,0.000774263682681127,test,0.40384615384615385,0.06427221141180156,0.39771062271062274,0.06420122347417925,0.40315934065934067,0.06406548918039497 +flat_mae,patch,logistic,aabc_age,25,0.005994842503189409,train,0.7007874015748031,0.01956997869104951,0.699509935427497,0.01973180835475673,0.7021450155258357,0.01948092902014046 +flat_mae,patch,logistic,aabc_age,25,0.005994842503189409,test,0.3269230769230769,0.06003697085815111,0.3239170506912442,0.057879678076511884,0.3202838827838828,0.05907660011741308 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,train,0.562992125984252,0.02174947219081605,0.5595222782671694,0.022008187696911083,0.5635649438327237,0.021698923408261937 +flat_mae,patch,logistic,aabc_age,26,0.000774263682681127,test,0.38461538461538464,0.061240818657350475,0.361078431372549,0.059632047662972684,0.3807234432234432,0.06070888892527783 +flat_mae,patch,logistic,aabc_age,27,0.005994842503189409,train,0.702755905511811,0.01960913673613622,0.7026310288978495,0.019714426340963334,0.7028557372710204,0.01966636924387146 +flat_mae,patch,logistic,aabc_age,27,0.005994842503189409,test,0.40384615384615385,0.06597027302563575,0.39298722349351634,0.06841016289734832,0.40041208791208793,0.06568768767013522 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,train,0.7047244094488189,0.020859260119128517,0.7044004037291512,0.021072422703630064,0.7056745299574789,0.02075572383253953 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,test,0.46153846153846156,0.06623451030263292,0.4551251646903821,0.06728809276628998,0.4624542124542125,0.06625810981373556 +flat_mae,patch,logistic,aabc_age,29,9.999999999999999e-05,train,0.4940944881889764,0.0205297030041209,0.47123894585760995,0.02104692492891872,0.4907305294127801,0.02045021740229822 +flat_mae,patch,logistic,aabc_age,29,9.999999999999999e-05,test,0.28846153846153844,0.06129445615249054,0.26722873900293254,0.05097403681864139,0.28434065934065933,0.06024760351265629 +flat_mae,patch,logistic,aabc_age,30,0.005994842503189409,train,0.6889763779527559,0.019186643433768507,0.6864989815601618,0.019580620048214436,0.6878901837207919,0.0192790764238808 +flat_mae,patch,logistic,aabc_age,30,0.005994842503189409,test,0.46153846153846156,0.06567811952709506,0.4778846153846154,0.06187459143401348,0.46703296703296704,0.06605509637704993 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,train,0.4862204724409449,0.020560288506023615,0.4598847942531543,0.02074175556891514,0.4830011757056631,0.0204423119150722 +flat_mae,patch,logistic,aabc_age,31,9.999999999999999e-05,test,0.5384615384615384,0.059494554969391246,0.4907806993765248,0.0600043229227126,0.5290750915750916,0.05849989356761955 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,train,0.9960629921259843,0.00275706106257576,0.9961647955669576,0.0026844785264231443,0.9959177552652626,0.0028586955309325935 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,test,0.4230769230769231,0.06216597130396726,0.42647664835164834,0.06089824594736231,0.4210164835164836,0.06206877848853379 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,train,0.8622047244094488,0.015564768309358957,0.8622157400090784,0.015644193998806597,0.8621226186206461,0.015509706119487916 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,test,0.4230769230769231,0.06678628814377746,0.42497501249375313,0.06653975261426583,0.42261904761904756,0.06687432796630408 +flat_mae,patch,logistic,aabc_age,34,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,34,166.81005372000556,test,0.3269230769230769,0.06353095271332489,0.33413848631239934,0.06420904218131555,0.32463369963369965,0.0631371219976035 +flat_mae,patch,logistic,aabc_age,35,9.999999999999999e-05,train,0.49606299212598426,0.021025413162075964,0.47668732147898807,0.022158371309859457,0.4931141863368809,0.020938155674515214 +flat_mae,patch,logistic,aabc_age,35,9.999999999999999e-05,test,0.40384615384615385,0.062289456020749985,0.38212800712800715,0.061797825438919686,0.3985805860805861,0.061649514178212954 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,train,0.8503937007874016,0.016492772114567778,0.8511761815366061,0.016420431964289925,0.8511636705032135,0.016441047643793394 +flat_mae,patch,logistic,aabc_age,36,0.046415888336127774,test,0.46153846153846156,0.0634101306934169,0.43979200875752594,0.06382133077768497,0.45650183150183155,0.06301258587866009 +flat_mae,patch,logistic,aabc_age,37,0.000774263682681127,train,0.5708661417322834,0.02054622492806689,0.5637441785772905,0.021278156367402528,0.5709443908482917,0.020620455073895934 +flat_mae,patch,logistic,aabc_age,37,0.000774263682681127,test,0.4423076923076923,0.06449405963195874,0.4313607085346216,0.06781856280567988,0.43475274725274726,0.06435751009204459 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,train,0.6830708661417323,0.019602527465053216,0.6800534201713224,0.01995341145000247,0.6829472572302062,0.019669158922702074 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,test,0.5,0.06832414909175544,0.48796536796536794,0.07113357123469288,0.4951923076923077,0.06811849713185918 +flat_mae,patch,logistic,aabc_age,39,0.3593813663804626,train,0.9940944881889764,0.003372239722202656,0.9940901067702771,0.0033763891797255405,0.9938516395627832,0.0035128352625573927 +flat_mae,patch,logistic,aabc_age,39,0.3593813663804626,test,0.5192307692307693,0.06897842900964803,0.5237179487179487,0.06953226549361397,0.5192307692307693,0.06942424145922961 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,train,0.49015748031496065,0.019903907664855173,0.450309034915108,0.019923549482285034,0.485645674342957,0.019728916990701735 +flat_mae,patch,logistic,aabc_age,40,9.999999999999999e-05,test,0.5576923076923077,0.05053594420612653,0.48430735930735924,0.04386825411550989,0.5467032967032966,0.04889190701053372 +flat_mae,patch,logistic,aabc_age,41,9.999999999999999e-05,train,0.4940944881889764,0.019413878844717854,0.4700351571651964,0.020230081435905836,0.4902953936504222,0.01934147310562602 +flat_mae,patch,logistic,aabc_age,41,9.999999999999999e-05,test,0.3076923076923077,0.057936662008901824,0.2853991207161396,0.056704455840757256,0.30654761904761907,0.057791706779774396 +flat_mae,patch,logistic,aabc_age,42,0.005994842503189409,train,0.7047244094488189,0.01989588616253552,0.7034744567528756,0.020154816549879797,0.7043867438706992,0.019971379385361546 +flat_mae,patch,logistic,aabc_age,42,0.005994842503189409,test,0.46153846153846156,0.06049201619774827,0.45028419576289946,0.06453271716824137,0.46565934065934067,0.061119697032854886 +flat_mae,patch,logistic,aabc_age,43,9.999999999999999e-05,train,0.4744094488188976,0.01919526777402727,0.4405773270637231,0.019288987820429603,0.47108960392336624,0.01906607539990396 +flat_mae,patch,logistic,aabc_age,43,9.999999999999999e-05,test,0.5384615384615384,0.06311931033588775,0.5153769841269842,0.06917881899933104,0.529532967032967,0.06315810523328326 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,train,0.8622047244094488,0.015043098437144563,0.8628828604176825,0.014979900700998746,0.8630928634858043,0.015035544102264537 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,test,0.46153846153846156,0.07058382904799741,0.4606896551724138,0.07161411558485851,0.4658882783882784,0.0710119919128899 +flat_mae,patch,logistic,aabc_age,45,0.000774263682681127,train,0.5570866141732284,0.020441950341067442,0.5486303650972066,0.0212775251600658,0.5550585791031264,0.020417484591783944 +flat_mae,patch,logistic,aabc_age,45,0.000774263682681127,test,0.4807692307692308,0.06384193683941837,0.4891116941529235,0.06345930016322801,0.48489010989010994,0.0639201290998226 +flat_mae,patch,logistic,aabc_age,46,9.999999999999999e-05,train,0.4881889763779528,0.019218104203356224,0.4667605404908347,0.020319043016026817,0.4854348192999853,0.019131049839362608 +flat_mae,patch,logistic,aabc_age,46,9.999999999999999e-05,test,0.46153846153846156,0.05778570644910803,0.41849837662337663,0.059248031736398275,0.4548992673992674,0.05672014035810992 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,train,0.6889763779527559,0.01989898656814795,0.688664672636798,0.01990846166601078,0.6893455510185293,0.019796500304493814 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,test,0.40384615384615385,0.06524794539828994,0.3948825835221347,0.06702828902770422,0.40613553113553114,0.06534546620802473 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,train,0.8740157480314961,0.014506909229771528,0.8742597245102945,0.01450833926027847,0.8745369340358159,0.014428273770160687 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,test,0.36538461538461536,0.05917603615009939,0.3531181379007466,0.05983420045139798,0.3630952380952381,0.05875145918824808 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,train,0.8582677165354331,0.014944303150846256,0.8591130755855456,0.01492166283531991,0.8595457278538672,0.014888674074945513 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,test,0.36538461538461536,0.06561119113662595,0.3701181357212213,0.0660971633237626,0.3695054945054945,0.06612743182319211 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,train,0.6909448818897638,0.020226312719016582,0.6898072394220939,0.020576752609164473,0.690826570947987,0.020129632681029744 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,test,0.36538461538461536,0.06372614996490147,0.3660041407867495,0.06391961235658569,0.3649267399267399,0.06367563231087588 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,train,0.6988188976377953,0.01923859534236066,0.6989270478507257,0.019325911486959356,0.7003288331744628,0.019151574525865668 +flat_mae,patch,logistic,aabc_age,51,0.005994842503189409,test,0.4807692307692308,0.0702768300383326,0.47649110149110147,0.07049296336401929,0.48031135531135527,0.0703892466728851 +flat_mae,patch,logistic,aabc_age,52,0.046415888336127774,train,0.8523622047244095,0.014951258979774593,0.8528195792550568,0.014909253658382277,0.8534473540868719,0.0148423887616245 +flat_mae,patch,logistic,aabc_age,52,0.046415888336127774,test,0.4230769230769231,0.06683215530153522,0.4252501725327812,0.06878872871419349,0.4198717948717948,0.06670005454064401 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,train,0.4862204724409449,0.02035528257418838,0.4660185264916611,0.021296165818764555,0.4842889617924429,0.020305651209824783 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,test,0.5384615384615384,0.05769603834091805,0.5020320914717467,0.05582157797171618,0.530448717948718,0.056618913762078546 +flat_mae,patch,logistic,aabc_age,54,0.000774263682681127,train,0.5570866141732284,0.021025415005087342,0.5541278840468169,0.021518922646954146,0.5578017324876435,0.021010887637640356 +flat_mae,patch,logistic,aabc_age,54,0.000774263682681127,test,0.46153846153846156,0.061790397752708066,0.43744343891402715,0.06261157804239742,0.45650183150183155,0.061145302146378984 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,train,0.6889763779527559,0.01969765383224948,0.6869446177967572,0.019945139831463798,0.6891456043376443,0.01968219437312703 +flat_mae,patch,logistic,aabc_age,55,0.005994842503189409,test,0.46153846153846156,0.05378848560270081,0.4418998596843261,0.05724403149238457,0.46222527472527475,0.054003297459853834 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,train,0.5708661417322834,0.021548296838919616,0.5666686112108313,0.02176104941835132,0.5703740393449037,0.021528658585198642 +flat_mae,patch,logistic,aabc_age,56,0.000774263682681127,test,0.38461538461538464,0.06321041221996382,0.36883116883116884,0.06173740538091418,0.38369963369963367,0.06324689081836064 +flat_mae,patch,logistic,aabc_age,57,9.999999999999999e-05,train,0.4921259842519685,0.02051797026935676,0.4724109477150341,0.021318135128852063,0.48951706403472267,0.020364302630555626 +flat_mae,patch,logistic,aabc_age,57,9.999999999999999e-05,test,0.46153846153846156,0.06067386626330382,0.40388888888888885,0.0521161739778682,0.45192307692307687,0.05917838127210574 +flat_mae,patch,logistic,aabc_age,58,0.005994842503189409,train,0.6929133858267716,0.01968258685509733,0.6930353463901923,0.019795749863473558,0.6930926333313514,0.0196856427931976 +flat_mae,patch,logistic,aabc_age,58,0.005994842503189409,test,0.40384615384615385,0.05320892170597358,0.3508522727272727,0.049961582457160945,0.396978021978022,0.051563884928871054 +flat_mae,patch,logistic,aabc_age,59,0.000774263682681127,train,0.5590551181102362,0.020456254579387264,0.5524072739422657,0.020965031665685276,0.5586800354437857,0.020454408213471928 +flat_mae,patch,logistic,aabc_age,59,0.000774263682681127,test,0.4807692307692308,0.06555313125817468,0.47252314814814816,0.0661023309160732,0.47596153846153844,0.06521740884693747 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,train,0.6889763779527559,0.021512596393360027,0.6879643649192079,0.0219323035882614,0.68859287403455,0.021524917241352227 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,test,0.36538461538461536,0.059697102801059256,0.35850401866735804,0.06225002719502797,0.36744505494505497,0.06034869295075325 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,train,0.9960629921259843,0.0027573084168259787,0.9960256812008625,0.0027830004650851157,0.9959177552652626,0.002860708266494133 +flat_mae,patch,logistic,aabc_age,61,0.3593813663804626,test,0.38461538461538464,0.06720788203986214,0.3802524544179523,0.06570127360531582,0.37957875457875456,0.06677563322526223 +flat_mae,patch,logistic,aabc_age,62,0.000774263682681127,train,0.5590551181102362,0.021629671522826695,0.5569474369686795,0.0219298473540438,0.5600178082007867,0.021644548197838183 +flat_mae,patch,logistic,aabc_age,62,0.000774263682681127,test,0.5,0.062424581715593924,0.4845914954610607,0.06203738978332007,0.4908424908424909,0.06190462796249896 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,train,0.7066929133858267,0.01928466862035455,0.7050994320237524,0.01957485637895821,0.7059677371405993,0.01934326620005031 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,test,0.40384615384615385,0.056717977697400675,0.37732954545454545,0.05661087648440694,0.4043040293040293,0.056955932490691016 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,train,0.8661417322834646,0.013976453365661012,0.866625517383646,0.013961925543296635,0.8677602039935634,0.01385373957123611 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,test,0.36538461538461536,0.06596013678708083,0.35993055555555553,0.06733309495154545,0.36057692307692313,0.06585734900614695 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,train,0.8681102362204725,0.014859802694769263,0.8687425447044989,0.014822839626413337,0.8697763330258215,0.014684630045767397 +flat_mae,patch,logistic,aabc_age,65,0.046415888336127774,test,0.5,0.061337882249198714,0.485695084485407,0.06168436633125767,0.4965659340659341,0.06111939119103956 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,train,0.6830708661417323,0.01921601033019356,0.6830274513912976,0.019196355995242224,0.6838322730245556,0.01913187942995083 +flat_mae,patch,logistic,aabc_age,66,0.005994842503189409,test,0.4423076923076923,0.06395144237817096,0.44165070242656446,0.06325090735088686,0.4432234432234432,0.0641325429702632 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,train,0.8562992125984252,0.015092811735588975,0.8564462668683592,0.015203708887007904,0.8571120842595452,0.015116810025284049 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,test,0.4807692307692308,0.06690539112019857,0.48126618508927355,0.06740009244429777,0.4819139194139194,0.0670663034909975 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.8523622047244095,0.015127078622689036,0.8523493562540901,0.015224383021739186,0.8532474074059867,0.015060043686122366 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.5,0.06840294597275381,0.5010878489326765,0.06869249887700786,0.5011446886446886,0.0684168215857913 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,train,0.5748031496062992,0.02038616587766121,0.5685045224139659,0.020804342079471162,0.5750266355830291,0.0203452734585649 +flat_mae,patch,logistic,aabc_age,69,0.000774263682681127,test,0.36538461538461536,0.06404670771578168,0.3472086466165414,0.06130593268705105,0.35874542124542125,0.06316124475223499 +flat_mae,patch,logistic,aabc_age,70,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,70,0.3593813663804626,test,0.40384615384615385,0.06583559404901715,0.40501165501165504,0.06630027025722854,0.40819597069597074,0.0662187722047837 +flat_mae,patch,logistic,aabc_age,71,0.005994842503189409,train,0.6988188976377953,0.01964145529418663,0.6978532491981495,0.019832748042053393,0.698555924655104,0.019607883073035466 +flat_mae,patch,logistic,aabc_age,71,0.005994842503189409,test,0.40384615384615385,0.06440987235513461,0.4091937474290416,0.06450441791593017,0.4017857142857143,0.0643279039052277 +flat_mae,patch,logistic,aabc_age,72,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,72,2.782559402207126,test,0.5,0.06263239232256211,0.4914456233421751,0.06295029434111293,0.4965659340659341,0.06250047627419866 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,train,0.6791338582677166,0.020508032357073706,0.679051275940657,0.020602685668142117,0.6794324870681967,0.020484733765088862 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,test,0.5192307692307693,0.06394482643969225,0.5192307692307693,0.06382540009981036,0.5203754578754579,0.06416098899994466 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6968503937007874,0.019865663839630412,0.6958240335964908,0.01999283094240815,0.6977276083691831,0.019807110570347897 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.5192307692307693,0.06293694599833735,0.51,0.06366880725590576,0.5157967032967034,0.06278865279374736 +flat_mae,patch,logistic,aabc_age,75,0.000774263682681127,train,0.5531496062992126,0.019885695433697195,0.5479147006446817,0.020250955525341747,0.5528492162281904,0.01987264433283327 +flat_mae,patch,logistic,aabc_age,75,0.000774263682681127,test,0.46153846153846156,0.06405013753570866,0.4504468366947145,0.06623350146828673,0.459478021978022,0.06407603028815871 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,train,0.8562992125984252,0.01529518742564199,0.8564785119378471,0.015357059859559425,0.8572296788002816,0.015294650728255907 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,test,0.4423076923076923,0.06997861042604206,0.44671414292853573,0.06909320909577493,0.44184981684981683,0.0701237442222691 +flat_mae,patch,logistic,aabc_age,77,0.005994842503189409,train,0.6988188976377953,0.020614802944491143,0.6979817658449678,0.020823190048771378,0.6989910604174618,0.020557222940205604 +flat_mae,patch,logistic,aabc_age,77,0.005994842503189409,test,0.4230769230769231,0.06590268735934394,0.4226532567049809,0.06498933178721925,0.4253663003663004,0.06628560757200418 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.8543307086614174,0.01605743496584793,0.8542712795568533,0.016206920176625835,0.8549459952166232,0.016006699821954868 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5384615384615384,0.07070649393106455,0.5421790367442542,0.07097013247529732,0.5354853479853481,0.07089332358697414 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,train,0.8661417322834646,0.01564605565623857,0.8669266031572945,0.015612361234013515,0.8676602306531209,0.015541040579391678 +flat_mae,patch,logistic,aabc_age,79,0.046415888336127774,test,0.46153846153846156,0.06441861063737485,0.4421130952380953,0.06308756374435527,0.4551282051282052,0.06374891260639694 +flat_mae,patch,logistic,aabc_age,80,0.005994842503189409,train,0.6909448818897638,0.021117085591118093,0.6898272888978045,0.021346569682761347,0.6906589897370293,0.02119696153464879 +flat_mae,patch,logistic,aabc_age,80,0.005994842503189409,test,0.4807692307692308,0.06327529781525701,0.48094606542882407,0.06263048039073062,0.48168498168498164,0.06363452546861556 +flat_mae,patch,logistic,aabc_age,81,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,81,2.782559402207126,test,0.46153846153846156,0.06353877584202389,0.4582417582417583,0.06532134669047739,0.46291208791208793,0.06373321116669038 +flat_mae,patch,logistic,aabc_age,82,0.000774263682681127,train,0.547244094488189,0.021079113338991238,0.5406025024372516,0.02192339622120845,0.5466508691207524,0.021110496992347273 +flat_mae,patch,logistic,aabc_age,82,0.000774263682681127,test,0.4807692307692308,0.06430413819770164,0.45473953470277,0.06597214276511726,0.47573260073260076,0.0637480087565718 +flat_mae,patch,logistic,aabc_age,83,0.046415888336127774,train,0.8779527559055118,0.013393839658211573,0.8784054189857762,0.01340183829894458,0.8789367199921747,0.013322049460198085 +flat_mae,patch,logistic,aabc_age,83,0.046415888336127774,test,0.4230769230769231,0.06113178121530743,0.41002866864935833,0.05883305735730801,0.42216117216117216,0.06087249131765312 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,train,0.6988188976377953,0.019990265161357972,0.6981887844237302,0.020071583618667108,0.6989910604174618,0.019941417697456547 +flat_mae,patch,logistic,aabc_age,84,0.005994842503189409,test,0.5,0.0624271765088243,0.48259085419411507,0.06659581681974798,0.4981684981684982,0.06236466767442612 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,train,0.468503937007874,0.02023130665853143,0.43360827031868876,0.020326698830987547,0.46564393379990754,0.019950566845506952 +flat_mae,patch,logistic,aabc_age,85,9.999999999999999e-05,test,0.4807692307692308,0.05155803302329104,0.4220924908424909,0.060629012937797724,0.4741300366300366,0.050587019694515976 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,train,0.8503937007874016,0.01482005233688137,0.8503590350119632,0.014874256348787805,0.8521015498984444,0.014703162966494694 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,test,0.5192307692307693,0.06675632408044745,0.5084880636604774,0.06957198710569922,0.5146520146520146,0.06673800058657993 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,train,0.7047244094488189,0.018918335122489176,0.7037122273364018,0.019108807850146858,0.7047042850923207,0.018874260977817674 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,test,0.4230769230769231,0.06650550540008482,0.4261907507784569,0.06399138451690853,0.42994505494505497,0.06720600327761954 +flat_mae,patch,logistic,aabc_age,88,0.005994842503189409,train,0.6791338582677166,0.019851307265487582,0.6777223966755326,0.02002596260567417,0.6795000949387118,0.019831926884737415 +flat_mae,patch,logistic,aabc_age,88,0.005994842503189409,test,0.46153846153846156,0.06256276138747421,0.4455956112852665,0.06393337566021547,0.46382783882783885,0.06287964665450277 +flat_mae,patch,logistic,aabc_age,89,9.999999999999999e-05,train,0.4940944881889764,0.02014647329585931,0.47547003503533797,0.020420531588287254,0.49153319306698073,0.0200254504177079 +flat_mae,patch,logistic,aabc_age,89,9.999999999999999e-05,test,0.3269230769230769,0.061950674691035906,0.30302022088034325,0.06036256192181151,0.3244047619047619,0.06147668915682448 +flat_mae,patch,logistic,aabc_age,90,0.005994842503189409,train,0.702755905511811,0.02079354888525166,0.7008306177399155,0.021143625586645495,0.7022030336274835,0.020863350962186583 +flat_mae,patch,logistic,aabc_age,90,0.005994842503189409,test,0.4807692307692308,0.06555073918945555,0.47022546419098143,0.06583701984551005,0.47870879120879123,0.06516030924721601 +flat_mae,patch,logistic,aabc_age,91,0.046415888336127774,train,0.8523622047244095,0.015430509087558377,0.8530456565692138,0.01546916711847044,0.8535649486276082,0.015294003654204687 +flat_mae,patch,logistic,aabc_age,91,0.046415888336127774,test,0.38461538461538464,0.06277426214752094,0.3792724867724868,0.0625036641596621,0.39010989010989006,0.06333045752912517 +flat_mae,patch,logistic,aabc_age,92,0.000774263682681127,train,0.5728346456692913,0.020640309264755043,0.5659498237352909,0.021419881338355404,0.5724077895774555,0.020617404428055958 +flat_mae,patch,logistic,aabc_age,92,0.000774263682681127,test,0.5384615384615384,0.0564074751645371,0.5102756380329909,0.06432689840088908,0.5364010989010989,0.05638025375920742 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,train,0.8661417322834646,0.014575777265949339,0.866715390012881,0.014568916946644563,0.8674926494421632,0.014532598113629077 +flat_mae,patch,logistic,aabc_age,93,0.046415888336127774,test,0.4230769230769231,0.06354772702046435,0.40773809523809523,0.06135619836245136,0.4251373626373627,0.06412196674569563 +flat_mae,patch,logistic,aabc_age,94,0.000774263682681127,train,0.5728346456692913,0.0215649224457627,0.5661519373075437,0.022024773847335175,0.5717050992636976,0.021471193801899276 +flat_mae,patch,logistic,aabc_age,94,0.000774263682681127,test,0.4230769230769231,0.061622190635340336,0.40613041555294893,0.06447920405614192,0.42239010989010994,0.061676114808781934 +flat_mae,patch,logistic,aabc_age,95,0.046415888336127774,train,0.8641732283464567,0.014402762421810978,0.8644492654813904,0.014442079219446636,0.866129224053442,0.014252624016859266 +flat_mae,patch,logistic,aabc_age,95,0.046415888336127774,test,0.5192307692307693,0.0652645957286284,0.5049201251646904,0.06158108280535813,0.51007326007326,0.0645898111086808 +flat_mae,patch,logistic,aabc_age,96,0.3593813663804626,train,0.9921259842519685,0.004011923066442492,0.9920314572343741,0.004075543982218511,0.9917855238603039,0.004184353976116914 +flat_mae,patch,logistic,aabc_age,96,0.3593813663804626,test,0.5384615384615384,0.07058395479537975,0.5470488398477529,0.0699059264024412,0.5384615384615384,0.07068616510381091 +flat_mae,patch,logistic,aabc_age,97,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,97,0.3593813663804626,test,0.46153846153846156,0.06521982627732897,0.4510281385281385,0.0672644134173424,0.4610805860805861,0.06509616131638007 +flat_mae,patch,logistic,aabc_age,98,0.000774263682681127,train,0.5570866141732284,0.021892349589524206,0.5543784641889395,0.022491275572441082,0.5574665700657283,0.02190794191581315 +flat_mae,patch,logistic,aabc_age,98,0.000774263682681127,test,0.46153846153846156,0.05770731855999389,0.42310105363984674,0.056701285306138105,0.4548992673992674,0.05677297478324679 +flat_mae,patch,logistic,aabc_age,99,0.000774263682681127,train,0.5649606299212598,0.021401387584425006,0.5611781466679261,0.021734526806128283,0.5644432467888659,0.021384989255523203 +flat_mae,patch,logistic,aabc_age,99,0.000774263682681127,test,0.4230769230769231,0.056843434409815166,0.3917387218045113,0.05714803211270748,0.4235347985347986,0.056977448894920055 +flat_mae,patch,logistic,aabc_age,100,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,100,21.54434690031882,test,0.36538461538461536,0.06747968136001907,0.3704878330065736,0.0672051604286978,0.36790293040293043,0.06776703746178762 diff --git a/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..eb3a7c6b637575524f6dc83b7967fd4c3d832b4d --- /dev/null +++ b/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:08 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:19:49 time: 5.2157 data: 4.4183 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:39 time: 0.2406 data: 0.0789 max mem: 3393 +extract (train) [ 40/228] eta: 0:01:03 time: 0.1930 data: 0.0555 max mem: 3393 +extract (train) [ 60/228] eta: 0:00:49 time: 0.1983 data: 0.0623 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:40 time: 0.2042 data: 0.0629 max mem: 3393 +extract (train) [100/228] eta: 0:00:32 time: 0.1976 data: 0.0583 max mem: 3393 +extract (train) [120/228] eta: 0:00:26 time: 0.2108 data: 0.0673 max mem: 3393 +extract (train) [140/228] eta: 0:00:21 time: 0.2139 data: 0.0685 max mem: 3393 +extract (train) [160/228] eta: 0:00:16 time: 0.2298 data: 0.0750 max mem: 3393 +extract (train) [180/228] eta: 0:00:11 time: 0.2396 data: 0.0781 max mem: 3393 +extract (train) [200/228] eta: 0:00:06 time: 0.2190 data: 0.0683 max mem: 3393 +extract (train) [220/228] eta: 0:00:01 time: 0.1748 data: 0.0491 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1711 data: 0.0480 max mem: 3393 +extract (train) Total time: 0:00:53 (0.2339 s / it) +extract (validation) [ 0/27] eta: 0:02:29 time: 5.5217 data: 5.3556 max mem: 3393 +extract (validation) [20/27] eta: 0:00:03 time: 0.1855 data: 0.0499 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1629 data: 0.0429 max mem: 3393 +extract (validation) Total time: 0:00:10 (0.3888 s / it) +extract (test) [ 0/26] eta: 0:02:04 time: 4.7814 data: 4.6521 max mem: 3393 +extract (test) [20/26] eta: 0:00:02 time: 0.1737 data: 0.0480 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1775 data: 0.0511 max mem: 3393 +extract (test) Total time: 0:00:09 (0.3634 s / it) +feature extraction time: 0:01:13 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.0059948 | train | 0.68504 | 0.021225 | 0.68442 | 0.021365 | 0.68493 | 0.021243 | +| flat_mae | patch | logistic | aabc_age | | 0.0059948 | test | 0.30769 | 0.057577 | 0.2875 | 0.057 | 0.30151 | 0.057403 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06247484700962252, "f1": 0.44600885225885223, "f1_std": 0.06256013104795484, "bacc": 0.45650183150183155, "bacc_std": 0.062089749429509136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06175615341977754, "f1": 0.4120063228148235, "f1_std": 0.06346255419665894, "bacc": 0.4210164835164836, "bacc_std": 0.061539195993226964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06503184245079005, "f1": 0.39925925925925926, "f1_std": 0.06454900909198748, "bacc": 0.40613553113553114, "bacc_std": 0.06561036383762181} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06772110401487916, "f1": 0.49155422288855566, "f1_std": 0.06907414366075038, "bacc": 0.49954212454212454, "bacc_std": 0.06764776172948235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.06142146329945383, "f1": 0.4776190476190476, "f1_std": 0.06335422454293077, "bacc": 0.4993131868131868, "bacc_std": 0.06119878303744495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06657401229027762, "f1": 0.4504148469665711, "f1_std": 0.06735864037210726, "bacc": 0.46543040293040294, "bacc_std": 0.06714973150961155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06470232246638738, "f1": 0.4609649122807017, "f1_std": 0.06486746807928159, "bacc": 0.47435897435897434, "bacc_std": 0.06430503870055063} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06490986012023446, "f1": 0.479790008841733, "f1_std": 0.06279252457740357, "bacc": 0.48466117216117216, "bacc_std": 0.06532409121851347} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06463983054052426, "f1": 0.4481837606837607, "f1_std": 0.06502554673919092, "bacc": 0.4551282051282051, "bacc_std": 0.06399651404904408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06726059890961776, "f1": 0.4420335286902004, "f1_std": 0.06754222610302825, "bacc": 0.44047619047619047, "bacc_std": 0.06690628130422376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 0.000774263682681127, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06839932350265829, "f1": 0.5777894491129786, "f1_std": 0.07240478580840506, "bacc": 0.57257326007326, "bacc_std": 0.06889604540377417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06150791309076471, "f1": 0.38741258741258744, "f1_std": 0.06106416635066722, "bacc": 0.388507326007326, "bacc_std": 0.06204579591714843} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06348455293643061, "f1": 0.46835978835978836, "f1_std": 0.06189251727726032, "bacc": 0.46108058608058605, "bacc_std": 0.06385561843909417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0680179557741359, "f1": 0.5424653744556294, "f1_std": 0.06834096073823309, "bacc": 0.5428113553113554, "bacc_std": 0.06822708010809156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06678521387926678, "f1": 0.4106990622335891, "f1_std": 0.06629143836223779, "bacc": 0.40201465201465203, "bacc_std": 0.06677506813727727} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06966609924931483, "f1": 0.4501604717121958, "f1_std": 0.07059627780409601, "bacc": 0.45535714285714285, "bacc_std": 0.06929363548228835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06202018186509841, "f1": 0.357683976347989, "f1_std": 0.05828643207974376, "bacc": 0.37934981684981683, "bacc_std": 0.06086836440818583} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.3593813663804626, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.06632361936148463, "f1": 0.3315937223695844, "f1_std": 0.06480662449753313, "bacc": 0.33058608058608063, "bacc_std": 0.06717984528226675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06538194564685305, "f1": 0.3564696593728852, "f1_std": 0.06540993232506881, "bacc": 0.36057692307692313, "bacc_std": 0.06541127575021219} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05510345300019209, "f1": 0.4546861471861472, "f1_std": 0.06199659535324122, "bacc": 0.4771062271062271, "bacc_std": 0.05470054922058839} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.05920908694053102, "f1": 0.3425009269558769, "f1_std": 0.05782507744213379, "bacc": 0.3658424908424908, "bacc_std": 0.0595261974214603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06467575019063863, "f1": 0.3847042694868782, "f1_std": 0.06486590762277762, "bacc": 0.38278388278388276, "bacc_std": 0.0647812249931977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06074274008778976, "f1": 0.4137362637362637, "f1_std": 0.058110440368564796, "bacc": 0.4207875457875458, "bacc_std": 0.06033584553589197} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06427221141180156, "f1": 0.39771062271062274, "f1_std": 0.06420122347417925, "bacc": 0.40315934065934067, "bacc_std": 0.06406548918039497} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.06003697085815111, "f1": 0.3239170506912442, "f1_std": 0.057879678076511884, "bacc": 0.3202838827838828, "bacc_std": 0.05907660011741308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.061240818657350475, "f1": 0.361078431372549, "f1_std": 0.059632047662972684, "bacc": 0.3807234432234432, "bacc_std": 0.06070888892527783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06597027302563575, "f1": 0.39298722349351634, "f1_std": 0.06841016289734832, "bacc": 0.40041208791208793, "bacc_std": 0.06568768767013522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06623451030263292, "f1": 0.4551251646903821, "f1_std": 0.06728809276628998, "bacc": 0.4624542124542125, "bacc_std": 0.06625810981373556} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 9.999999999999999e-05, "split": "test", "acc": 0.28846153846153844, "acc_std": 0.06129445615249054, "f1": 0.26722873900293254, "f1_std": 0.05097403681864139, "bacc": 0.28434065934065933, "bacc_std": 0.06024760351265629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06567811952709506, "f1": 0.4778846153846154, "f1_std": 0.06187459143401348, "bacc": 0.46703296703296704, "bacc_std": 0.06605509637704993} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.059494554969391246, "f1": 0.4907806993765248, "f1_std": 0.0600043229227126, "bacc": 0.5290750915750916, "bacc_std": 0.05849989356761955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06216597130396726, "f1": 0.42647664835164834, "f1_std": 0.06089824594736231, "bacc": 0.4210164835164836, "bacc_std": 0.06206877848853379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06678628814377746, "f1": 0.42497501249375313, "f1_std": 0.06653975261426583, "bacc": 0.42261904761904756, "bacc_std": 0.06687432796630408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 166.81005372000556, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.06353095271332489, "f1": 0.33413848631239934, "f1_std": 0.06420904218131555, "bacc": 0.32463369963369965, "bacc_std": 0.0631371219976035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 9.999999999999999e-05, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.062289456020749985, "f1": 0.38212800712800715, "f1_std": 0.061797825438919686, "bacc": 0.3985805860805861, "bacc_std": 0.061649514178212954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0634101306934169, "f1": 0.43979200875752594, "f1_std": 0.06382133077768497, "bacc": 0.45650183150183155, "bacc_std": 0.06301258587866009} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06449405963195874, "f1": 0.4313607085346216, "f1_std": 0.06781856280567988, "bacc": 0.43475274725274726, "bacc_std": 0.06435751009204459} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06832414909175544, "f1": 0.48796536796536794, "f1_std": 0.07113357123469288, "bacc": 0.4951923076923077, "bacc_std": 0.06811849713185918} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06897842900964803, "f1": 0.5237179487179487, "f1_std": 0.06953226549361397, "bacc": 0.5192307692307693, "bacc_std": 0.06942424145922961} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.05053594420612653, "f1": 0.48430735930735924, "f1_std": 0.04386825411550989, "bacc": 0.5467032967032966, "bacc_std": 0.04889190701053372} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 9.999999999999999e-05, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.057936662008901824, "f1": 0.2853991207161396, "f1_std": 0.056704455840757256, "bacc": 0.30654761904761907, "bacc_std": 0.057791706779774396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06049201619774827, "f1": 0.45028419576289946, "f1_std": 0.06453271716824137, "bacc": 0.46565934065934067, "bacc_std": 0.061119697032854886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06311931033588775, "f1": 0.5153769841269842, "f1_std": 0.06917881899933104, "bacc": 0.529532967032967, "bacc_std": 0.06315810523328326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.07058382904799741, "f1": 0.4606896551724138, "f1_std": 0.07161411558485851, "bacc": 0.4658882783882784, "bacc_std": 0.0710119919128899} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06384193683941837, "f1": 0.4891116941529235, "f1_std": 0.06345930016322801, "bacc": 0.48489010989010994, "bacc_std": 0.0639201290998226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05778570644910803, "f1": 0.41849837662337663, "f1_std": 0.059248031736398275, "bacc": 0.4548992673992674, "bacc_std": 0.05672014035810992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06524794539828994, "f1": 0.3948825835221347, "f1_std": 0.06702828902770422, "bacc": 0.40613553113553114, "bacc_std": 0.06534546620802473} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.05917603615009939, "f1": 0.3531181379007466, "f1_std": 0.05983420045139798, "bacc": 0.3630952380952381, "bacc_std": 0.05875145918824808} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06561119113662595, "f1": 0.3701181357212213, "f1_std": 0.0660971633237626, "bacc": 0.3695054945054945, "bacc_std": 0.06612743182319211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06372614996490147, "f1": 0.3660041407867495, "f1_std": 0.06391961235658569, "bacc": 0.3649267399267399, "bacc_std": 0.06367563231087588} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.0702768300383326, "f1": 0.47649110149110147, "f1_std": 0.07049296336401929, "bacc": 0.48031135531135527, "bacc_std": 0.0703892466728851} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06683215530153522, "f1": 0.4252501725327812, "f1_std": 0.06878872871419349, "bacc": 0.4198717948717948, "bacc_std": 0.06670005454064401} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05769603834091805, "f1": 0.5020320914717467, "f1_std": 0.05582157797171618, "bacc": 0.530448717948718, "bacc_std": 0.056618913762078546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.061790397752708066, "f1": 0.43744343891402715, "f1_std": 0.06261157804239742, "bacc": 0.45650183150183155, "bacc_std": 0.061145302146378984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05378848560270081, "f1": 0.4418998596843261, "f1_std": 0.05724403149238457, "bacc": 0.46222527472527475, "bacc_std": 0.054003297459853834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06321041221996382, "f1": 0.36883116883116884, "f1_std": 0.06173740538091418, "bacc": 0.38369963369963367, "bacc_std": 0.06324689081836064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06067386626330382, "f1": 0.40388888888888885, "f1_std": 0.0521161739778682, "bacc": 0.45192307692307687, "bacc_std": 0.05917838127210574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05320892170597358, "f1": 0.3508522727272727, "f1_std": 0.049961582457160945, "bacc": 0.396978021978022, "bacc_std": 0.051563884928871054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06555313125817468, "f1": 0.47252314814814816, "f1_std": 0.0661023309160732, "bacc": 0.47596153846153844, "bacc_std": 0.06521740884693747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.059697102801059256, "f1": 0.35850401866735804, "f1_std": 0.06225002719502797, "bacc": 0.36744505494505497, "bacc_std": 0.06034869295075325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06720788203986214, "f1": 0.3802524544179523, "f1_std": 0.06570127360531582, "bacc": 0.37957875457875456, "bacc_std": 0.06677563322526223} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.062424581715593924, "f1": 0.4845914954610607, "f1_std": 0.06203738978332007, "bacc": 0.4908424908424909, "bacc_std": 0.06190462796249896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.056717977697400675, "f1": 0.37732954545454545, "f1_std": 0.05661087648440694, "bacc": 0.4043040293040293, "bacc_std": 0.056955932490691016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06596013678708083, "f1": 0.35993055555555553, "f1_std": 0.06733309495154545, "bacc": 0.36057692307692313, "bacc_std": 0.06585734900614695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.061337882249198714, "f1": 0.485695084485407, "f1_std": 0.06168436633125767, "bacc": 0.4965659340659341, "bacc_std": 0.06111939119103956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06395144237817096, "f1": 0.44165070242656446, "f1_std": 0.06325090735088686, "bacc": 0.4432234432234432, "bacc_std": 0.0641325429702632} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06690539112019857, "f1": 0.48126618508927355, "f1_std": 0.06740009244429777, "bacc": 0.4819139194139194, "bacc_std": 0.0670663034909975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06840294597275381, "f1": 0.5010878489326765, "f1_std": 0.06869249887700786, "bacc": 0.5011446886446886, "bacc_std": 0.0684168215857913} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06404670771578168, "f1": 0.3472086466165414, "f1_std": 0.06130593268705105, "bacc": 0.35874542124542125, "bacc_std": 0.06316124475223499} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06583559404901715, "f1": 0.40501165501165504, "f1_std": 0.06630027025722854, "bacc": 0.40819597069597074, "bacc_std": 0.0662187722047837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06440987235513461, "f1": 0.4091937474290416, "f1_std": 0.06450441791593017, "bacc": 0.4017857142857143, "bacc_std": 0.0643279039052277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 2.782559402207126, "split": "test", "acc": 0.5, "acc_std": 0.06263239232256211, "f1": 0.4914456233421751, "f1_std": 0.06295029434111293, "bacc": 0.4965659340659341, "bacc_std": 0.06250047627419866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06394482643969225, "f1": 0.5192307692307693, "f1_std": 0.06382540009981036, "bacc": 0.5203754578754579, "bacc_std": 0.06416098899994466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06293694599833735, "f1": 0.51, "f1_std": 0.06366880725590576, "bacc": 0.5157967032967034, "bacc_std": 0.06278865279374736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06405013753570866, "f1": 0.4504468366947145, "f1_std": 0.06623350146828673, "bacc": 0.459478021978022, "bacc_std": 0.06407603028815871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06997861042604206, "f1": 0.44671414292853573, "f1_std": 0.06909320909577493, "bacc": 0.44184981684981683, "bacc_std": 0.0701237442222691} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06590268735934394, "f1": 0.4226532567049809, "f1_std": 0.06498933178721925, "bacc": 0.4253663003663004, "bacc_std": 0.06628560757200418} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07070649393106455, "f1": 0.5421790367442542, "f1_std": 0.07097013247529732, "bacc": 0.5354853479853481, "bacc_std": 0.07089332358697414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06441861063737485, "f1": 0.4421130952380953, "f1_std": 0.06308756374435527, "bacc": 0.4551282051282052, "bacc_std": 0.06374891260639694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06327529781525701, "f1": 0.48094606542882407, "f1_std": 0.06263048039073062, "bacc": 0.48168498168498164, "bacc_std": 0.06363452546861556} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 2.782559402207126, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06353877584202389, "f1": 0.4582417582417583, "f1_std": 0.06532134669047739, "bacc": 0.46291208791208793, "bacc_std": 0.06373321116669038} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06430413819770164, "f1": 0.45473953470277, "f1_std": 0.06597214276511726, "bacc": 0.47573260073260076, "bacc_std": 0.0637480087565718} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06113178121530743, "f1": 0.41002866864935833, "f1_std": 0.05883305735730801, "bacc": 0.42216117216117216, "bacc_std": 0.06087249131765312} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.0624271765088243, "f1": 0.48259085419411507, "f1_std": 0.06659581681974798, "bacc": 0.4981684981684982, "bacc_std": 0.06236466767442612} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 9.999999999999999e-05, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05155803302329104, "f1": 0.4220924908424909, "f1_std": 0.060629012937797724, "bacc": 0.4741300366300366, "bacc_std": 0.050587019694515976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06675632408044745, "f1": 0.5084880636604774, "f1_std": 0.06957198710569922, "bacc": 0.5146520146520146, "bacc_std": 0.06673800058657993} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06650550540008482, "f1": 0.4261907507784569, "f1_std": 0.06399138451690853, "bacc": 0.42994505494505497, "bacc_std": 0.06720600327761954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06256276138747421, "f1": 0.4455956112852665, "f1_std": 0.06393337566021547, "bacc": 0.46382783882783885, "bacc_std": 0.06287964665450277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 9.999999999999999e-05, "split": "test", "acc": 0.3269230769230769, "acc_std": 0.061950674691035906, "f1": 0.30302022088034325, "f1_std": 0.06036256192181151, "bacc": 0.3244047619047619, "bacc_std": 0.06147668915682448} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06555073918945555, "f1": 0.47022546419098143, "f1_std": 0.06583701984551005, "bacc": 0.47870879120879123, "bacc_std": 0.06516030924721601} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06277426214752094, "f1": 0.3792724867724868, "f1_std": 0.0625036641596621, "bacc": 0.39010989010989006, "bacc_std": 0.06333045752912517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0564074751645371, "f1": 0.5102756380329909, "f1_std": 0.06432689840088908, "bacc": 0.5364010989010989, "bacc_std": 0.05638025375920742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06354772702046435, "f1": 0.40773809523809523, "f1_std": 0.06135619836245136, "bacc": 0.4251373626373627, "bacc_std": 0.06412196674569563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.061622190635340336, "f1": 0.40613041555294893, "f1_std": 0.06447920405614192, "bacc": 0.42239010989010994, "bacc_std": 0.061676114808781934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0652645957286284, "f1": 0.5049201251646904, "f1_std": 0.06158108280535813, "bacc": 0.51007326007326, "bacc_std": 0.0645898111086808} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07058395479537975, "f1": 0.5470488398477529, "f1_std": 0.0699059264024412, "bacc": 0.5384615384615384, "bacc_std": 0.07068616510381091} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06521982627732897, "f1": 0.4510281385281385, "f1_std": 0.0672644134173424, "bacc": 0.4610805860805861, "bacc_std": 0.06509616131638007} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05770731855999389, "f1": 0.42310105363984674, "f1_std": 0.056701285306138105, "bacc": 0.4548992673992674, "bacc_std": 0.05677297478324679} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.056843434409815166, "f1": 0.3917387218045113, "f1_std": 0.05714803211270748, "bacc": 0.4235347985347986, "bacc_std": 0.056977448894920055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 21.54434690031882, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06747968136001907, "f1": 0.3704878330065736, "f1_std": 0.0672051604286978, "bacc": 0.36790293040293043, "bacc_std": 0.06776703746178762} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 2.2012 | 16.903 | 0.72823 | 0.16903 | 0.72397 | 0.17468 | 0.72815 | 0.16973 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 2.2012 | 16.903 | 0.44365 | 0.060829 | 0.43181 | 0.060497 | 0.44178 | 0.060143 | + + +done! total time: 0:05:29 diff --git a/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..846b27e1764e8187479ad3b5f83f1a5137c22c47 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..2fec98eea11ffd08130e24ab216d7981881eb022 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.046415888336127774,train,0.947069943289225,0.009535696429641421,0.9456671655368724,0.009816371149924393,0.9446135831381733,0.010082945858950167 +flat_mae,patch,logistic,aabc_sex,,0.046415888336127774,test,0.9272727272727272,0.035074060957723675,0.9252717391304348,0.03572428615457714,0.9318181818181819,0.033511133930934155 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,train,0.9584120982986768,0.00906443759199902,0.9573624666608048,0.009298833687666277,0.9573624666608048,0.00944713604185389 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,test,0.8,0.05360018502496742,0.795677136102668,0.0550311239987463,0.7975543478260869,0.055265473862417897 +flat_mae,patch,logistic,aabc_sex,2,0.3593813663804626,train,0.998109640831758,0.0018287737516631017,0.9980631011617731,0.0018718264794708498,0.9983660130718954,0.0015807537820747885 +flat_mae,patch,logistic,aabc_sex,2,0.3593813663804626,test,0.8909090909090909,0.04432105258767371,0.8879076086956521,0.045610744781148495,0.8879076086956521,0.04584006156936709 +flat_mae,patch,logistic,aabc_sex,3,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,3,21.54434690031882,test,0.7636363636363637,0.05830826033368253,0.7555555555555555,0.060966395520253486,0.7540760869565217,0.06067902368230076 +flat_mae,patch,logistic,aabc_sex,4,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,4,21.54434690031882,test,0.8,0.054380587202261875,0.7997351870241642,0.05432364760190682,0.8158967391304348,0.050370238751659906 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,train,0.9584120982986768,0.008742083647550102,0.9573624666608048,0.008957272315736859,0.9573624666608048,0.009005424190937403 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,test,0.8181818181818182,0.0510358292552261,0.8074229691876751,0.055741434316438784,0.8009510869565217,0.055079608187171586 +flat_mae,patch,logistic,aabc_sex,6,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,6,166.81005372000556,test,0.8909090909090909,0.04513727821100338,0.8879076086956521,0.046481707555103766,0.8879076086956521,0.046446822356867964 +flat_mae,patch,logistic,aabc_sex,7,0.3593813663804626,train,0.994328922495274,0.0034317990814610433,0.9941961885745005,0.0035029660925376114,0.9950980392156863,0.0029663753498249882 +flat_mae,patch,logistic,aabc_sex,7,0.3593813663804626,test,0.8909090909090909,0.042009223701861685,0.8879076086956521,0.043363112025605145,0.8879076086956521,0.04351212206822735 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,train,0.9527410207939508,0.009120875441347608,0.9514580924590283,0.00938355286892491,0.9506360092617017,0.009583386492834961 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,test,0.8727272727272727,0.04230544579367593,0.8711943793911007,0.042519492917998385,0.8783967391304348,0.04144489680903666 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,train,0.9489603024574669,0.009885238119701628,0.9476400828491303,0.010147511656563043,0.9473680354054925,0.010285481958658515 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,test,0.8909090909090909,0.04114170633311166,0.8863636363636364,0.04365531978580378,0.8817934782608696,0.04477956636721617 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,train,0.9527410207939508,0.009136988024248838,0.951396009511314,0.009443729784056843,0.9500278437234386,0.009838519620988815 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,test,0.8909090909090909,0.042917888158283624,0.8879076086956521,0.04414070307969446,0.8879076086956521,0.04424423935287465 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,train,0.9508506616257089,0.00965190882966105,0.9497295321637427,0.009851453219753772,0.9508265189483864,0.00975528484459305 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,test,0.9090909090909091,0.039095559977027516,0.9045470322804582,0.0424735734702768,0.8974184782608696,0.04409067525408761 +flat_mae,patch,logistic,aabc_sex,12,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,12,2.782559402207126,test,0.8363636363636363,0.05045532841325338,0.8328267477203647,0.05162254992774571,0.8349184782608696,0.05186654982975368 +flat_mae,patch,logistic,aabc_sex,13,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,13,166.81005372000556,test,0.8545454545454545,0.04820775974024879,0.8505434782608696,0.04954225366136106,0.8505434782608696,0.04941861473914161 +flat_mae,patch,logistic,aabc_sex,14,0.005994842503189409,train,0.8998109640831758,0.012741554234287978,0.8963990762124712,0.013300890148959497,0.893329230047774,0.013647845006461042 +flat_mae,patch,logistic,aabc_sex,14,0.005994842503189409,test,0.8909090909090909,0.04165620805380018,0.8879076086956521,0.04297066165791267,0.8879076086956521,0.04327355008947624 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,train,0.9546313799621928,0.009025526530057086,0.9533701592525121,0.009318364060339222,0.9522699961898062,0.009690178857009646 +flat_mae,patch,logistic,aabc_sex,15,0.046415888336127774,test,0.8181818181818182,0.04855681174605831,0.8106060606060606,0.051457852391395116,0.8070652173913043,0.051478715493405776 +flat_mae,patch,logistic,aabc_sex,16,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,16,21.54434690031882,test,0.8363636363636363,0.04931095464431693,0.8307692307692308,0.0516500145886078,0.8288043478260869,0.052010224624454156 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,train,0.9527410207939508,0.009227896273387472,0.9516349047875045,0.00943735355281725,0.9524605058764911,0.009472023077610374 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,test,0.8545454545454545,0.0463629803875163,0.84593837535014,0.05075770734739274,0.8383152173913043,0.050714286880234015 +flat_mae,patch,logistic,aabc_sex,18,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,18,21.54434690031882,test,0.8727272727272727,0.04415262784313686,0.8711943793911007,0.044364481304215816,0.8783967391304348,0.04284902972808546 +flat_mae,patch,logistic,aabc_sex,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,19,2.782559402207126,test,0.8545454545454545,0.04489239292072498,0.8505434782608696,0.04622522556357376,0.8505434782608696,0.046252745469622346 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9489603024574669,0.009730614224420106,0.9475747398557507,0.010020367125526009,0.9467598698672295,0.010277179418361478 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8909090909090909,0.038474222345928795,0.884453781512605,0.04229176516852998,0.8756793478260869,0.04360663571660619 +flat_mae,patch,logistic,aabc_sex,21,0.3593813663804626,train,0.998109640831758,0.001926946498744512,0.998060743809227,0.001979502602717412,0.9977578475336323,0.002285548649856152 +flat_mae,patch,logistic,aabc_sex,21,0.3593813663804626,test,0.8363636363636363,0.05128541429108535,0.8281846581048247,0.055210821398869604,0.8226902173913043,0.05526197329397536 +flat_mae,patch,logistic,aabc_sex,22,0.046415888336127774,train,0.9527410207939508,0.009461091081534949,0.951577529044329,0.009684142016864315,0.951852340338228,0.009681362001875674 +flat_mae,patch,logistic,aabc_sex,22,0.046415888336127774,test,0.9272727272727272,0.034764168881429185,0.9260752688172043,0.03511102367777514,0.9313858695652174,0.03339806958094755 +flat_mae,patch,logistic,aabc_sex,23,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,23,21.54434690031882,test,0.8545454545454545,0.04682974261501367,0.8541114058355437,0.04672294886385042,0.8688858695652174,0.04281237831657952 +flat_mae,patch,logistic,aabc_sex,24,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,24,166.81005372000556,test,0.8545454545454545,0.044369656819508864,0.84593837535014,0.0490774419853704,0.8383152173913043,0.04918412495056353 +flat_mae,patch,logistic,aabc_sex,25,0.005994842503189409,train,0.8941398865784499,0.01237251325294254,0.8904567506803929,0.01292562267019994,0.887210938186934,0.013212663063100769 +flat_mae,patch,logistic,aabc_sex,25,0.005994842503189409,test,0.9636363636363636,0.025450804919970225,0.9626358695652174,0.026208980131858878,0.9626358695652174,0.0265330140469633 +flat_mae,patch,logistic,aabc_sex,26,0.005994842503189409,train,0.9054820415879017,0.012108626853292788,0.9023322107999527,0.012606656986244403,0.899447521908614,0.012908480956798599 +flat_mae,patch,logistic,aabc_sex,26,0.005994842503189409,test,0.8181818181818182,0.04847866021114616,0.8106060606060606,0.051597052806010305,0.8070652173913043,0.05156378485539002 +flat_mae,patch,logistic,aabc_sex,27,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,27,2.782559402207126,test,0.8181818181818182,0.04916627211869519,0.8106060606060606,0.052250736834412954,0.8070652173913043,0.05221801399489685 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,train,0.9054820415879017,0.012598167596131084,0.9023322107999527,0.013135204759889382,0.899447521908614,0.013544627445924932 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,test,0.8181818181818182,0.049570385720668596,0.8035714285714286,0.05722046764229968,0.7948369565217391,0.05554964208638689 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,train,0.9489603024574669,0.01041593047856945,0.9475747398557507,0.010724350376817127,0.9467598698672295,0.010989064433016978 +flat_mae,patch,logistic,aabc_sex,29,0.046415888336127774,test,0.8727272727272727,0.044352845185321045,0.8699763593380614,0.04519003997853287,0.8722826086956521,0.04487040211328107 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,train,0.996219281663516,0.002692389130571605,0.9961238606055277,0.0027610218895435124,0.9961238606055277,0.002820465075759719 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,test,0.8181818181818182,0.052988993087524784,0.8131793478260869,0.05445001763260876,0.8131793478260869,0.054043994464375106 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,train,0.9092627599243857,0.012154958885317885,0.9062389223679547,0.012677606709928587,0.9033236613030862,0.013092579515601556 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,test,0.7818181818181819,0.053207046075365944,0.7782258064516129,0.05362063585424122,0.7819293478260869,0.05300708466384377 +flat_mae,patch,logistic,aabc_sex,32,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,32,2.782559402207126,test,0.8181818181818182,0.048829458660299,0.8074229691876751,0.05367773260640902,0.8009510869565217,0.053021209005295895 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,train,0.943289224952741,0.009479336058750978,0.9417126990656403,0.009764515616848967,0.9406415780063895,0.010012052655784759 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,test,0.8909090909090909,0.04089873000109513,0.8879076086956521,0.042172607611893106,0.8879076086956521,0.042422029347501784 +flat_mae,patch,logistic,aabc_sex,34,0.046415888336127774,train,0.9508506616257089,0.009408605288834464,0.9495480822842386,0.009679543584321904,0.9490020223335971,0.009916446164765513 +flat_mae,patch,logistic,aabc_sex,34,0.046415888336127774,test,0.8545454545454545,0.048491096567112084,0.8521505376344086,0.04908375139146938,0.8566576086956521,0.048622728019332095 +flat_mae,patch,logistic,aabc_sex,35,0.046415888336127774,train,0.9546313799621928,0.008905802044634252,0.9534289990316049,0.009144113057422411,0.9528781617280695,0.009254517216023774 +flat_mae,patch,logistic,aabc_sex,35,0.046415888336127774,test,0.8545454545454545,0.04615144312455827,0.8521505376344086,0.04664207955515588,0.8566576086956521,0.04601273606881211 +flat_mae,patch,logistic,aabc_sex,36,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,36,21.54434690031882,test,0.8909090909090909,0.03916853181187062,0.8863636363636364,0.04143217295999993,0.8817934782608696,0.0421593452356514 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,train,0.9565217391304348,0.009066893301635657,0.9553414450623061,0.009337845152492434,0.9545121486561741,0.009633951843938811 +flat_mae,patch,logistic,aabc_sex,37,0.046415888336127774,test,0.7818181818181819,0.05653219370960084,0.7782258064516129,0.057471517133049835,0.7819293478260869,0.05728936521377037 +flat_mae,patch,logistic,aabc_sex,38,0.005994842503189409,train,0.9130434782608695,0.012358785422403203,0.9101456339359566,0.012864618783543617,0.9071998006975586,0.013217652234847284 +flat_mae,patch,logistic,aabc_sex,38,0.005994842503189409,test,0.8545454545454545,0.042554637703576356,0.8428571428571429,0.04894372924134342,0.8322010869565217,0.04855517293696971 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,train,0.9489603024574669,0.009112836308100735,0.9476400828491303,0.009369540898272825,0.9473680354054925,0.009611596274749558 +flat_mae,patch,logistic,aabc_sex,39,0.046415888336127774,test,0.8363636363636363,0.048117816840757215,0.8307692307692308,0.05034603697714955,0.8288043478260869,0.05060176883236828 +flat_mae,patch,logistic,aabc_sex,40,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,40,2.782559402207126,test,0.8909090909090909,0.03926294490961174,0.8879076086956521,0.040521418652632674,0.8879076086956521,0.04073438414883521 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,train,0.8979206049149339,0.013419116901837358,0.8943690095846646,0.014008260506492334,0.8910870775814063,0.014346150638928409 +flat_mae,patch,logistic,aabc_sex,41,0.005994842503189409,test,0.9090909090909091,0.03853454391036912,0.905982905982906,0.040360998772559116,0.9035326086956521,0.041054300161994184 +flat_mae,patch,logistic,aabc_sex,42,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,42,0.3593813663804626,test,0.7636363636363637,0.05956424686111821,0.7607895617263298,0.06010077969996637,0.7663043478260869,0.05960687356241817 +flat_mae,patch,logistic,aabc_sex,43,0.005994842503189409,train,0.8998109640831758,0.012545330415440578,0.896250328415428,0.013113257701166899,0.8927210645095108,0.013472520744686002 +flat_mae,patch,logistic,aabc_sex,43,0.005994842503189409,test,0.8545454545454545,0.04837149628544232,0.8505434782608696,0.049828634793461835,0.8505434782608696,0.04984259953057727 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,train,0.9584120982986768,0.008746568284317408,0.9573624666608048,0.008963036212247338,0.9573624666608048,0.009007182710470983 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,test,0.8,0.05094641488939768,0.790003471017008,0.054994556527002185,0.7853260869565217,0.05441850193932155 +flat_mae,patch,logistic,aabc_sex,45,0.3593813663804626,train,0.996219281663516,0.002587721496457814,0.9961238606055277,0.0026529802712065586,0.9961238606055277,0.002690091291493456 +flat_mae,patch,logistic,aabc_sex,45,0.3593813663804626,test,0.9090909090909091,0.03650699016049547,0.9086075108009306,0.036351808626755105,0.921875,0.031373194669175784 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.9527410207939508,0.00967633109890575,0.9514580924590283,0.009960518265538546,0.9506360092617017,0.010204518542046093 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.8909090909090909,0.04316374358568286,0.8891129032258065,0.043601522338402435,0.8940217391304348,0.04244351689345328 +flat_mae,patch,logistic,aabc_sex,47,0.005994842503189409,train,0.8998109640831758,0.012881792320831606,0.8966861598440545,0.013365920117525841,0.8945455611243003,0.013632807907730898 +flat_mae,patch,logistic,aabc_sex,47,0.005994842503189409,test,0.8727272727272727,0.04224776975873136,0.8663658451926415,0.045638659989561486,0.8600543478260869,0.046469068011797295 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,train,0.9017013232514177,0.012963661435270779,0.8981336098355799,0.013602343460340014,0.8943550514376154,0.014080032837654076 +flat_mae,patch,logistic,aabc_sex,48,0.005994842503189409,test,0.9090909090909091,0.037959456405203075,0.905982905982906,0.03961412854866127,0.9035326086956521,0.04038021936568441 +flat_mae,patch,logistic,aabc_sex,49,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,49,166.81005372000556,test,0.8181818181818182,0.052442919776751,0.8106060606060606,0.055341211835933726,0.8070652173913043,0.05519417390702738 +flat_mae,patch,logistic,aabc_sex,50,0.3593813663804626,train,0.996219281663516,0.0025297046026413,0.9961238606055277,0.0025938709592180113,0.9961238606055277,0.002632897626483147 +flat_mae,patch,logistic,aabc_sex,50,0.3593813663804626,test,0.8181818181818182,0.05291971152386029,0.8106060606060606,0.05604220561689455,0.8070652173913043,0.055451613410302567 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,train,0.947069943289225,0.008929272361348455,0.9456671655368724,0.009170301092184772,0.9451258829391249,0.009317026932208204 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,test,0.9090909090909091,0.03879608057225105,0.905982905982906,0.04056335962108123,0.9035326086956521,0.041079327618032194 +flat_mae,patch,logistic,aabc_sex,52,0.046415888336127774,train,0.9489603024574669,0.009751769880662882,0.9477037313678753,0.009986156724289217,0.9479762009437558,0.01005377011015622 +flat_mae,patch,logistic,aabc_sex,52,0.046415888336127774,test,0.9272727272727272,0.034364290518064215,0.9252717391304348,0.03548243281045991,0.9252717391304348,0.03590502248975702 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,train,0.947069943289225,0.01013184830861746,0.9455985191279309,0.01043219834551248,0.9445177174008617,0.01064873695355713 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,test,0.9090909090909091,0.037132668114778854,0.905982905982906,0.03880755714329223,0.9035326086956521,0.03968605241153058 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,train,0.9584120982986768,0.00865866713392863,0.9573624666608048,0.00888874700127431,0.9573624666608048,0.00911590490194993 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,test,0.8363636363636363,0.05027000157003416,0.8343927735028438,0.05054279930158628,0.8410326086956521,0.04924854913021658 +flat_mae,patch,logistic,aabc_sex,55,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,55,166.81005372000556,test,0.8727272727272727,0.043849479580767334,0.8699763593380614,0.044653963239154465,0.8722826086956521,0.04437453845293 +flat_mae,patch,logistic,aabc_sex,56,0.3593813663804626,train,0.998109640831758,0.0019003554223639796,0.9980631011617731,0.001944843815365902,0.9983660130718954,0.0016426274810956464 +flat_mae,patch,logistic,aabc_sex,56,0.3593813663804626,test,0.8363636363636363,0.048303106345706434,0.8307692307692308,0.050494865110545385,0.8288043478260869,0.05078730095183825 +flat_mae,patch,logistic,aabc_sex,57,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,57,2.782559402207126,test,0.8181818181818182,0.0506536317871003,0.8131793478260869,0.05238540433203689,0.8131793478260869,0.05267086233853475 +flat_mae,patch,logistic,aabc_sex,58,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,58,21.54434690031882,test,0.8181818181818182,0.05065421914663545,0.8166666666666667,0.05064978501386193,0.8254076086956521,0.04920174491321142 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,train,0.9546313799621928,0.009267940374578353,0.9534289990316049,0.009516243815776797,0.9528781617280695,0.009601116263118999 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,test,0.9272727272727272,0.03400455001154411,0.9260752688172043,0.03432378313067766,0.9313858695652174,0.03253269730241678 +flat_mae,patch,logistic,aabc_sex,60,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,60,166.81005372000556,test,0.8181818181818182,0.05450271052475694,0.8131793478260869,0.05617547551770596,0.8131793478260869,0.0561026224349438 +flat_mae,patch,logistic,aabc_sex,61,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,61,2.782559402207126,test,0.9090909090909091,0.03909393645782675,0.905982905982906,0.04079576282656648,0.9035326086956521,0.04193156964847208 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,train,0.9924385633270322,0.0038655880630990482,0.9922477212110554,0.003964281498348454,0.9922477212110554,0.004048926593556631 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,test,0.9636363636363636,0.023618874648666514,0.9630376344086022,0.023772365252808854,0.96875,0.02029747040119778 +flat_mae,patch,logistic,aabc_sex,63,0.046415888336127774,train,0.945179584120983,0.009737958429639067,0.943691387252473,0.010028864434874978,0.9428837304727571,0.010304481846557523 +flat_mae,patch,logistic,aabc_sex,63,0.046415888336127774,test,0.9454545454545454,0.030017493522200893,0.9442755825734549,0.030586492305141582,0.9470108695652174,0.029880341863299936 +flat_mae,patch,logistic,aabc_sex,64,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,64,166.81005372000556,test,0.8,0.053207841340964845,0.7989365237620472,0.05315535030257365,0.8097826086956521,0.05195920957039336 +flat_mae,patch,logistic,aabc_sex,65,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,65,2.782559402207126,test,0.8545454545454545,0.04894090212517995,0.8505434782608696,0.050607532048132786,0.8505434782608696,0.0508534662635076 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.9527410207939508,0.009353381022028947,0.9515185952306762,0.009585773187187471,0.9512441747999649,0.009607320311354767 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.8909090909090909,0.041036794616498666,0.8863636363636364,0.04339520053109175,0.8817934782608696,0.04423854622517791 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,train,0.9546313799621928,0.009111084866979553,0.9535421545667447,0.009318258109387979,0.9540944928045957,0.009337117258624373 +flat_mae,patch,logistic,aabc_sex,67,0.046415888336127774,test,0.8727272727272727,0.044312623427656836,0.8683760683760684,0.04624472892892121,0.8661684782608696,0.04676436808749032 +flat_mae,patch,logistic,aabc_sex,68,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,68,21.54434690031882,test,0.9090909090909091,0.037804263172948986,0.9045470322804582,0.04108514219757328,0.8974184782608696,0.04284679470925087 +flat_mae,patch,logistic,aabc_sex,69,0.3593813663804626,train,0.998109640831758,0.0017651501403643083,0.9980631011617731,0.0018067541979653356,0.9983660130718954,0.0015257588631580217 +flat_mae,patch,logistic,aabc_sex,69,0.3593813663804626,test,0.9090909090909091,0.035687884834589775,0.9045470322804582,0.038750348366196126,0.8974184782608696,0.04052811389287208 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,train,0.9508506616257089,0.009058332093293537,0.9495480822842386,0.00931732426112392,0.9490020223335971,0.009542219892060241 +flat_mae,patch,logistic,aabc_sex,70,0.046415888336127774,test,0.9272727272727272,0.035624691966190405,0.9252717391304348,0.036662654117502816,0.9252717391304348,0.036933790663580573 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,train,0.9489603024574669,0.00967634882523351,0.9477037313678753,0.00991790128735846,0.9479762009437558,0.010048978480507922 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,test,0.8545454545454545,0.04429017763241351,0.84593837535014,0.04870379047304653,0.8383152173913043,0.04842836700820749 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,train,0.996219281663516,0.002369819878142127,0.9961238606055277,0.0024298010120006094,0.9961238606055277,0.0024711955834450738 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,test,0.9272727272727272,0.03477808752506134,0.9252717391304348,0.035761751674160236,0.9252717391304348,0.035871101411233335 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,train,0.994328922495274,0.0035066972715459086,0.9941822314276811,0.0035999120143796904,0.99388170813916,0.0037904060624898636 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,test,0.9454545454545454,0.031112995876268406,0.9435897435897436,0.032527321155661384,0.9408967391304348,0.03380333960208342 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,train,0.8979206049149339,0.012465135572750027,0.8943690095846646,0.01300794969168114,0.8910870775814063,0.013319307796374273 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,test,0.8727272727272727,0.04687897553389404,0.8699763593380614,0.04782311720873875,0.8722826086956521,0.0475769569700974 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,train,0.9603024574669187,0.008318449000867853,0.959275619993768,0.008545262642648294,0.9589964535889094,0.008723602692975018 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,test,0.9090909090909091,0.03646451461964469,0.9071259709557582,0.03727505265281507,0.9096467391304348,0.03708450931046663 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,train,0.9584120982986768,0.00864198033026255,0.9574136416861827,0.008849373907208738,0.957970632199068,0.008931444311982933 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,test,0.8363636363636363,0.05261167265135705,0.8307692307692308,0.054768722847339114,0.8288043478260869,0.054562158531829816 +flat_mae,patch,logistic,aabc_sex,77,0.046415888336127774,train,0.9508506616257089,0.00920663215335661,0.9496706674473068,0.009404515871492885,0.9502183534101234,0.009303793774031364 +flat_mae,patch,logistic,aabc_sex,77,0.046415888336127774,test,0.8727272727272727,0.04458773503669454,0.8663658451926415,0.04799664493719112,0.8600543478260869,0.04878631688865249 +flat_mae,patch,logistic,aabc_sex,78,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,78,2.782559402207126,test,0.9272727272727272,0.033688431444980356,0.9252717391304348,0.03474051477576148,0.9252717391304348,0.035215221098591515 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,train,0.9489603024574669,0.009547594665984321,0.9475747398557507,0.009835472176946419,0.9467598698672295,0.01011066344203181 +flat_mae,patch,logistic,aabc_sex,79,0.046415888336127774,test,0.9272727272727272,0.03369233672096681,0.9229691876750701,0.037490197431910226,0.9130434782608696,0.04028431564463422 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,train,0.996219281663516,0.002782205424053034,0.9961238606055277,0.0028520240393210103,0.9961238606055277,0.0028606344195999216 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,test,0.8545454545454545,0.04654174701143084,0.84593837535014,0.05139602955550552,0.8383152173913043,0.051669693598193435 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,train,0.9546313799621928,0.00871597526461475,0.9535421545667447,0.008919026219923624,0.9540944928045957,0.00893830821863701 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,test,0.9272727272727272,0.03440092253231562,0.9252717391304348,0.0354125899574063,0.9252717391304348,0.03578115690072171 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,train,0.996219281663516,0.0025375211492149243,0.9961238606055277,0.0026018366532883723,0.9961238606055277,0.0026333968991472663 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,test,0.9090909090909091,0.03585380784784349,0.9045470322804582,0.0386490359895269,0.8974184782608696,0.040520596997251974 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,train,0.9035916824196597,0.012988569668281198,0.9004483312116013,0.013531448452850788,0.8978135349805094,0.01391854588605342 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,test,0.8727272727272727,0.041929953395341814,0.8639095086603039,0.04714004153528494,0.8539402173913043,0.0474508912672186 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,train,0.947069943289225,0.010249344718021984,0.9456671655368724,0.010527675407099436,0.9451258829391249,0.010659133612540894 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,test,0.8181818181818182,0.0546832078717086,0.8131793478260869,0.05635657253612301,0.8131793478260869,0.05654758436585441 +flat_mae,patch,logistic,aabc_sex,85,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,85,21.54434690031882,test,0.8363636363636363,0.0518420997432118,0.8328267477203647,0.052909222566861244,0.8349184782608696,0.05293165910331421 +flat_mae,patch,logistic,aabc_sex,86,0.046415888336127774,train,0.9489603024574669,0.009804080615356998,0.9476400828491303,0.010053782045262118,0.9473680354054925,0.010113761640450575 +flat_mae,patch,logistic,aabc_sex,86,0.046415888336127774,test,0.9090909090909091,0.03889670151092188,0.9045470322804582,0.04198546225871392,0.8974184782608696,0.04353830964933455 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,train,0.9546313799621928,0.009100887978605997,0.9534289990316049,0.00936040421456027,0.9528781617280695,0.00959912799991329 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,test,0.8909090909090909,0.041859757988346936,0.8879076086956521,0.04316055213588007,0.8879076086956521,0.04334652296904593 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,test,0.8909090909090909,0.04007231479741519,0.884453781512605,0.04409526730846388,0.8756793478260869,0.0456111216332922 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,train,0.941398865784499,0.010424922740935122,0.9398080346491953,0.010731999272089817,0.9390075910782849,0.010974925712016671 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,test,0.8909090909090909,0.039979680789534296,0.884453781512605,0.044249145716676555,0.8756793478260869,0.04571077394772666 +flat_mae,patch,logistic,aabc_sex,90,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,90,21.54434690031882,test,0.8363636363636363,0.0486175698608389,0.8328267477203647,0.04994980605139352,0.8349184782608696,0.05044766521950021 +flat_mae,patch,logistic,aabc_sex,91,0.3593813663804626,train,0.996219281663516,0.0026557332954077703,0.9961238606055277,0.0027232130437811635,0.9961238606055277,0.002773531257988806 +flat_mae,patch,logistic,aabc_sex,91,0.3593813663804626,test,0.8545454545454545,0.046105004120579746,0.84593837535014,0.050533806613032944,0.8383152173913043,0.05082916506734851 +flat_mae,patch,logistic,aabc_sex,92,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,92,2.782559402207126,test,0.9454545454545454,0.03130180466373484,0.9435897435897436,0.03278543254486196,0.9408967391304348,0.03422236722471859 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,train,0.996219281663516,0.002591113594904849,0.9961238606055277,0.002656293853434121,0.9961238606055277,0.0026792944259355503 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,test,0.8909090909090909,0.04265209634137634,0.8863636363636364,0.04531524432247312,0.8817934782608696,0.04615590143571519 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,train,0.947069943289225,0.009666805147432478,0.9455985191279309,0.009946179713213026,0.9445177174008617,0.010091423921061554 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,test,0.9090909090909091,0.03612584066338615,0.9045470322804582,0.03913413751620355,0.8974184782608696,0.0407576221532483 +flat_mae,patch,logistic,aabc_sex,95,0.3593813663804626,train,0.994328922495274,0.0031885490029133225,0.9941893034853195,0.003264857859794128,0.9944898736774231,0.0031482198682573946 +flat_mae,patch,logistic,aabc_sex,95,0.3593813663804626,test,0.9090909090909091,0.03836051689169931,0.9071259709557582,0.03903001332405473,0.9096467391304348,0.038407423165091666 +flat_mae,patch,logistic,aabc_sex,96,0.005994842503189409,train,0.9017013232514177,0.012810209116694017,0.8984254992319508,0.013347389724695768,0.8955713825141417,0.01367240301020457 +flat_mae,patch,logistic,aabc_sex,96,0.005994842503189409,test,0.9090909090909091,0.039017266785847736,0.905982905982906,0.04069436110218064,0.9035326086956521,0.04149131685556947 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,train,0.996219281663516,0.0027867869509911767,0.9961285128805621,0.0028487225168673746,0.9967320261437909,0.0024088403546966178 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,test,0.9272727272727272,0.03260380751481046,0.9260752688172043,0.03293551327990053,0.9313858695652174,0.03131327534776713 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,train,0.947069943289225,0.0098066440802416,0.9455985191279309,0.010108908347014493,0.9445177174008617,0.01037533819352579 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,test,0.8727272727272727,0.04298204891493213,0.8639095086603039,0.04840505745883859,0.8539402173913043,0.04899899745062858 +flat_mae,patch,logistic,aabc_sex,99,0.3593813663804626,train,0.9924385633270322,0.003762975379275345,0.9922477212110554,0.0038592002077335406,0.9922477212110554,0.003940751541773994 +flat_mae,patch,logistic,aabc_sex,99,0.3593813663804626,test,0.8909090909090909,0.04081704392015871,0.884453781512605,0.04509051605093132,0.8756793478260869,0.04632466661242926 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,train,0.9508506616257089,0.00937994314443808,0.9494843391902215,0.009670294663550218,0.9483938567953341,0.009974476879678094 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,test,0.8909090909090909,0.03909055390959148,0.8891129032258065,0.039556654667959316,0.8940217391304348,0.03865139588383457 diff --git a/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..82524097ee2e45721cb13a559618180536d94e0f --- /dev/null +++ b/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:17 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:22:00 time: 5.5942 data: 4.6034 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:42 time: 0.2181 data: 0.0731 max mem: 3393 +extract (train) [ 40/236] eta: 0:01:05 time: 0.1913 data: 0.0566 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:50 time: 0.1892 data: 0.0580 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:41 time: 0.1979 data: 0.0622 max mem: 3393 +extract (train) [100/236] eta: 0:00:35 time: 0.2253 data: 0.0778 max mem: 3393 +extract (train) [120/236] eta: 0:00:29 time: 0.2363 data: 0.0832 max mem: 3393 +extract (train) [140/236] eta: 0:00:23 time: 0.2232 data: 0.0747 max mem: 3393 +extract (train) [160/236] eta: 0:00:18 time: 0.1923 data: 0.0608 max mem: 3393 +extract (train) [180/236] eta: 0:00:13 time: 0.2134 data: 0.0703 max mem: 3393 +extract (train) [200/236] eta: 0:00:08 time: 0.2300 data: 0.0777 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1822 data: 0.0549 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1818 data: 0.0554 max mem: 3393 +extract (train) Total time: 0:00:55 (0.2331 s / it) +extract (validation) [ 0/29] eta: 0:02:39 time: 5.4846 data: 5.3296 max mem: 3393 +extract (validation) [20/29] eta: 0:00:04 time: 0.1938 data: 0.0585 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1684 data: 0.0464 max mem: 3393 +extract (validation) Total time: 0:00:11 (0.3859 s / it) +extract (test) [ 0/28] eta: 0:02:21 time: 5.0681 data: 4.8709 max mem: 3393 +extract (test) [20/28] eta: 0:00:03 time: 0.2060 data: 0.0640 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1834 data: 0.0523 max mem: 3393 +extract (test) Total time: 0:00:10 (0.3877 s / it) +feature extraction time: 0:01:17 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.046416 | train | 0.94707 | 0.0095357 | 0.94567 | 0.0098164 | 0.94461 | 0.010083 | +| flat_mae | patch | logistic | aabc_sex | | 0.046416 | test | 0.92727 | 0.035074 | 0.92527 | 0.035724 | 0.93182 | 0.033511 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05360018502496742, "f1": 0.795677136102668, "f1_std": 0.0550311239987463, "bacc": 0.7975543478260869, "bacc_std": 0.055265473862417897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04432105258767371, "f1": 0.8879076086956521, "f1_std": 0.045610744781148495, "bacc": 0.8879076086956521, "bacc_std": 0.04584006156936709} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 21.54434690031882, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05830826033368253, "f1": 0.7555555555555555, "f1_std": 0.060966395520253486, "bacc": 0.7540760869565217, "bacc_std": 0.06067902368230076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 21.54434690031882, "split": "test", "acc": 0.8, "acc_std": 0.054380587202261875, "f1": 0.7997351870241642, "f1_std": 0.05432364760190682, "bacc": 0.8158967391304348, "bacc_std": 0.050370238751659906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0510358292552261, "f1": 0.8074229691876751, "f1_std": 0.055741434316438784, "bacc": 0.8009510869565217, "bacc_std": 0.055079608187171586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 166.81005372000556, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04513727821100338, "f1": 0.8879076086956521, "f1_std": 0.046481707555103766, "bacc": 0.8879076086956521, "bacc_std": 0.046446822356867964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042009223701861685, "f1": 0.8879076086956521, "f1_std": 0.043363112025605145, "bacc": 0.8879076086956521, "bacc_std": 0.04351212206822735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04230544579367593, "f1": 0.8711943793911007, "f1_std": 0.042519492917998385, "bacc": 0.8783967391304348, "bacc_std": 0.04144489680903666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04114170633311166, "f1": 0.8863636363636364, "f1_std": 0.04365531978580378, "bacc": 0.8817934782608696, "bacc_std": 0.04477956636721617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042917888158283624, "f1": 0.8879076086956521, "f1_std": 0.04414070307969446, "bacc": 0.8879076086956521, "bacc_std": 0.04424423935287465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.039095559977027516, "f1": 0.9045470322804582, "f1_std": 0.0424735734702768, "bacc": 0.8974184782608696, "bacc_std": 0.04409067525408761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05045532841325338, "f1": 0.8328267477203647, "f1_std": 0.05162254992774571, "bacc": 0.8349184782608696, "bacc_std": 0.05186654982975368} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 166.81005372000556, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04820775974024879, "f1": 0.8505434782608696, "f1_std": 0.04954225366136106, "bacc": 0.8505434782608696, "bacc_std": 0.04941861473914161} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04165620805380018, "f1": 0.8879076086956521, "f1_std": 0.04297066165791267, "bacc": 0.8879076086956521, "bacc_std": 0.04327355008947624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04855681174605831, "f1": 0.8106060606060606, "f1_std": 0.051457852391395116, "bacc": 0.8070652173913043, "bacc_std": 0.051478715493405776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04931095464431693, "f1": 0.8307692307692308, "f1_std": 0.0516500145886078, "bacc": 0.8288043478260869, "bacc_std": 0.052010224624454156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0463629803875163, "f1": 0.84593837535014, "f1_std": 0.05075770734739274, "bacc": 0.8383152173913043, "bacc_std": 0.050714286880234015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 21.54434690031882, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04415262784313686, "f1": 0.8711943793911007, "f1_std": 0.044364481304215816, "bacc": 0.8783967391304348, "bacc_std": 0.04284902972808546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04489239292072498, "f1": 0.8505434782608696, "f1_std": 0.04622522556357376, "bacc": 0.8505434782608696, "bacc_std": 0.046252745469622346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.038474222345928795, "f1": 0.884453781512605, "f1_std": 0.04229176516852998, "bacc": 0.8756793478260869, "bacc_std": 0.04360663571660619} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05128541429108535, "f1": 0.8281846581048247, "f1_std": 0.055210821398869604, "bacc": 0.8226902173913043, "bacc_std": 0.05526197329397536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034764168881429185, "f1": 0.9260752688172043, "f1_std": 0.03511102367777514, "bacc": 0.9313858695652174, "bacc_std": 0.03339806958094755} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 21.54434690031882, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04682974261501367, "f1": 0.8541114058355437, "f1_std": 0.04672294886385042, "bacc": 0.8688858695652174, "bacc_std": 0.04281237831657952} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 166.81005372000556, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044369656819508864, "f1": 0.84593837535014, "f1_std": 0.0490774419853704, "bacc": 0.8383152173913043, "bacc_std": 0.04918412495056353} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.005994842503189409, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.025450804919970225, "f1": 0.9626358695652174, "f1_std": 0.026208980131858878, "bacc": 0.9626358695652174, "bacc_std": 0.0265330140469633} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04847866021114616, "f1": 0.8106060606060606, "f1_std": 0.051597052806010305, "bacc": 0.8070652173913043, "bacc_std": 0.05156378485539002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04916627211869519, "f1": 0.8106060606060606, "f1_std": 0.052250736834412954, "bacc": 0.8070652173913043, "bacc_std": 0.05221801399489685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.049570385720668596, "f1": 0.8035714285714286, "f1_std": 0.05722046764229968, "bacc": 0.7948369565217391, "bacc_std": 0.05554964208638689} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044352845185321045, "f1": 0.8699763593380614, "f1_std": 0.04519003997853287, "bacc": 0.8722826086956521, "bacc_std": 0.04487040211328107} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.052988993087524784, "f1": 0.8131793478260869, "f1_std": 0.05445001763260876, "bacc": 0.8131793478260869, "bacc_std": 0.054043994464375106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.053207046075365944, "f1": 0.7782258064516129, "f1_std": 0.05362063585424122, "bacc": 0.7819293478260869, "bacc_std": 0.05300708466384377} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.048829458660299, "f1": 0.8074229691876751, "f1_std": 0.05367773260640902, "bacc": 0.8009510869565217, "bacc_std": 0.053021209005295895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04089873000109513, "f1": 0.8879076086956521, "f1_std": 0.042172607611893106, "bacc": 0.8879076086956521, "bacc_std": 0.042422029347501784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048491096567112084, "f1": 0.8521505376344086, "f1_std": 0.04908375139146938, "bacc": 0.8566576086956521, "bacc_std": 0.048622728019332095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04615144312455827, "f1": 0.8521505376344086, "f1_std": 0.04664207955515588, "bacc": 0.8566576086956521, "bacc_std": 0.04601273606881211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 21.54434690031882, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03916853181187062, "f1": 0.8863636363636364, "f1_std": 0.04143217295999993, "bacc": 0.8817934782608696, "bacc_std": 0.0421593452356514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05653219370960084, "f1": 0.7782258064516129, "f1_std": 0.057471517133049835, "bacc": 0.7819293478260869, "bacc_std": 0.05728936521377037} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.042554637703576356, "f1": 0.8428571428571429, "f1_std": 0.04894372924134342, "bacc": 0.8322010869565217, "bacc_std": 0.04855517293696971} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048117816840757215, "f1": 0.8307692307692308, "f1_std": 0.05034603697714955, "bacc": 0.8288043478260869, "bacc_std": 0.05060176883236828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03926294490961174, "f1": 0.8879076086956521, "f1_std": 0.040521418652632674, "bacc": 0.8879076086956521, "bacc_std": 0.04073438414883521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03853454391036912, "f1": 0.905982905982906, "f1_std": 0.040360998772559116, "bacc": 0.9035326086956521, "bacc_std": 0.041054300161994184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05956424686111821, "f1": 0.7607895617263298, "f1_std": 0.06010077969996637, "bacc": 0.7663043478260869, "bacc_std": 0.05960687356241817} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04837149628544232, "f1": 0.8505434782608696, "f1_std": 0.049828634793461835, "bacc": 0.8505434782608696, "bacc_std": 0.04984259953057727} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05094641488939768, "f1": 0.790003471017008, "f1_std": 0.054994556527002185, "bacc": 0.7853260869565217, "bacc_std": 0.05441850193932155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03650699016049547, "f1": 0.9086075108009306, "f1_std": 0.036351808626755105, "bacc": 0.921875, "bacc_std": 0.031373194669175784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04316374358568286, "f1": 0.8891129032258065, "f1_std": 0.043601522338402435, "bacc": 0.8940217391304348, "bacc_std": 0.04244351689345328} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04224776975873136, "f1": 0.8663658451926415, "f1_std": 0.045638659989561486, "bacc": 0.8600543478260869, "bacc_std": 0.046469068011797295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.037959456405203075, "f1": 0.905982905982906, "f1_std": 0.03961412854866127, "bacc": 0.9035326086956521, "bacc_std": 0.04038021936568441} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.052442919776751, "f1": 0.8106060606060606, "f1_std": 0.055341211835933726, "bacc": 0.8070652173913043, "bacc_std": 0.05519417390702738} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05291971152386029, "f1": 0.8106060606060606, "f1_std": 0.05604220561689455, "bacc": 0.8070652173913043, "bacc_std": 0.055451613410302567} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03879608057225105, "f1": 0.905982905982906, "f1_std": 0.04056335962108123, "bacc": 0.9035326086956521, "bacc_std": 0.041079327618032194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034364290518064215, "f1": 0.9252717391304348, "f1_std": 0.03548243281045991, "bacc": 0.9252717391304348, "bacc_std": 0.03590502248975702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.037132668114778854, "f1": 0.905982905982906, "f1_std": 0.03880755714329223, "bacc": 0.9035326086956521, "bacc_std": 0.03968605241153058} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05027000157003416, "f1": 0.8343927735028438, "f1_std": 0.05054279930158628, "bacc": 0.8410326086956521, "bacc_std": 0.04924854913021658} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 166.81005372000556, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043849479580767334, "f1": 0.8699763593380614, "f1_std": 0.044653963239154465, "bacc": 0.8722826086956521, "bacc_std": 0.04437453845293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048303106345706434, "f1": 0.8307692307692308, "f1_std": 0.050494865110545385, "bacc": 0.8288043478260869, "bacc_std": 0.05078730095183825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 2.782559402207126, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0506536317871003, "f1": 0.8131793478260869, "f1_std": 0.05238540433203689, "bacc": 0.8131793478260869, "bacc_std": 0.05267086233853475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 21.54434690031882, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05065421914663545, "f1": 0.8166666666666667, "f1_std": 0.05064978501386193, "bacc": 0.8254076086956521, "bacc_std": 0.04920174491321142} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03400455001154411, "f1": 0.9260752688172043, "f1_std": 0.03432378313067766, "bacc": 0.9313858695652174, "bacc_std": 0.03253269730241678} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 166.81005372000556, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05450271052475694, "f1": 0.8131793478260869, "f1_std": 0.05617547551770596, "bacc": 0.8131793478260869, "bacc_std": 0.0561026224349438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 2.782559402207126, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03909393645782675, "f1": 0.905982905982906, "f1_std": 0.04079576282656648, "bacc": 0.9035326086956521, "bacc_std": 0.04193156964847208} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.9636363636363636, "acc_std": 0.023618874648666514, "f1": 0.9630376344086022, "f1_std": 0.023772365252808854, "bacc": 0.96875, "bacc_std": 0.02029747040119778} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.030017493522200893, "f1": 0.9442755825734549, "f1_std": 0.030586492305141582, "bacc": 0.9470108695652174, "bacc_std": 0.029880341863299936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 166.81005372000556, "split": "test", "acc": 0.8, "acc_std": 0.053207841340964845, "f1": 0.7989365237620472, "f1_std": 0.05315535030257365, "bacc": 0.8097826086956521, "bacc_std": 0.05195920957039336} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04894090212517995, "f1": 0.8505434782608696, "f1_std": 0.050607532048132786, "bacc": 0.8505434782608696, "bacc_std": 0.0508534662635076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041036794616498666, "f1": 0.8863636363636364, "f1_std": 0.04339520053109175, "bacc": 0.8817934782608696, "bacc_std": 0.04423854622517791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044312623427656836, "f1": 0.8683760683760684, "f1_std": 0.04624472892892121, "bacc": 0.8661684782608696, "bacc_std": 0.04676436808749032} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 21.54434690031882, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.037804263172948986, "f1": 0.9045470322804582, "f1_std": 0.04108514219757328, "bacc": 0.8974184782608696, "bacc_std": 0.04284679470925087} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.035687884834589775, "f1": 0.9045470322804582, "f1_std": 0.038750348366196126, "bacc": 0.8974184782608696, "bacc_std": 0.04052811389287208} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.035624691966190405, "f1": 0.9252717391304348, "f1_std": 0.036662654117502816, "bacc": 0.9252717391304348, "bacc_std": 0.036933790663580573} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04429017763241351, "f1": 0.84593837535014, "f1_std": 0.04870379047304653, "bacc": 0.8383152173913043, "bacc_std": 0.04842836700820749} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03477808752506134, "f1": 0.9252717391304348, "f1_std": 0.035761751674160236, "bacc": 0.9252717391304348, "bacc_std": 0.035871101411233335} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.031112995876268406, "f1": 0.9435897435897436, "f1_std": 0.032527321155661384, "bacc": 0.9408967391304348, "bacc_std": 0.03380333960208342} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04687897553389404, "f1": 0.8699763593380614, "f1_std": 0.04782311720873875, "bacc": 0.8722826086956521, "bacc_std": 0.0475769569700974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03646451461964469, "f1": 0.9071259709557582, "f1_std": 0.03727505265281507, "bacc": 0.9096467391304348, "bacc_std": 0.03708450931046663} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05261167265135705, "f1": 0.8307692307692308, "f1_std": 0.054768722847339114, "bacc": 0.8288043478260869, "bacc_std": 0.054562158531829816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04458773503669454, "f1": 0.8663658451926415, "f1_std": 0.04799664493719112, "bacc": 0.8600543478260869, "bacc_std": 0.04878631688865249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 2.782559402207126, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.033688431444980356, "f1": 0.9252717391304348, "f1_std": 0.03474051477576148, "bacc": 0.9252717391304348, "bacc_std": 0.035215221098591515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03369233672096681, "f1": 0.9229691876750701, "f1_std": 0.037490197431910226, "bacc": 0.9130434782608696, "bacc_std": 0.04028431564463422} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04654174701143084, "f1": 0.84593837535014, "f1_std": 0.05139602955550552, "bacc": 0.8383152173913043, "bacc_std": 0.051669693598193435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03440092253231562, "f1": 0.9252717391304348, "f1_std": 0.0354125899574063, "bacc": 0.9252717391304348, "bacc_std": 0.03578115690072171} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03585380784784349, "f1": 0.9045470322804582, "f1_std": 0.0386490359895269, "bacc": 0.8974184782608696, "bacc_std": 0.040520596997251974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.041929953395341814, "f1": 0.8639095086603039, "f1_std": 0.04714004153528494, "bacc": 0.8539402173913043, "bacc_std": 0.0474508912672186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0546832078717086, "f1": 0.8131793478260869, "f1_std": 0.05635657253612301, "bacc": 0.8131793478260869, "bacc_std": 0.05654758436585441} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0518420997432118, "f1": 0.8328267477203647, "f1_std": 0.052909222566861244, "bacc": 0.8349184782608696, "bacc_std": 0.05293165910331421} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03889670151092188, "f1": 0.9045470322804582, "f1_std": 0.04198546225871392, "bacc": 0.8974184782608696, "bacc_std": 0.04353830964933455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041859757988346936, "f1": 0.8879076086956521, "f1_std": 0.04316055213588007, "bacc": 0.8879076086956521, "bacc_std": 0.04334652296904593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04007231479741519, "f1": 0.884453781512605, "f1_std": 0.04409526730846388, "bacc": 0.8756793478260869, "bacc_std": 0.0456111216332922} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.039979680789534296, "f1": 0.884453781512605, "f1_std": 0.044249145716676555, "bacc": 0.8756793478260869, "bacc_std": 0.04571077394772666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.0486175698608389, "f1": 0.8328267477203647, "f1_std": 0.04994980605139352, "bacc": 0.8349184782608696, "bacc_std": 0.05044766521950021} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046105004120579746, "f1": 0.84593837535014, "f1_std": 0.050533806613032944, "bacc": 0.8383152173913043, "bacc_std": 0.05082916506734851} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.03130180466373484, "f1": 0.9435897435897436, "f1_std": 0.03278543254486196, "bacc": 0.9408967391304348, "bacc_std": 0.03422236722471859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04265209634137634, "f1": 0.8863636363636364, "f1_std": 0.04531524432247312, "bacc": 0.8817934782608696, "bacc_std": 0.04615590143571519} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03612584066338615, "f1": 0.9045470322804582, "f1_std": 0.03913413751620355, "bacc": 0.8974184782608696, "bacc_std": 0.0407576221532483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03836051689169931, "f1": 0.9071259709557582, "f1_std": 0.03903001332405473, "bacc": 0.9096467391304348, "bacc_std": 0.038407423165091666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.039017266785847736, "f1": 0.905982905982906, "f1_std": 0.04069436110218064, "bacc": 0.9035326086956521, "bacc_std": 0.04149131685556947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03260380751481046, "f1": 0.9260752688172043, "f1_std": 0.03293551327990053, "bacc": 0.9313858695652174, "bacc_std": 0.03131327534776713} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04298204891493213, "f1": 0.8639095086603039, "f1_std": 0.04840505745883859, "bacc": 0.8539402173913043, "bacc_std": 0.04899899745062858} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04081704392015871, "f1": 0.884453781512605, "f1_std": 0.04509051605093132, "bacc": 0.8756793478260869, "bacc_std": 0.04632466661242926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03909055390959148, "f1": 0.8891129032258065, "f1_std": 0.039556654667959316, "bacc": 0.8940217391304348, "bacc_std": 0.03865139588383457} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 14.225 | 42.554 | 0.9672 | 0.033586 | 0.96625 | 0.034661 | 0.96573 | 0.035541 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 14.225 | 42.554 | 0.87073 | 0.04505 | 0.86632 | 0.046385 | 0.86524 | 0.046419 | + + +done! total time: 0:05:11 diff --git a/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4988b98dcd9d3775bdd0d17b6df11676980d69b9 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..0b312aa476ab4812937baa91d6dcbf235cfcdd1f --- /dev/null +++ b/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.005994842503189409,train,0.7065527065527065,0.017208045838620904,0.694756838905775,0.018360973771058428,0.6935370017729332,0.017757128609408156 +flat_mae,patch,logistic,abide_dx,,0.005994842503189409,test,0.5806451612903226,0.043356518784651396,0.5643243243243243,0.04623968953490457,0.5687352710133542,0.04398384019343657 +flat_mae,patch,logistic,abide_dx,1,0.3593813663804626,train,0.896011396011396,0.01179064708186545,0.8943893165167394,0.012044782717215116,0.8926910299003323,0.01222160015693287 +flat_mae,patch,logistic,abide_dx,1,0.3593813663804626,test,0.6370967741935484,0.0424969209878119,0.6330637206549615,0.04326001729120264,0.6328781512605042,0.04300602449563238 +flat_mae,patch,logistic,abide_dx,2,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,2,21.54434690031882,test,0.5806451612903226,0.044025613938050524,0.5796610169491525,0.04418305712117786,0.58140756302521,0.04420926734735061 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,train,0.8034188034188035,0.013949452001522023,0.7990074441687345,0.014420156428859465,0.7966039128829827,0.014396717846957523 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,test,0.5806451612903226,0.0425296126252289,0.5752305665349143,0.04349086000994006,0.5751050420168067,0.04320194904857804 +flat_mae,patch,logistic,abide_dx,4,0.005994842503189409,train,0.6994301994301995,0.016246944527125093,0.6871651146926078,0.017338670643436783,0.6863418235511258,0.01675260176592596 +flat_mae,patch,logistic,abide_dx,4,0.005994842503189409,test,0.6854838709677419,0.040645222094168564,0.6761968530297957,0.042619126110024316,0.6754201680672269,0.041594473513382135 +flat_mae,patch,logistic,abide_dx,5,0.005994842503189409,train,0.7051282051282052,0.01731217955032774,0.694489354410766,0.01828596359486995,0.69328165374677,0.017779993567526773 +flat_mae,patch,logistic,abide_dx,5,0.005994842503189409,test,0.5241935483870968,0.04262698785572874,0.5239148825405089,0.04276260926816436,0.5267857142857143,0.04310186842082049 +flat_mae,patch,logistic,abide_dx,6,0.005994842503189409,train,0.6923076923076923,0.0175592001162217,0.6833611760775142,0.018314932617584368,0.6822443706164636,0.01795377364174559 +flat_mae,patch,logistic,abide_dx,6,0.005994842503189409,test,0.5725806451612904,0.04559223821248224,0.5573516535327002,0.04730084633118738,0.5598739495798319,0.04596275670073352 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,train,0.7051282051282052,0.01644229027555482,0.6960994472596165,0.017204459865141623,0.6947582133628645,0.016872364682928916 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,test,0.6129032258064516,0.04357529336065589,0.6025641025641025,0.04587021839338623,0.6029411764705883,0.044571624472676034 +flat_mae,patch,logistic,abide_dx,8,0.000774263682681127,train,0.6638176638176638,0.015563328364959,0.6390179279569799,0.017584768502971294,0.6437061646363972,0.016099157179163232 +flat_mae,patch,logistic,abide_dx,8,0.000774263682681127,test,0.5403225806451613,0.040262468095594986,0.5005299978800084,0.04530937336890542,0.5178571428571428,0.040808688002002866 +flat_mae,patch,logistic,abide_dx,9,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,9,1291.5496650148827,test,0.6129032258064516,0.04099431129794625,0.5951020408163266,0.043750001679313935,0.5982142857142857,0.04158853768147724 +flat_mae,patch,logistic,abide_dx,10,0.005994842503189409,train,0.7008547008547008,0.016531968660025747,0.6902316433566433,0.017501795230383223,0.689110372831303,0.017033256349994086 +flat_mae,patch,logistic,abide_dx,10,0.005994842503189409,test,0.6129032258064516,0.04279604304989432,0.6003223207091055,0.04536209530544602,0.6013655462184874,0.043755650164269425 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,train,0.9002849002849003,0.010922938707666757,0.898905529953917,0.011112150363943066,0.8977482465854558,0.01122533931587058 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,test,0.5564516129032258,0.0451146187409527,0.5479551932126997,0.04611618996655193,0.5483193277310925,0.045520432421102736 +flat_mae,patch,logistic,abide_dx,12,0.005994842503189409,train,0.7136752136752137,0.01660061203585384,0.7063330024163186,0.017218832149954996,0.7048726467331119,0.016985263598381636 +flat_mae,patch,logistic,abide_dx,12,0.005994842503189409,test,0.5241935483870968,0.04538817145711648,0.5171275823377994,0.04608528943645212,0.5173319327731093,0.0457460718257316 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,train,0.9017094017094017,0.011576973469843987,0.9005124518613608,0.011743715557501238,0.8999261720191953,0.011842319523774745 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,test,0.5887096774193549,0.043912986874421224,0.5826018084614877,0.04449344280200769,0.5824579831932774,0.04404552942872 +flat_mae,patch,logistic,abide_dx,14,2.782559402207126,train,0.9857549857549858,0.004209893145757845,0.9856035437430786,0.0042556901403376915,0.9856035437430786,0.004308415244739394 +flat_mae,patch,logistic,abide_dx,14,2.782559402207126,test,0.5403225806451613,0.04148230318614761,0.5292707292707293,0.04363719350081013,0.5304621848739496,0.04235046056734061 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,train,0.7877492877492878,0.01462480911046432,0.7834423388674605,0.015088309378216364,0.7815060908084164,0.015087192243270044 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,test,0.6612903225806451,0.039947477874749365,0.6481081081081081,0.04248672917949669,0.6486344537815126,0.04087131635846684 +flat_mae,patch,logistic,abide_dx,16,0.3593813663804626,train,0.9002849002849003,0.011435164578473636,0.8991023103394467,0.011606255009657012,0.8986341823551125,0.011739762271950322 +flat_mae,patch,logistic,abide_dx,16,0.3593813663804626,test,0.532258064516129,0.04746152119092375,0.5291961246399581,0.047819309900422935,0.5294117647058824,0.047824147274846456 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,train,0.8988603988603988,0.011893544830413126,0.8975631110462571,0.012074586253114429,0.8967515688445922,0.01217499915574558 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,test,0.5403225806451613,0.04245302233173577,0.5267492467358554,0.044453885953759244,0.5288865546218487,0.043100823376115496 +flat_mae,patch,logistic,abide_dx,18,0.046415888336127774,train,0.7905982905982906,0.01463070455469994,0.7858063551960546,0.015101849162073735,0.7834994462901439,0.015032517484343887 +flat_mae,patch,logistic,abide_dx,18,0.046415888336127774,test,0.6612903225806451,0.03969471167895989,0.6481081081081081,0.04203411390986929,0.6486344537815126,0.040389583263485254 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,train,0.792022792022792,0.014823699943522346,0.7878890728476822,0.015198795936671184,0.7859726836471022,0.0151377549170298 +flat_mae,patch,logistic,abide_dx,19,0.046415888336127774,test,0.5725806451612904,0.04329260606448079,0.5478500171998624,0.046875672072106696,0.5551470588235294,0.04396809762004359 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,train,0.792022792022792,0.015557689291476714,0.7882256678127479,0.015995646197920668,0.7865633074935401,0.016019314024552284 +flat_mae,patch,logistic,abide_dx,20,0.046415888336127774,test,0.5483870967741935,0.04323622888311722,0.5407407407407407,0.04428516004474335,0.5409663865546219,0.04376868868292745 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,train,0.8062678062678063,0.014121542095429731,0.802089552238806,0.014565922187177692,0.7997785160575859,0.014562819417323118 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,test,0.5967741935483871,0.043082284603048915,0.58994708994709,0.04382938156833008,0.5898109243697479,0.04342948305433757 +flat_mae,patch,logistic,abide_dx,22,0.005994842503189409,train,0.7136752136752137,0.01607472420115496,0.7030150595119076,0.017019969265607574,0.7016242155777039,0.01651545959690139 +flat_mae,patch,logistic,abide_dx,22,0.005994842503189409,test,0.6048387096774194,0.04326513316955097,0.5819745442036464,0.04722246824753425,0.5877100840336134,0.04404578043856335 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,train,0.9017094017094017,0.010832061322176803,0.9003831417624522,0.011014000711427867,0.8993355481727574,0.011152630301266106 +flat_mae,patch,logistic,abide_dx,23,0.3593813663804626,test,0.6129032258064516,0.04394438827866721,0.6045708211533352,0.04551626461247428,0.6045168067226891,0.04473882836763189 +flat_mae,patch,logistic,abide_dx,24,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,24,166.81005372000556,test,0.5564516129032258,0.046327957613170274,0.5541026479241582,0.04667362008050804,0.5546218487394958,0.046881562942394386 +flat_mae,patch,logistic,abide_dx,25,0.046415888336127774,train,0.8034188034188035,0.015091347463794773,0.8001336898395722,0.015398870190925541,0.7986710963455149,0.015403317303724398 +flat_mae,patch,logistic,abide_dx,25,0.046415888336127774,test,0.5564516129032258,0.04560018301440172,0.551522325244953,0.045760678407232094,0.5514705882352942,0.04558263929707679 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,train,0.7065527065527065,0.016748954094244515,0.6991662228731195,0.017274695663953274,0.6978220745662607,0.01706083043505936 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,test,0.5564516129032258,0.04458269917307287,0.5376584638329605,0.046641105173601864,0.542016806722689,0.0448864626251833 +flat_mae,patch,logistic,abide_dx,27,2.782559402207126,train,0.9843304843304843,0.0046234286764824314,0.9841685408692337,0.004669980819806367,0.984311554078996,0.004656822635217527 +flat_mae,patch,logistic,abide_dx,27,2.782559402207126,test,0.5887096774193549,0.04236706884329536,0.5873947935016637,0.042423012099358094,0.5887605042016807,0.04255993631312756 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,train,0.8931623931623932,0.011505842183315716,0.8917208062635349,0.011708256139481364,0.8906976744186046,0.011833677039663508 +flat_mae,patch,logistic,abide_dx,28,0.3593813663804626,test,0.5967741935483871,0.044821356005975095,0.5958279009126467,0.04503744787496478,0.5976890756302521,0.045068994567998885 +flat_mae,patch,logistic,abide_dx,29,0.3593813663804626,train,0.8903133903133903,0.012591319823265642,0.8889064725431239,0.012757406867296026,0.8881136950904394,0.012756348246337803 +flat_mae,patch,logistic,abide_dx,29,0.3593813663804626,test,0.6048387096774194,0.04422726227574919,0.6017043592264831,0.04480626739921381,0.601890756302521,0.04483625434976634 +flat_mae,patch,logistic,abide_dx,30,0.005994842503189409,train,0.707977207977208,0.01722012457798748,0.6987282147903909,0.01791790795544156,0.6973421926910299,0.017542812476527017 +flat_mae,patch,logistic,abide_dx,30,0.005994842503189409,test,0.5887096774193549,0.042651505067175736,0.5788211788211788,0.04419074209616156,0.5793067226890757,0.04318476187344522 +flat_mae,patch,logistic,abide_dx,31,0.3593813663804626,train,0.8931623931623932,0.011940307917212515,0.8919288392972604,0.012105773367119353,0.8915836101882613,0.012222431787537413 +flat_mae,patch,logistic,abide_dx,31,0.3593813663804626,test,0.5564516129032258,0.04678427032723744,0.5550336008351275,0.046818621667102274,0.5561974789915967,0.04692458479525845 +flat_mae,patch,logistic,abide_dx,32,0.046415888336127774,train,0.792022792022792,0.015805419450280918,0.7878890728476822,0.016223554879518737,0.7859726836471022,0.016195391981049117 +flat_mae,patch,logistic,abide_dx,32,0.046415888336127774,test,0.6451612903225806,0.04491305915225716,0.6313513513513513,0.04731131809621296,0.6323529411764706,0.04563179736993404 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,train,0.7279202279202279,0.015440967544345464,0.7201465201465203,0.016083611405290885,0.7183831672203765,0.015811114084195128 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,test,0.5645161290322581,0.04131592893002259,0.5444897959183673,0.04405425095971774,0.5493697478991597,0.04185917483361585 +flat_mae,patch,logistic,abide_dx,34,0.046415888336127774,train,0.8062678062678063,0.014983771114337144,0.8024172185430463,0.015438414609823757,0.8003691399040236,0.015450116362356926 +flat_mae,patch,logistic,abide_dx,34,0.046415888336127774,test,0.5887096774193549,0.044164751036670793,0.5808311791608669,0.04547776753304052,0.5808823529411764,0.044853100719512366 +flat_mae,patch,logistic,abide_dx,35,2.782559402207126,train,0.98005698005698,0.005548903900234551,0.9798449612403101,0.005607963009219656,0.9798449612403101,0.005614453877564336 +flat_mae,patch,logistic,abide_dx,35,2.782559402207126,test,0.6129032258064516,0.046109188152145424,0.6119947848761408,0.04624728108170697,0.6139705882352942,0.04614374275085244 +flat_mae,patch,logistic,abide_dx,36,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,36,21.54434690031882,test,0.5483870967741935,0.04514025719885952,0.5425559947299078,0.045862543630767166,0.5425420168067226,0.04555535424012755 +flat_mae,patch,logistic,abide_dx,37,2.782559402207126,train,0.9857549857549858,0.004453729617115768,0.9855949175914374,0.0045073247174157284,0.9853082318198597,0.004624477516699454 +flat_mae,patch,logistic,abide_dx,37,2.782559402207126,test,0.5887096774193549,0.04111234323009194,0.5841388834089565,0.04167765157584953,0.5840336134453781,0.04154285473332461 +flat_mae,patch,logistic,abide_dx,38,0.3593813663804626,train,0.8903133903133903,0.011042059890744587,0.8889064725431239,0.011203116902713908,0.8881136950904394,0.011271099146941842 +flat_mae,patch,logistic,abide_dx,38,0.3593813663804626,test,0.5887096774193549,0.04406348591342805,0.5826018084614877,0.04471821139379286,0.5824579831932774,0.04426924733316521 +flat_mae,patch,logistic,abide_dx,39,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,39,10000.0,test,0.5241935483870968,0.04300536895068809,0.5241626016260164,0.04303873043386993,0.5283613445378151,0.04301848415086838 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,train,0.7948717948717948,0.015368676717812207,0.7907947019867549,0.015800180155205453,0.7888519748984866,0.015756308487903103 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,test,0.5080645161290323,0.04307684975008339,0.4986412142904487,0.04443082154302219,0.4994747899159664,0.04363011326185973 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,train,0.6552706552706553,0.016260785528294495,0.6298404176508015,0.01861643208213238,0.6350682908822444,0.016934119818265096 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,test,0.6129032258064516,0.03525930036496536,0.5727820844099913,0.04286697549479839,0.5887605042016807,0.03665688287048688 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,train,0.8005698005698005,0.014376470087762043,0.7962686567164179,0.01480827621372188,0.7940199335548173,0.01475698236364058 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,test,0.5241935483870968,0.04371038863842274,0.5171275823377994,0.04424756380215423,0.5173319327731093,0.043759062875907535 +flat_mae,patch,logistic,abide_dx,43,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,43,10000.0,test,0.5806451612903226,0.04432719373091729,0.5796610169491525,0.044217580589399404,0.58140756302521,0.04411666910680797 +flat_mae,patch,logistic,abide_dx,44,0.000774263682681127,train,0.6509971509971509,0.017736744961158825,0.6305890085248691,0.01934653255045941,0.6335548172757475,0.018194957861283834 +flat_mae,patch,logistic,abide_dx,44,0.000774263682681127,test,0.5725806451612904,0.03667234055681029,0.5256586070010827,0.0434268387458536,0.5472689075630253,0.03736487636734959 +flat_mae,patch,logistic,abide_dx,45,0.3593813663804626,train,0.8988603988603988,0.010875706320924343,0.8974956965961465,0.011046532020061305,0.8964562569213732,0.011131947596878211 +flat_mae,patch,logistic,abide_dx,45,0.3593813663804626,test,0.5564516129032258,0.04279211904841225,0.5479551932126997,0.04368599442327077,0.5483193277310925,0.0430243210131684 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,train,0.7108262108262108,0.015920426757767732,0.7013573522724177,0.016839908762547064,0.6999261720191953,0.016446656800952246 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,test,0.4838709677419355,0.04297361434248471,0.4772068511198946,0.04349162249695439,0.4774159663865546,0.04329838104815404 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,train,0.792022792022792,0.014916622573919056,0.7878890728476822,0.015354214921462517,0.7859726836471022,0.015327839456342654 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,test,0.5887096774193549,0.04205292488228431,0.5765651155005022,0.0436832256764194,0.5777310924369747,0.042632506663133306 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,train,0.7849002849002849,0.015254950837434922,0.7805355246240706,0.015669969942629397,0.7786267995570322,0.015612206146540408 +flat_mae,patch,logistic,abide_dx,48,0.046415888336127774,test,0.6370967741935484,0.041642587561958225,0.626380984265149,0.04334244446969076,0.6265756302521008,0.04229521233798542 +flat_mae,patch,logistic,abide_dx,49,0.000774263682681127,train,0.6481481481481481,0.016378955785155316,0.6242613359336908,0.018002696853346718,0.6289036544850498,0.01674444939093161 +flat_mae,patch,logistic,abide_dx,49,0.000774263682681127,test,0.5564516129032258,0.04051333910160074,0.5180552611122888,0.045670687600717234,0.5341386554621849,0.0410186358650983 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,train,0.8048433048433048,0.014338291473584789,0.8007181886561003,0.014778406244483148,0.7984865263935031,0.014753907734562824 +flat_mae,patch,logistic,abide_dx,50,0.046415888336127774,test,0.5806451612903226,0.04336478320569108,0.5643243243243243,0.04583400286280483,0.5672268907563025,0.04401056061114804 +flat_mae,patch,logistic,abide_dx,51,0.005994842503189409,train,0.7222222222222222,0.015779396913995918,0.7131265206557706,0.016719501630367718,0.7114433370247324,0.016352642967899934 +flat_mae,patch,logistic,abide_dx,51,0.005994842503189409,test,0.5483870967741935,0.04276750300421339,0.5308108108108108,0.04540544387235599,0.5346638655462185,0.04339911644295416 +flat_mae,patch,logistic,abide_dx,52,2.782559402207126,train,0.9829059829059829,0.004736379623464049,0.9827242524916944,0.004786979395078856,0.9827242524916944,0.004810977866083085 +flat_mae,patch,logistic,abide_dx,52,2.782559402207126,test,0.5806451612903226,0.04464764350735239,0.5735449735449736,0.045799258261929834,0.5735294117647058,0.04525605094755585 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,train,0.7891737891737892,0.014593653560062831,0.7844427662099471,0.01507584960761749,0.7822074566260613,0.0149872340319084 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,test,0.5967741935483871,0.0442946336270904,0.5860042735042735,0.04587611539779247,0.5866596638655462,0.044839992325260244 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,train,0.7863247863247863,0.01572736562894063,0.783513293476205,0.015910495857748626,0.7828719084533038,0.015876888772672237 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,test,0.5725806451612904,0.04401265072132296,0.5623043623043623,0.04537785965904692,0.5630252100840336,0.044405587107520655 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,train,0.8076923076923077,0.014814079534679217,0.8034612553012785,0.015275973873442535,0.8010705057216685,0.015235947213974114 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,test,0.5725806451612904,0.0442672918999345,0.5643931861867832,0.045260045382716545,0.5646008403361344,0.0446875709219777 +flat_mae,patch,logistic,abide_dx,56,0.000774263682681127,train,0.6396011396011396,0.01682521900052506,0.6127035908752908,0.01897911299096386,0.6190845330380215,0.017332202191257917 +flat_mae,patch,logistic,abide_dx,56,0.000774263682681127,test,0.6129032258064516,0.038128469375245874,0.5921052631578947,0.041086698537158545,0.5966386554621849,0.038837272307061 +flat_mae,patch,logistic,abide_dx,57,0.005994842503189409,train,0.7037037037037037,0.016611274742897406,0.6938405797101449,0.01732860868915213,0.6925802879291252,0.016913709766054684 +flat_mae,patch,logistic,abide_dx,57,0.005994842503189409,test,0.6209677419354839,0.04203344327792009,0.6118548118548119,0.04347772550497814,0.6118697478991597,0.04262106634066615 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,train,0.7962962962962963,0.015031020432117062,0.7924961604368937,0.015382931551034717,0.790734588409007,0.015343550713349065 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,test,0.5645161290322581,0.043658388549496586,0.5588932806324111,0.044371005602627425,0.5588235294117647,0.04406215371181414 +flat_mae,patch,logistic,abide_dx,59,0.000774263682681127,train,0.6623931623931624,0.016092153458849373,0.6400225886507176,0.017948890019954106,0.6435954226651901,0.01663837311075442 +flat_mae,patch,logistic,abide_dx,59,0.000774263682681127,test,0.5483870967741935,0.04292797006549237,0.5308108108108108,0.044416266238744236,0.5346638655462185,0.043050031154077746 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,train,0.896011396011396,0.011561286909563113,0.8945371103999671,0.011763623521274802,0.89328165374677,0.011858952715589958 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,test,0.5806451612903226,0.044564968533540125,0.5766806722689075,0.04542158430932799,0.5766806722689075,0.0453987947495435 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,train,0.7877492877492878,0.014514447943812521,0.7830794595547445,0.014997824687126656,0.7809154669619787,0.014950997691121674 +flat_mae,patch,logistic,abide_dx,61,0.046415888336127774,test,0.5887096774193549,0.04325919509720283,0.5826018084614877,0.04369611278102075,0.5824579831932774,0.04343758128190776 +flat_mae,patch,logistic,abide_dx,62,0.000774263682681127,train,0.6737891737891738,0.017279125945486605,0.6551996310873272,0.01904257255038436,0.6568844592100406,0.017891552399608848 +flat_mae,patch,logistic,abide_dx,62,0.000774263682681127,test,0.5564516129032258,0.03841114386405135,0.5131005925608625,0.043425791079969626,0.532563025210084,0.038799201990854265 +flat_mae,patch,logistic,abide_dx,63,0.005994842503189409,train,0.6994301994301995,0.01742144602496586,0.6878875803606986,0.018497775679511337,0.6869324473975637,0.017921920218669958 +flat_mae,patch,logistic,abide_dx,63,0.005994842503189409,test,0.6290322580645161,0.04446082352255578,0.6242424242424243,0.04517857974846416,0.6239495798319328,0.044925955800812434 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,train,0.8846153846153846,0.012612049721732639,0.8829795334574977,0.012848971820390579,0.8817644887412329,0.012981966318767746 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,test,0.6532258064516129,0.041105999243143516,0.650475254015077,0.04132879674432,0.6507352941176471,0.04117649035059978 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,train,0.896011396011396,0.011282878613654953,0.8947450577663671,0.011461032675341022,0.8941675895164267,0.01161707804827233 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,test,0.5887096774193549,0.04194918820739092,0.5740553647201454,0.04392711569864639,0.576155462184874,0.04241471953405649 +flat_mae,patch,logistic,abide_dx,66,0.3593813663804626,train,0.9045584045584045,0.010940296696214548,0.9031383152895821,0.011136499652893395,0.9016242155777039,0.011221990093342836 +flat_mae,patch,logistic,abide_dx,66,0.3593813663804626,test,0.5483870967741935,0.04510328936739077,0.5454307410316837,0.04542196417240161,0.5456932773109244,0.045349688640196076 +flat_mae,patch,logistic,abide_dx,67,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,67,166.81005372000556,test,0.5645161290322581,0.042587300921658376,0.5634941329856584,0.04270924285910668,0.5651260504201681,0.0426566922487837 +flat_mae,patch,logistic,abide_dx,68,0.046415888336127774,train,0.811965811965812,0.015058203434331688,0.8083823419935978,0.015448600826218807,0.8064230343300111,0.015469710129215908 +flat_mae,patch,logistic,abide_dx,68,0.046415888336127774,test,0.6290322580645161,0.041482554035184444,0.628935727296383,0.04145252365328761,0.6334033613445378,0.04119574230673715 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,train,0.8917378917378918,0.010897925244585297,0.8901652708811977,0.011077986826946228,0.8888150609080843,0.01112347697026562 +flat_mae,patch,logistic,abide_dx,69,0.3593813663804626,test,0.6290322580645161,0.0439170298983031,0.6274817136886102,0.044016561319251704,0.6286764705882353,0.04396562096792839 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,train,0.7094017094017094,0.01689599908579732,0.699408085302884,0.017702260644905588,0.6980435585086748,0.017240673766118087 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,test,0.6048387096774194,0.04127569045174738,0.585171024783232,0.04426979542016302,0.5892857142857143,0.04201119426801134 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,train,0.9002849002849003,0.011550686429552816,0.8989728702888957,0.011719047842916453,0.8980435585086748,0.011780894613595392 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,test,0.5725806451612904,0.04375810910428986,0.5599598259122867,0.045684448757589965,0.5614495798319328,0.04433364913052039 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,train,0.782051282051282,0.015011697184041332,0.7776287103806809,0.015421562771919648,0.7757475083056479,0.015344787281450506 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,test,0.5403225806451613,0.043783032720614054,0.5334961388687216,0.04449108588339529,0.5336134453781513,0.04408998145918552 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,train,0.7934472934472935,0.015168866811225076,0.7897574123989218,0.015500569530976024,0.7881506090808417,0.015465883900789484 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,test,0.5806451612903226,0.04470918865813003,0.5766806722689075,0.04520150769576147,0.5766806722689075,0.045107523865839196 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,train,0.7022792022792023,0.016866865886833903,0.6922053810248072,0.017691660007933017,0.6909929863418236,0.01726488130120723 +flat_mae,patch,logistic,abide_dx,74,0.005994842503189409,test,0.6048387096774194,0.04485362945093692,0.5972691721349506,0.04612662711909418,0.5971638655462186,0.04551009354532932 +flat_mae,patch,logistic,abide_dx,75,0.3593813663804626,train,0.8817663817663818,0.011621970674212426,0.8799220996012241,0.011852680171987155,0.8782945736434109,0.011964326608906515 +flat_mae,patch,logistic,abide_dx,75,0.3593813663804626,test,0.6290322580645161,0.04397957382276041,0.6227513227513227,0.04493506598629561,0.6223739495798319,0.04437686973088727 +flat_mae,patch,logistic,abide_dx,76,0.046415888336127774,train,0.7948717948717948,0.014906439218644346,0.7907947019867549,0.01533928039535936,0.7888519748984866,0.015315448668680334 +flat_mae,patch,logistic,abide_dx,76,0.046415888336127774,test,0.5806451612903226,0.04077134470958347,0.5716183895827798,0.041926012466803685,0.571953781512605,0.04109381436616801 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,train,0.9017094017094017,0.011025948743101694,0.9003158988712018,0.011207391446364639,0.8990402362495387,0.011289271047042583 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,test,0.5725806451612904,0.04636385920268603,0.5703170970905524,0.04675905660690171,0.5709033613445378,0.046868346172290326 +flat_mae,patch,logistic,abide_dx,78,2.782559402207126,train,0.9814814814814815,0.0048607061288364825,0.9812790399507667,0.0049178770840168804,0.9811369509043928,0.005014172369414709 +flat_mae,patch,logistic,abide_dx,78,2.782559402207126,test,0.5806451612903226,0.044918341333327935,0.5778999738151349,0.04506684754557906,0.5782563025210083,0.045189562734952296 +flat_mae,patch,logistic,abide_dx,79,0.046415888336127774,train,0.8005698005698005,0.014763569049514423,0.7972370766488414,0.015136864846912576,0.7957918050941306,0.015178967208169294 +flat_mae,patch,logistic,abide_dx,79,0.046415888336127774,test,0.5403225806451613,0.0443890747636926,0.5366764995083579,0.044796704531273046,0.5367647058823529,0.04472931345347566 +flat_mae,patch,logistic,abide_dx,80,2.782559402207126,train,0.9843304843304843,0.0046223040881841395,0.9841685408692337,0.0046670161364400446,0.984311554078996,0.004625473874483358 +flat_mae,patch,logistic,abide_dx,80,2.782559402207126,test,0.6774193548387096,0.04176407587028172,0.671957671957672,0.042447216854897954,0.671218487394958,0.04200029532454992 +flat_mae,patch,logistic,abide_dx,81,0.005994842503189409,train,0.7150997150997151,0.018359907202439883,0.7065241929415306,0.01913071329693055,0.7049833887043189,0.018768754186990172 +flat_mae,patch,logistic,abide_dx,81,0.005994842503189409,test,0.5483870967741935,0.04355785739320502,0.5308108108108108,0.04579648245694928,0.5346638655462185,0.0440057017610184 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,train,0.688034188034188,0.01697977590161057,0.6794313345663593,0.017677945259183005,0.6783684016242155,0.01737415717730913 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,test,0.6532258064516129,0.03886260116089291,0.626793588577028,0.044260395216348516,0.6334033613445378,0.040143806553030334 +flat_mae,patch,logistic,abide_dx,83,0.005994842503189409,train,0.6923076923076923,0.016729851981953994,0.680672552058764,0.017751024302442343,0.6798818752307125,0.017214859315812008 +flat_mae,patch,logistic,abide_dx,83,0.005994842503189409,test,0.6451612903225806,0.043891038459165366,0.6391534391534391,0.044424831133636315,0.6386554621848739,0.044000232048257114 +flat_mae,patch,logistic,abide_dx,84,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,84,10000.0,test,0.6048387096774194,0.04468323008678483,0.6046072753302532,0.04509310552291768,0.6129201680672269,0.04423481184514871 +flat_mae,patch,logistic,abide_dx,85,0.3593813663804626,train,0.8931623931623932,0.011004105605446101,0.8915727409958009,0.011228253219570978,0.8901070505721669,0.011374823315647922 +flat_mae,patch,logistic,abide_dx,85,0.3593813663804626,test,0.6370967741935484,0.04110397402370119,0.6283716283716283,0.04248419397740493,0.6281512605042017,0.04163584555967242 +flat_mae,patch,logistic,abide_dx,86,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,86,10000.0,test,0.5241935483870968,0.042982839060421184,0.5189057670809496,0.04351434525171074,0.51890756302521,0.043429406733741564 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,train,0.8888888888888888,0.011779799610614903,0.887571145806812,0.011935097621888751,0.8871170173495755,0.011998874963994363 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,test,0.5967741935483871,0.043748200498148745,0.5958279009126467,0.043784883572044846,0.5976890756302521,0.04389666709473899 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,train,0.7849002849002849,0.01453920601191439,0.7801677744480967,0.015039713783062333,0.7780361757105942,0.015002747492101117 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,test,0.6129032258064516,0.042963164533863035,0.6092436974789917,0.043620518963352395,0.6092436974789917,0.043575367456358205 +flat_mae,patch,logistic,abide_dx,89,0.005994842503189409,train,0.7136752136752137,0.016606945867615467,0.7052040873887219,0.017454100780154,0.7036913990402363,0.017117483460970397 +flat_mae,patch,logistic,abide_dx,89,0.005994842503189409,test,0.5161290322580645,0.044778341817688236,0.5141065830721003,0.044892572233252326,0.5147058823529411,0.04487637810669206 +flat_mae,patch,logistic,abide_dx,90,0.005994842503189409,train,0.6965811965811965,0.01638305234640453,0.6879120879120879,0.017186183936229258,0.6867109634551495,0.016887622710757225 +flat_mae,patch,logistic,abide_dx,90,0.005994842503189409,test,0.6129032258064516,0.039811506318899965,0.588836695219674,0.043641934462760205,0.595063025210084,0.040626185063082375 +flat_mae,patch,logistic,abide_dx,91,0.005994842503189409,train,0.698005698005698,0.01704463046246673,0.6879528985507246,0.017835170815807634,0.6868217054263566,0.017422059117508975 +flat_mae,patch,logistic,abide_dx,91,0.005994842503189409,test,0.5564516129032258,0.044841946970627246,0.5498646953996436,0.045359498337001744,0.5498949579831933,0.04499969716951593 +flat_mae,patch,logistic,abide_dx,92,0.3593813663804626,train,0.8988603988603988,0.010813795550274266,0.8973555281426915,0.011029499701878623,0.8958656330749355,0.011185822186872989 +flat_mae,patch,logistic,abide_dx,92,0.3593813663804626,test,0.532258064516129,0.04530459980827247,0.5278361344537815,0.045439475746490404,0.5278361344537815,0.045306351538446595 +flat_mae,patch,logistic,abide_dx,93,0.000774263682681127,train,0.6566951566951567,0.01612618639200273,0.6350398974896616,0.017684583342264963,0.6384274640088593,0.016529966404216903 +flat_mae,patch,logistic,abide_dx,93,0.000774263682681127,test,0.532258064516129,0.042075399344139126,0.4942334739803095,0.046252389801202276,0.5105042016806722,0.0424022795161003 +flat_mae,patch,logistic,abide_dx,94,0.005994842503189409,train,0.7037037037037037,0.017506386711754546,0.6947851606591918,0.018394612748777857,0.6934662236987819,0.018042250855830545 +flat_mae,patch,logistic,abide_dx,94,0.005994842503189409,test,0.5887096774193549,0.042915351184928015,0.5826018084614877,0.043591134885670034,0.5824579831932774,0.043169245533997674 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,train,0.8974358974358975,0.011460842654860125,0.8958731623706242,0.011697524643370103,0.8942783314876339,0.011851483887749143 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,test,0.5725806451612904,0.04580413442571614,0.5623043623043623,0.047409701955119996,0.5630252100840336,0.04645229026209076 +flat_mae,patch,logistic,abide_dx,96,0.005994842503189409,train,0.7108262108262108,0.016780626780626785,0.7016674517192651,0.017520427125671728,0.7002214839424141,0.01714486314284667 +flat_mae,patch,logistic,abide_dx,96,0.005994842503189409,test,0.5806451612903226,0.045273765469091044,0.5716183895827798,0.046668479562540666,0.571953781512605,0.0458043117634443 +flat_mae,patch,logistic,abide_dx,97,0.000774263682681127,train,0.6410256410256411,0.016643224802996733,0.615158371040724,0.018451526192391595,0.6209671465485419,0.017028517457707105 +flat_mae,patch,logistic,abide_dx,97,0.000774263682681127,test,0.6209677419354839,0.038088374152456576,0.5956989247311828,0.04227919199104977,0.6024159663865546,0.03902160814602721 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,train,0.7948717948717948,0.014674422172005817,0.7904477611940299,0.01517504243214465,0.7882613510520486,0.015160158890883716 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,test,0.6209677419354839,0.04141613895665556,0.607462787095036,0.04344733039161907,0.608718487394958,0.04209607558025063 +flat_mae,patch,logistic,abide_dx,99,0.3593813663804626,train,0.8931623931623932,0.01164930614038825,0.8916477161643497,0.011865661820956335,0.8904023624953858,0.01200788573748913 +flat_mae,patch,logistic,abide_dx,99,0.3593813663804626,test,0.5645161290322581,0.04410426582891969,0.5588932806324111,0.04444615333075375,0.5588235294117647,0.044155043911459246 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,train,0.6908831908831908,0.017028855861884497,0.6820512820512821,0.017797569444793482,0.680952380952381,0.017496564105317473 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,test,0.6129032258064516,0.04301095799682181,0.6003223207091055,0.044703965969656874,0.6013655462184874,0.043547517240503486 diff --git a/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..baa71d3bbc861da0dc4c645003571d8b60622ad3 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:22:30 time: 4.6742 data: 3.7406 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:49 time: 0.1938 data: 0.0671 max mem: 2851 +extract (train) [ 40/289] eta: 0:01:12 time: 0.1720 data: 0.0534 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:58 time: 0.1802 data: 0.0553 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:50 time: 0.1910 data: 0.0593 max mem: 2851 +extract (train) [100/289] eta: 0:00:42 time: 0.1654 data: 0.0481 max mem: 2851 +extract (train) [120/289] eta: 0:00:37 time: 0.1923 data: 0.0596 max mem: 2851 +extract (train) [140/289] eta: 0:00:31 time: 0.1520 data: 0.0417 max mem: 2851 +extract (train) [160/289] eta: 0:00:26 time: 0.1674 data: 0.0481 max mem: 2851 +extract (train) [180/289] eta: 0:00:21 time: 0.1762 data: 0.0516 max mem: 2851 +extract (train) [200/289] eta: 0:00:17 time: 0.1747 data: 0.0558 max mem: 2851 +extract (train) [220/289] eta: 0:00:13 time: 0.1614 data: 0.0507 max mem: 2851 +extract (train) [240/289] eta: 0:00:09 time: 0.1831 data: 0.0645 max mem: 2851 +extract (train) [260/289] eta: 0:00:05 time: 0.1851 data: 0.0631 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1528 data: 0.0448 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1517 data: 0.0441 max mem: 2851 +extract (train) Total time: 0:00:55 (0.1913 s / it) +extract (validation) [ 0/62] eta: 0:04:07 time: 3.9905 data: 3.7387 max mem: 2851 +extract (validation) [20/62] eta: 0:00:17 time: 0.2402 data: 0.0890 max mem: 2851 +extract (validation) [40/62] eta: 0:00:06 time: 0.1469 data: 0.0392 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1370 data: 0.0357 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1375 data: 0.0362 max mem: 2851 +extract (validation) Total time: 0:00:15 (0.2421 s / it) +extract (test) [ 0/62] eta: 0:04:10 time: 4.0480 data: 3.8493 max mem: 2851 +extract (test) [20/62] eta: 0:00:15 time: 0.1961 data: 0.0624 max mem: 2851 +extract (test) [40/62] eta: 0:00:05 time: 0.1587 data: 0.0458 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1412 data: 0.0398 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1415 data: 0.0400 max mem: 2851 +extract (test) Total time: 0:00:14 (0.2332 s / it) +feature extraction time: 0:01:24 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.0059948 | train | 0.70655 | 0.017208 | 0.69476 | 0.018361 | 0.69354 | 0.017757 | +| flat_mae | patch | logistic | abide_dx | | 0.0059948 | test | 0.58065 | 0.043357 | 0.56432 | 0.04624 | 0.56874 | 0.043984 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.0424969209878119, "f1": 0.6330637206549615, "f1_std": 0.04326001729120264, "bacc": 0.6328781512605042, "bacc_std": 0.04300602449563238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 21.54434690031882, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044025613938050524, "f1": 0.5796610169491525, "f1_std": 0.04418305712117786, "bacc": 0.58140756302521, "bacc_std": 0.04420926734735061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.0425296126252289, "f1": 0.5752305665349143, "f1_std": 0.04349086000994006, "bacc": 0.5751050420168067, "bacc_std": 0.04320194904857804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.040645222094168564, "f1": 0.6761968530297957, "f1_std": 0.042619126110024316, "bacc": 0.6754201680672269, "bacc_std": 0.041594473513382135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04262698785572874, "f1": 0.5239148825405089, "f1_std": 0.04276260926816436, "bacc": 0.5267857142857143, "bacc_std": 0.04310186842082049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04559223821248224, "f1": 0.5573516535327002, "f1_std": 0.04730084633118738, "bacc": 0.5598739495798319, "bacc_std": 0.04596275670073352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04357529336065589, "f1": 0.6025641025641025, "f1_std": 0.04587021839338623, "bacc": 0.6029411764705883, "bacc_std": 0.044571624472676034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.040262468095594986, "f1": 0.5005299978800084, "f1_std": 0.04530937336890542, "bacc": 0.5178571428571428, "bacc_std": 0.040808688002002866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 1291.5496650148827, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04099431129794625, "f1": 0.5951020408163266, "f1_std": 0.043750001679313935, "bacc": 0.5982142857142857, "bacc_std": 0.04158853768147724} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04279604304989432, "f1": 0.6003223207091055, "f1_std": 0.04536209530544602, "bacc": 0.6013655462184874, "bacc_std": 0.043755650164269425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.0451146187409527, "f1": 0.5479551932126997, "f1_std": 0.04611618996655193, "bacc": 0.5483193277310925, "bacc_std": 0.045520432421102736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04538817145711648, "f1": 0.5171275823377994, "f1_std": 0.04608528943645212, "bacc": 0.5173319327731093, "bacc_std": 0.0457460718257316} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.043912986874421224, "f1": 0.5826018084614877, "f1_std": 0.04449344280200769, "bacc": 0.5824579831932774, "bacc_std": 0.04404552942872} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 2.782559402207126, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04148230318614761, "f1": 0.5292707292707293, "f1_std": 0.04363719350081013, "bacc": 0.5304621848739496, "bacc_std": 0.04235046056734061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.039947477874749365, "f1": 0.6481081081081081, "f1_std": 0.04248672917949669, "bacc": 0.6486344537815126, "bacc_std": 0.04087131635846684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04746152119092375, "f1": 0.5291961246399581, "f1_std": 0.047819309900422935, "bacc": 0.5294117647058824, "bacc_std": 0.047824147274846456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04245302233173577, "f1": 0.5267492467358554, "f1_std": 0.044453885953759244, "bacc": 0.5288865546218487, "bacc_std": 0.043100823376115496} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.03969471167895989, "f1": 0.6481081081081081, "f1_std": 0.04203411390986929, "bacc": 0.6486344537815126, "bacc_std": 0.040389583263485254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04329260606448079, "f1": 0.5478500171998624, "f1_std": 0.046875672072106696, "bacc": 0.5551470588235294, "bacc_std": 0.04396809762004359} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04323622888311722, "f1": 0.5407407407407407, "f1_std": 0.04428516004474335, "bacc": 0.5409663865546219, "bacc_std": 0.04376868868292745} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.043082284603048915, "f1": 0.58994708994709, "f1_std": 0.04382938156833008, "bacc": 0.5898109243697479, "bacc_std": 0.04342948305433757} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04326513316955097, "f1": 0.5819745442036464, "f1_std": 0.04722246824753425, "bacc": 0.5877100840336134, "bacc_std": 0.04404578043856335} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04394438827866721, "f1": 0.6045708211533352, "f1_std": 0.04551626461247428, "bacc": 0.6045168067226891, "bacc_std": 0.04473882836763189} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 166.81005372000556, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.046327957613170274, "f1": 0.5541026479241582, "f1_std": 0.04667362008050804, "bacc": 0.5546218487394958, "bacc_std": 0.046881562942394386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04560018301440172, "f1": 0.551522325244953, "f1_std": 0.045760678407232094, "bacc": 0.5514705882352942, "bacc_std": 0.04558263929707679} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04458269917307287, "f1": 0.5376584638329605, "f1_std": 0.046641105173601864, "bacc": 0.542016806722689, "bacc_std": 0.0448864626251833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 2.782559402207126, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04236706884329536, "f1": 0.5873947935016637, "f1_std": 0.042423012099358094, "bacc": 0.5887605042016807, "bacc_std": 0.04255993631312756} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044821356005975095, "f1": 0.5958279009126467, "f1_std": 0.04503744787496478, "bacc": 0.5976890756302521, "bacc_std": 0.045068994567998885} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04422726227574919, "f1": 0.6017043592264831, "f1_std": 0.04480626739921381, "bacc": 0.601890756302521, "bacc_std": 0.04483625434976634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042651505067175736, "f1": 0.5788211788211788, "f1_std": 0.04419074209616156, "bacc": 0.5793067226890757, "bacc_std": 0.04318476187344522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04678427032723744, "f1": 0.5550336008351275, "f1_std": 0.046818621667102274, "bacc": 0.5561974789915967, "bacc_std": 0.04692458479525845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04491305915225716, "f1": 0.6313513513513513, "f1_std": 0.04731131809621296, "bacc": 0.6323529411764706, "bacc_std": 0.04563179736993404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04131592893002259, "f1": 0.5444897959183673, "f1_std": 0.04405425095971774, "bacc": 0.5493697478991597, "bacc_std": 0.04185917483361585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.044164751036670793, "f1": 0.5808311791608669, "f1_std": 0.04547776753304052, "bacc": 0.5808823529411764, "bacc_std": 0.044853100719512366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 2.782559402207126, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.046109188152145424, "f1": 0.6119947848761408, "f1_std": 0.04624728108170697, "bacc": 0.6139705882352942, "bacc_std": 0.04614374275085244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 21.54434690031882, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04514025719885952, "f1": 0.5425559947299078, "f1_std": 0.045862543630767166, "bacc": 0.5425420168067226, "bacc_std": 0.04555535424012755} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04111234323009194, "f1": 0.5841388834089565, "f1_std": 0.04167765157584953, "bacc": 0.5840336134453781, "bacc_std": 0.04154285473332461} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04406348591342805, "f1": 0.5826018084614877, "f1_std": 0.04471821139379286, "bacc": 0.5824579831932774, "bacc_std": 0.04426924733316521} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 10000.0, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04300536895068809, "f1": 0.5241626016260164, "f1_std": 0.04303873043386993, "bacc": 0.5283613445378151, "bacc_std": 0.04301848415086838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.5080645161290323, "acc_std": 0.04307684975008339, "f1": 0.4986412142904487, "f1_std": 0.04443082154302219, "bacc": 0.4994747899159664, "bacc_std": 0.04363011326185973} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.03525930036496536, "f1": 0.5727820844099913, "f1_std": 0.04286697549479839, "bacc": 0.5887605042016807, "bacc_std": 0.03665688287048688} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04371038863842274, "f1": 0.5171275823377994, "f1_std": 0.04424756380215423, "bacc": 0.5173319327731093, "bacc_std": 0.043759062875907535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 10000.0, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04432719373091729, "f1": 0.5796610169491525, "f1_std": 0.044217580589399404, "bacc": 0.58140756302521, "bacc_std": 0.04411666910680797} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.000774263682681127, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.03667234055681029, "f1": 0.5256586070010827, "f1_std": 0.0434268387458536, "bacc": 0.5472689075630253, "bacc_std": 0.03736487636734959} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04279211904841225, "f1": 0.5479551932126997, "f1_std": 0.04368599442327077, "bacc": 0.5483193277310925, "bacc_std": 0.0430243210131684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.4838709677419355, "acc_std": 0.04297361434248471, "f1": 0.4772068511198946, "f1_std": 0.04349162249695439, "bacc": 0.4774159663865546, "bacc_std": 0.04329838104815404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04205292488228431, "f1": 0.5765651155005022, "f1_std": 0.0436832256764194, "bacc": 0.5777310924369747, "bacc_std": 0.042632506663133306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.041642587561958225, "f1": 0.626380984265149, "f1_std": 0.04334244446969076, "bacc": 0.6265756302521008, "bacc_std": 0.04229521233798542} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.000774263682681127, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04051333910160074, "f1": 0.5180552611122888, "f1_std": 0.045670687600717234, "bacc": 0.5341386554621849, "bacc_std": 0.0410186358650983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04336478320569108, "f1": 0.5643243243243243, "f1_std": 0.04583400286280483, "bacc": 0.5672268907563025, "bacc_std": 0.04401056061114804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04276750300421339, "f1": 0.5308108108108108, "f1_std": 0.04540544387235599, "bacc": 0.5346638655462185, "bacc_std": 0.04339911644295416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04464764350735239, "f1": 0.5735449735449736, "f1_std": 0.045799258261929834, "bacc": 0.5735294117647058, "bacc_std": 0.04525605094755585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.0442946336270904, "f1": 0.5860042735042735, "f1_std": 0.04587611539779247, "bacc": 0.5866596638655462, "bacc_std": 0.044839992325260244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04401265072132296, "f1": 0.5623043623043623, "f1_std": 0.04537785965904692, "bacc": 0.5630252100840336, "bacc_std": 0.044405587107520655} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.0442672918999345, "f1": 0.5643931861867832, "f1_std": 0.045260045382716545, "bacc": 0.5646008403361344, "bacc_std": 0.0446875709219777} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.038128469375245874, "f1": 0.5921052631578947, "f1_std": 0.041086698537158545, "bacc": 0.5966386554621849, "bacc_std": 0.038837272307061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04203344327792009, "f1": 0.6118548118548119, "f1_std": 0.04347772550497814, "bacc": 0.6118697478991597, "bacc_std": 0.04262106634066615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.043658388549496586, "f1": 0.5588932806324111, "f1_std": 0.044371005602627425, "bacc": 0.5588235294117647, "bacc_std": 0.04406215371181414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.000774263682681127, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04292797006549237, "f1": 0.5308108108108108, "f1_std": 0.044416266238744236, "bacc": 0.5346638655462185, "bacc_std": 0.043050031154077746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044564968533540125, "f1": 0.5766806722689075, "f1_std": 0.04542158430932799, "bacc": 0.5766806722689075, "bacc_std": 0.0453987947495435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04325919509720283, "f1": 0.5826018084614877, "f1_std": 0.04369611278102075, "bacc": 0.5824579831932774, "bacc_std": 0.04343758128190776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.000774263682681127, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.03841114386405135, "f1": 0.5131005925608625, "f1_std": 0.043425791079969626, "bacc": 0.532563025210084, "bacc_std": 0.038799201990854265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04446082352255578, "f1": 0.6242424242424243, "f1_std": 0.04517857974846416, "bacc": 0.6239495798319328, "bacc_std": 0.044925955800812434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.041105999243143516, "f1": 0.650475254015077, "f1_std": 0.04132879674432, "bacc": 0.6507352941176471, "bacc_std": 0.04117649035059978} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04194918820739092, "f1": 0.5740553647201454, "f1_std": 0.04392711569864639, "bacc": 0.576155462184874, "bacc_std": 0.04241471953405649} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.3593813663804626, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04510328936739077, "f1": 0.5454307410316837, "f1_std": 0.04542196417240161, "bacc": 0.5456932773109244, "bacc_std": 0.045349688640196076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 166.81005372000556, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.042587300921658376, "f1": 0.5634941329856584, "f1_std": 0.04270924285910668, "bacc": 0.5651260504201681, "bacc_std": 0.0426566922487837} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.041482554035184444, "f1": 0.628935727296383, "f1_std": 0.04145252365328761, "bacc": 0.6334033613445378, "bacc_std": 0.04119574230673715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.0439170298983031, "f1": 0.6274817136886102, "f1_std": 0.044016561319251704, "bacc": 0.6286764705882353, "bacc_std": 0.04396562096792839} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04127569045174738, "f1": 0.585171024783232, "f1_std": 0.04426979542016302, "bacc": 0.5892857142857143, "bacc_std": 0.04201119426801134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04375810910428986, "f1": 0.5599598259122867, "f1_std": 0.045684448757589965, "bacc": 0.5614495798319328, "bacc_std": 0.04433364913052039} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.043783032720614054, "f1": 0.5334961388687216, "f1_std": 0.04449108588339529, "bacc": 0.5336134453781513, "bacc_std": 0.04408998145918552} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04470918865813003, "f1": 0.5766806722689075, "f1_std": 0.04520150769576147, "bacc": 0.5766806722689075, "bacc_std": 0.045107523865839196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04485362945093692, "f1": 0.5972691721349506, "f1_std": 0.04612662711909418, "bacc": 0.5971638655462186, "bacc_std": 0.04551009354532932} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04397957382276041, "f1": 0.6227513227513227, "f1_std": 0.04493506598629561, "bacc": 0.6223739495798319, "bacc_std": 0.04437686973088727} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04077134470958347, "f1": 0.5716183895827798, "f1_std": 0.041926012466803685, "bacc": 0.571953781512605, "bacc_std": 0.04109381436616801} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04636385920268603, "f1": 0.5703170970905524, "f1_std": 0.04675905660690171, "bacc": 0.5709033613445378, "bacc_std": 0.046868346172290326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 2.782559402207126, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044918341333327935, "f1": 0.5778999738151349, "f1_std": 0.04506684754557906, "bacc": 0.5782563025210083, "bacc_std": 0.045189562734952296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.0443890747636926, "f1": 0.5366764995083579, "f1_std": 0.044796704531273046, "bacc": 0.5367647058823529, "bacc_std": 0.04472931345347566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04176407587028172, "f1": 0.671957671957672, "f1_std": 0.042447216854897954, "bacc": 0.671218487394958, "bacc_std": 0.04200029532454992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04355785739320502, "f1": 0.5308108108108108, "f1_std": 0.04579648245694928, "bacc": 0.5346638655462185, "bacc_std": 0.0440057017610184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.03886260116089291, "f1": 0.626793588577028, "f1_std": 0.044260395216348516, "bacc": 0.6334033613445378, "bacc_std": 0.040143806553030334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.043891038459165366, "f1": 0.6391534391534391, "f1_std": 0.044424831133636315, "bacc": 0.6386554621848739, "bacc_std": 0.044000232048257114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 10000.0, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04468323008678483, "f1": 0.6046072753302532, "f1_std": 0.04509310552291768, "bacc": 0.6129201680672269, "bacc_std": 0.04423481184514871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04110397402370119, "f1": 0.6283716283716283, "f1_std": 0.04248419397740493, "bacc": 0.6281512605042017, "bacc_std": 0.04163584555967242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 10000.0, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.042982839060421184, "f1": 0.5189057670809496, "f1_std": 0.04351434525171074, "bacc": 0.51890756302521, "bacc_std": 0.043429406733741564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.043748200498148745, "f1": 0.5958279009126467, "f1_std": 0.043784883572044846, "bacc": 0.5976890756302521, "bacc_std": 0.04389666709473899} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.042963164533863035, "f1": 0.6092436974789917, "f1_std": 0.043620518963352395, "bacc": 0.6092436974789917, "bacc_std": 0.043575367456358205} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.044778341817688236, "f1": 0.5141065830721003, "f1_std": 0.044892572233252326, "bacc": 0.5147058823529411, "bacc_std": 0.04487637810669206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.039811506318899965, "f1": 0.588836695219674, "f1_std": 0.043641934462760205, "bacc": 0.595063025210084, "bacc_std": 0.040626185063082375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.044841946970627246, "f1": 0.5498646953996436, "f1_std": 0.045359498337001744, "bacc": 0.5498949579831933, "bacc_std": 0.04499969716951593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04530459980827247, "f1": 0.5278361344537815, "f1_std": 0.045439475746490404, "bacc": 0.5278361344537815, "bacc_std": 0.045306351538446595} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.000774263682681127, "split": "test", "acc": 0.532258064516129, "acc_std": 0.042075399344139126, "f1": 0.4942334739803095, "f1_std": 0.046252389801202276, "bacc": 0.5105042016806722, "bacc_std": 0.0424022795161003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042915351184928015, "f1": 0.5826018084614877, "f1_std": 0.043591134885670034, "bacc": 0.5824579831932774, "bacc_std": 0.043169245533997674} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04580413442571614, "f1": 0.5623043623043623, "f1_std": 0.047409701955119996, "bacc": 0.5630252100840336, "bacc_std": 0.04645229026209076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.045273765469091044, "f1": 0.5716183895827798, "f1_std": 0.046668479562540666, "bacc": 0.571953781512605, "bacc_std": 0.0458043117634443} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.000774263682681127, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.038088374152456576, "f1": 0.5956989247311828, "f1_std": 0.04227919199104977, "bacc": 0.6024159663865546, "bacc_std": 0.03902160814602721} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04141613895665556, "f1": 0.607462787095036, "f1_std": 0.04344733039161907, "bacc": 0.608718487394958, "bacc_std": 0.04209607558025063} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04410426582891969, "f1": 0.5588932806324111, "f1_std": 0.04444615333075375, "bacc": 0.5588235294117647, "bacc_std": 0.044155043911459246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04301095799682181, "f1": 0.6003223207091055, "f1_std": 0.044703965969656874, "bacc": 0.6013655462184874, "bacc_std": 0.043547517240503486} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 416.98 | 1970.3 | 0.8158 | 0.1106 | 0.80991 | 0.1161 | 0.80921 | 0.1157 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 416.98 | 1970.3 | 0.58371 | 0.039321 | 0.57315 | 0.040582 | 0.57546 | 0.039372 | + + +done! total time: 0:05:51 diff --git a/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e9252651d63e2b7859475cd2b8c11fe2422474d1 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..0154aa75f407fe02b4385f64d7eb6cf79ab8e420 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.7753424657534247,0.021055571504633917,0.7654388714733542,0.022557165806944633,0.7615100445747084,0.02215351188981913 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.6307692307692307,0.05935233080172701,0.6153846153846154,0.06230314272876992,0.6148648648648649,0.060784382280274565 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,train,0.7589041095890411,0.022088121845585142,0.7482758620689656,0.02371089223595098,0.7447945289124992,0.023236579546139634 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,test,0.6153846153846154,0.0586492452167719,0.6018132810585641,0.06125519323340435,0.6013513513513513,0.06005574222658638 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,train,0.726027397260274,0.02179838423515953,0.7146118721461188,0.023088804681447094,0.7120809672101117,0.022565252232753425 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,test,0.6923076923076923,0.05482759206887537,0.6697154471544715,0.06180796902829804,0.6689189189189189,0.05785906717548939 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,train,0.7698630136986301,0.02260177731450431,0.7608125819134994,0.023984695876877137,0.7573731452647005,0.02369499532688843 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,test,0.5846153846153846,0.05735842669064447,0.5644080416976918,0.06074002319704903,0.5656370656370656,0.058490673115888896 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,train,0.8356164383561644,0.019221663983578968,0.8308932542624166,0.020091411243600525,0.8278225560236918,0.020272307453464998 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,test,0.7384615384615385,0.052139267306573,0.7292330311198236,0.05524387353818188,0.7268339768339769,0.054527375389413144 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,train,0.8602739726027397,0.01809651487520831,0.8563934426229508,0.01892956481427311,0.853254564328021,0.0192585585016143 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,test,0.5230769230769231,0.06257087058810869,0.5157414083153088,0.06350041662061709,0.515926640926641,0.06352112266063878 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7561643835616438,0.021315569244390885,0.7473969875817451,0.022318744931964243,0.7445197533125725,0.02201889699786926 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.676923076923077,0.054080536043000026,0.656084656084656,0.05966910851245781,0.6554054054054055,0.05625622120570483 +flat_mae,patch,logistic,adhd200_dx,7,0.005994842503189409,train,0.7561643835616438,0.02156213537933383,0.7468536917981687,0.022856153231857804,0.7438022836905416,0.022590948641456565 +flat_mae,patch,logistic,adhd200_dx,7,0.005994842503189409,test,0.5846153846153846,0.05370883589554365,0.5578231292517006,0.05785103863783277,0.5612934362934363,0.054793760380854206 +flat_mae,patch,logistic,adhd200_dx,8,0.046415888336127774,train,0.8465753424657534,0.018585655196831644,0.843010752688172,0.019146249371327705,0.841118641997924,0.01931742288834366 +flat_mae,patch,logistic,adhd200_dx,8,0.046415888336127774,test,0.676923076923077,0.06112163447735533,0.6690909090909091,0.062752556636735,0.6684362934362934,0.062134056183557176 +flat_mae,patch,logistic,adhd200_dx,9,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,9,166.81005372000556,test,0.5538461538461539,0.060053617069056896,0.5500119360229172,0.06036407901007265,0.5516409266409266,0.06055554867135271 +flat_mae,patch,logistic,adhd200_dx,10,0.005994842503189409,train,0.7698630136986301,0.022083409327731146,0.7618381804623415,0.023105112728021678,0.7588080845087622,0.022883973415425203 +flat_mae,patch,logistic,adhd200_dx,10,0.005994842503189409,test,0.5538461538461539,0.058328074375510026,0.5381034060279344,0.06026306468179886,0.5386100386100386,0.05904368809544072 +flat_mae,patch,logistic,adhd200_dx,11,0.046415888336127774,train,0.873972602739726,0.016597183391420047,0.8705913182883216,0.017252803176543537,0.8675428955242108,0.01755433801243838 +flat_mae,patch,logistic,adhd200_dx,11,0.046415888336127774,test,0.5692307692307692,0.05797219492666622,0.5608108108108107,0.058780344104028014,0.5608108108108107,0.05830446002917777 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,train,0.7643835616438356,0.022409193084467392,0.7556513887159049,0.023632287828258287,0.7525187763326616,0.023347272835088587 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,test,0.5692307692307692,0.05769291486859979,0.545,0.06284396798794614,0.5477799227799228,0.059471074744243284 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,train,0.7726027397260274,0.02075852209201196,0.763396099686819,0.022056912329186065,0.7598003297307199,0.02174693027299885 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,test,0.6307692307692307,0.0586999188532044,0.6235521235521235,0.05996010379632466,0.6235521235521235,0.05975670714394668 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,train,0.8465753424657534,0.017859789607353033,0.8412254536415611,0.01878920278193944,0.8368138242657386,0.01886767204133096 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,test,0.6615384615384615,0.05594671175514265,0.6515594541910331,0.05866704284252312,0.6505791505791505,0.05784178320686545 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,train,0.7643835616438356,0.02185520923632957,0.754566210045662,0.023425980498552032,0.7510838370885998,0.02305443380960587 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,test,0.6153846153846154,0.0591129697720075,0.61207925519217,0.05966993474129564,0.6143822393822393,0.06016667754693264 +flat_mae,patch,logistic,adhd200_dx,16,0.046415888336127774,train,0.8410958904109589,0.018427259580991093,0.8371237766972364,0.019079041109563624,0.8348293338218233,0.019253868967478015 +flat_mae,patch,logistic,adhd200_dx,16,0.046415888336127774,test,0.676923076923077,0.06034236836683114,0.6655231560891939,0.06319334702556247,0.6640926640926641,0.06228086211303479 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,train,0.7452054794520548,0.022182247910196316,0.7371233417745046,0.02312562692281875,0.7348110154484948,0.022944180563027317 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,test,0.6461538461538462,0.057550998222877986,0.6233308138070043,0.06320557878537558,0.6240347490347491,0.059739560277934096 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,train,0.7452054794520548,0.021541526184259953,0.734283634314163,0.02300554848049687,0.7312236673383403,0.02253883665718965 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,test,0.7076923076923077,0.05307470229467745,0.6934723256391164,0.057066399502810186,0.6911196911196911,0.05520990689687338 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,train,0.852054794520548,0.019133389018740662,0.848085460599334,0.019832018691260438,0.8452555413079319,0.01999433681406758 +flat_mae,patch,logistic,adhd200_dx,19,0.046415888336127774,test,0.5846153846153846,0.062038667079634714,0.578226387887527,0.06286586933383013,0.5786679536679536,0.06288708177124878 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,train,0.8465753424657534,0.017689785312711766,0.8427401981904352,0.0182034388796841,0.8404011723758931,0.01820742011583839 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,test,0.6153846153846154,0.05535010890877287,0.6018132810585641,0.057948411562869895,0.6013513513513513,0.05668944780781541 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,train,0.7315068493150685,0.02252140051047613,0.7183109683109683,0.02413150834867751,0.7155003968980888,0.02337799066201623 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,test,0.7538461538461538,0.05079346740832867,0.7523809523809524,0.050772382306003526,0.7577220077220077,0.05025474430928337 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.7479452054794521,0.02094661650729711,0.736833855799373,0.022599637431301882,0.7336508518043597,0.022145570139722827 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.6923076923076923,0.05706689524185085,0.6794871794871795,0.06061470187629927,0.6776061776061776,0.05923593322895811 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7616438356164383,0.02148398819867946,0.752542372881356,0.02266498103243399,0.7493741222446113,0.02232378017254569 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.6307692307692307,0.05212369448367238,0.587737843551797,0.06256446301222388,0.5974903474903475,0.05477962264340211 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.7424657534246575,0.02240812856830242,0.7334855828983347,0.02352354641948612,0.7309488917384136,0.023180550739890268 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.6307692307692307,0.056959622364282377,0.6036585365853658,0.06257380327162604,0.6061776061776062,0.058583127770218034 +flat_mae,patch,logistic,adhd200_dx,25,0.046415888336127774,train,0.8410958904109589,0.019044746668838635,0.8365301457870027,0.01986458556743193,0.8333943945777615,0.020024641580546183 +flat_mae,patch,logistic,adhd200_dx,25,0.046415888336127774,test,0.5538461538461539,0.06184461536547981,0.543030303030303,0.06291070314330456,0.542953667953668,0.062407155670860116 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,train,0.7561643835616438,0.02172574685855947,0.7468536917981687,0.022994646399747517,0.7438022836905416,0.022646806879319113 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,test,0.7076923076923077,0.05254046848289771,0.6934723256391164,0.05619535575761905,0.6911196911196911,0.054373952352268475 +flat_mae,patch,logistic,adhd200_dx,27,0.000774263682681127,train,0.6876712328767123,0.0225923968033401,0.6731240573152337,0.0243988979054467,0.6716431580875618,0.02358929473488324 +flat_mae,patch,logistic,adhd200_dx,27,0.000774263682681127,test,0.6153846153846154,0.06006878896968062,0.606060606060606,0.061954231447803196,0.6056949806949807,0.061137770315890495 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,train,0.6794520547945205,0.022391983124277832,0.6615254143252305,0.024446415612213432,0.6607742565793491,0.02333792330388347 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,test,0.5384615384615384,0.055868857432108184,0.5045731707317074,0.06062855122439942,0.5120656370656371,0.05673465591389826 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,train,0.7506849315068493,0.022233725960535643,0.7417205153925708,0.023314720109554928,0.7389479147585027,0.02299164716175327 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,test,0.6307692307692307,0.055873559709009174,0.61,0.06072076438755054,0.6105212355212355,0.05780868554189184 +flat_mae,patch,logistic,adhd200_dx,30,0.005994842503189409,train,0.7534246575342466,0.02080828970020605,0.7458531905675558,0.021873591512347388,0.7435275080906149,0.02170619267099099 +flat_mae,patch,logistic,adhd200_dx,30,0.005994842503189409,test,0.6,0.05759911241919696,0.5833333333333333,0.061314455996662694,0.5834942084942085,0.05919270783521584 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,train,0.8684931506849315,0.016955003424200647,0.8652058841632302,0.017539540574295114,0.862688526592172,0.017757992863537093 +flat_mae,patch,logistic,adhd200_dx,31,0.046415888336127774,test,0.5692307692307692,0.0559832942114809,0.5289855072463768,0.06305648306764737,0.5390926640926641,0.057546567554262705 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,train,0.8547945205479452,0.018180389237421997,0.8504803641956702,0.01895643276290259,0.8469652561519204,0.019118661661948994 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,test,0.5692307692307692,0.06291524495173867,0.5512820512820513,0.06506064222244805,0.5521235521235521,0.06355720824505892 +flat_mae,patch,logistic,adhd200_dx,33,0.000774263682681127,train,0.6794520547945205,0.02391817916202489,0.6615254143252305,0.026141886317223955,0.6607742565793491,0.02499050685549913 +flat_mae,patch,logistic,adhd200_dx,33,0.000774263682681127,test,0.5692307692307692,0.05753898806834366,0.545,0.06210675813321833,0.5477799227799228,0.058934098824058326 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,train,0.8602739726027397,0.01795715674393193,0.857379272301009,0.018423897233254206,0.8561244428161446,0.018620565373435594 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,test,0.5692307692307692,0.05908224335018044,0.5608108108108107,0.06082947129951985,0.5608108108108107,0.06054215211269054 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7561643835616438,0.021669625003130955,0.7457122952038764,0.0230605568176532,0.7423673444464798,0.022592246051230756 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.6,0.05742441185526481,0.5833333333333333,0.06075163897680338,0.5834942084942085,0.05910838606871134 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,train,0.7616438356164383,0.02254445220035461,0.7535869759212843,0.023651120837803533,0.7508090614886731,0.023457516428978212 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,test,0.6307692307692307,0.05842914082066773,0.6153846153846154,0.06205847141649863,0.6148648648648649,0.060184162688304225 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,train,0.7397260273972602,0.021757272355618012,0.7291883068704555,0.023030212688864352,0.7263692984063015,0.022615977390063272 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,test,0.7076923076923077,0.051177389608112994,0.6888384983623079,0.05751432916598011,0.6867760617760618,0.054350330515328206 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.736986301369863,0.02291764101831269,0.7272387594968239,0.02435143418169608,0.724659583562313,0.02396954560270376 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.6307692307692307,0.05833706587100408,0.6153846153846154,0.06115090008016818,0.6148648648648649,0.05929701330084024 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,train,0.7589041095890411,0.022141296203260503,0.7488584474885844,0.023626144771428603,0.7455119985345301,0.02320502570266383 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,test,0.6615384615384615,0.053203692535464905,0.6299171842650104,0.061449574119516716,0.6332046332046332,0.055796633877430245 +flat_mae,patch,logistic,adhd200_dx,40,0.046415888336127774,train,0.8575342465753425,0.01794959686936075,0.8534408203607611,0.018707240149479443,0.8501099102399707,0.018940323186392664 +flat_mae,patch,logistic,adhd200_dx,40,0.046415888336127774,test,0.676923076923077,0.057598972706158,0.6690909090909091,0.059417401247724724,0.6684362934362934,0.059054261555825736 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,train,0.7643835616438356,0.02218203743488455,0.7551176433876303,0.02352015134861471,0.7518013067106307,0.023119464035073076 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,test,0.6153846153846154,0.059796348664525226,0.6094688776736361,0.06059961458654289,0.61003861003861,0.060637416111886784 +flat_mae,patch,logistic,adhd200_dx,42,0.046415888336127774,train,0.863013698630137,0.018118958884141886,0.8585271317829457,0.01904569116884466,0.8542468095499787,0.019273211111359483 +flat_mae,patch,logistic,adhd200_dx,42,0.046415888336127774,test,0.5846153846153846,0.06321664824541566,0.5830363506771205,0.06327567102661882,0.5873552123552124,0.0635305176903878 +flat_mae,patch,logistic,adhd200_dx,43,0.046415888336127774,train,0.852054794520548,0.018621189632283125,0.847803928836175,0.019392911897129255,0.844538071685901,0.01959264121279788 +flat_mae,patch,logistic,adhd200_dx,43,0.046415888336127774,test,0.6153846153846154,0.0601049260059233,0.6094688776736361,0.06099336664862284,0.61003861003861,0.060960328047696986 +flat_mae,patch,logistic,adhd200_dx,44,0.005994842503189409,train,0.7643835616438356,0.020816791726619495,0.7539968652037617,0.022433690245938828,0.7503663674665689,0.022064740521660443 +flat_mae,patch,logistic,adhd200_dx,44,0.005994842503189409,test,0.6,0.05739967630776925,0.5775,0.06160772250941067,0.5791505791505791,0.058999035712343294 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7726027397260274,0.022084731489969478,0.7639197350477304,0.023473761186465235,0.7605177993527508,0.023245172588482615 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.6,0.05727768961655708,0.5833333333333333,0.0612895124600859,0.5834942084942085,0.05920994080051619 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,train,0.736986301369863,0.02223649072999127,0.7283720930232558,0.023191556857184836,0.7260945228063748,0.022953352827949174 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,test,0.6615384615384615,0.057033439791387905,0.6474358974358974,0.0604042532757117,0.6462355212355213,0.05874348996396787 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,train,0.7671232876712328,0.021660031394945493,0.7587499319600937,0.022786730907983957,0.7556634304207119,0.02251140647285408 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,test,0.5230769230769231,0.060267011396964866,0.5115151515151515,0.06097636931758606,0.5115830115830116,0.060466999245817644 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,train,0.8438356164383561,0.017767351209446287,0.8394985535197685,0.01846604705815378,0.8365390486658119,0.018581565811755282 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,test,0.676923076923077,0.057047473223477946,0.6719538572458543,0.05807424881977654,0.6727799227799228,0.05778098905312306 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7753424657534247,0.021363266280727586,0.7648787078934138,0.02305169754431333,0.7607925749526775,0.02256033697774877 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.5230769230769231,0.06384931224810658,0.5115151515151515,0.06495081908448788,0.5115830115830116,0.06437195164776484 +flat_mae,patch,logistic,adhd200_dx,50,0.046415888336127774,train,0.8602739726027397,0.019162717132596877,0.8569056568964324,0.01974939953588524,0.8546895035720828,0.019886032329315934 +flat_mae,patch,logistic,adhd200_dx,50,0.046415888336127774,test,0.6923076923076923,0.05900876677420922,0.6832358674463938,0.06079852409508094,0.6819498069498069,0.059972950229013645 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,train,0.7616438356164383,0.021827372328649716,0.7545621072645906,0.022752461339488786,0.7522440007327349,0.0226398635961488 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,test,0.6615384615384615,0.05901438998119078,0.6549227799227799,0.060418535370449826,0.6549227799227799,0.06015491623409578 +flat_mae,patch,logistic,adhd200_dx,52,0.046415888336127774,train,0.852054794520548,0.018371532311556735,0.848085460599334,0.019034686975041307,0.8452555413079319,0.019178764314893657 +flat_mae,patch,logistic,adhd200_dx,52,0.046415888336127774,test,0.7692307692307693,0.047696436545842685,0.7543461829176115,0.05298493738714161,0.7495173745173744,0.05098117558444406 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,train,0.7561643835616438,0.02156412998495143,0.7468536917981687,0.02280570244824212,0.7438022836905416,0.022438023776812453 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,test,0.6307692307692307,0.05256295183230718,0.587737843551797,0.06236710783178109,0.5974903474903475,0.05495237644144849 +flat_mae,patch,logistic,adhd200_dx,54,0.046415888336127774,train,0.8410958904109589,0.018019660245860443,0.8362165005879805,0.018864161668265086,0.8326769249557306,0.01902864742666781 +flat_mae,patch,logistic,adhd200_dx,54,0.046415888336127774,test,0.5538461538461539,0.061814335481390696,0.5469838981014179,0.06265436741856187,0.5472972972972974,0.06268921953812513 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7698630136986301,0.02162081730739138,0.7618381804623415,0.022796519173152507,0.7588080845087622,0.022602773673819045 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.5846153846153846,0.05961990057337146,0.578226387887527,0.060209415800580826,0.5786679536679536,0.05999540099142303 +flat_mae,patch,logistic,adhd200_dx,56,0.000774263682681127,train,0.6712328767123288,0.02259692743865535,0.6484073978936553,0.024898282920736436,0.6491878854491054,0.023414758955844883 +flat_mae,patch,logistic,adhd200_dx,56,0.000774263682681127,test,0.5692307692307692,0.060752862449912494,0.545,0.06452989749697928,0.5477799227799228,0.06191087031419503 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,train,0.8684931506849315,0.01736052642204697,0.8649648538660748,0.017940178933510288,0.8619710569701411,0.01803038016069954 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,test,0.6,0.06166368797113244,0.5833333333333333,0.06510801941350151,0.5834942084942085,0.06325632920159896 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7643835616438356,0.022028081499629634,0.7556513887159049,0.023322304366942038,0.7525187763326616,0.023098532667445014 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.6307692307692307,0.05733372872667661,0.6285714285714286,0.05771159844013348,0.6322393822393823,0.05811284934126485 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,train,0.7726027397260274,0.021353516051843036,0.7649163103616852,0.022379953667103186,0.7619527385968126,0.022206082384403053 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,test,0.6461538461538462,0.06005576107143945,0.6336682185738789,0.0633257923447842,0.6327220077220077,0.061846083445892826 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7424657534246575,0.021825566859467793,0.7329212853406402,0.022820501517917067,0.7302314221163827,0.022425174155993634 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6615384615384615,0.05859301814335842,0.6474358974358974,0.06177314106379864,0.6462355212355213,0.060223207961485864 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7561643835616438,0.021490089621985488,0.7473969875817451,0.022540314579018273,0.7445197533125725,0.022257000178358032 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.6,0.05632198221363815,0.570630081300813,0.06317748496594133,0.5748069498069498,0.05861471146566141 +flat_mae,patch,logistic,adhd200_dx,62,0.005994842503189409,train,0.7506849315068493,0.022127269670052584,0.7427766032417196,0.023147007169846932,0.7403828540025645,0.02295135915280949 +flat_mae,patch,logistic,adhd200_dx,62,0.005994842503189409,test,0.6307692307692307,0.05737863445837389,0.6198830409356726,0.05952979292205311,0.6192084942084942,0.05862044092877534 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7616438356164383,0.02175262342818981,0.7550241080038573,0.02265804322207005,0.7529614703547658,0.022624043953210434 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.6153846153846154,0.05723092638522675,0.5966741126830479,0.06061712674956581,0.597007722007722,0.058455040365419104 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,train,0.7452054794520548,0.022050880278877125,0.7348896056731828,0.02340191051075621,0.7319411369603712,0.022957863875432145 +flat_mae,patch,logistic,adhd200_dx,64,0.005994842503189409,test,0.6615384615384615,0.055638894913710636,0.6474358974358974,0.05896469174076757,0.6462355212355213,0.05717426364831067 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7424657534246575,0.022593169580445077,0.7317351598173516,0.024110359316583722,0.7287964828723209,0.023675069591443394 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.6923076923076923,0.055907920917359966,0.6862934362934363,0.057283361951727896,0.6862934362934363,0.05735605429576625 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7698630136986301,0.022397691085463304,0.7618381804623415,0.023483351863612303,0.7588080845087622,0.023205206681307104 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.5846153846153846,0.06234981292351753,0.5745454545454545,0.06421730766189782,0.5743243243243243,0.06359581929880564 +flat_mae,patch,logistic,adhd200_dx,67,0.000774263682681127,train,0.6958904109589041,0.022327819796695552,0.678883085385475,0.024354875125448087,0.6774897722415583,0.023305310851416697 +flat_mae,patch,logistic,adhd200_dx,67,0.000774263682681127,test,0.6,0.04965176722097291,0.5533826638477801,0.05885195000662475,0.5661196911196912,0.05146637768853338 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7506849315068493,0.022689417199278937,0.7387546110224081,0.02462576475055704,0.7353605666483483,0.024000129456750247 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.676923076923077,0.05531273968623894,0.656084656084656,0.0607033916050876,0.6554054054054055,0.05751941254288143 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,train,0.7616438356164383,0.0213457531714741,0.7545621072645906,0.022297053458175224,0.7522440007327349,0.022208234339209917 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,test,0.5384615384615384,0.06379953070629278,0.5192307692307693,0.06674136582427402,0.5207528957528957,0.06466464764643581 +flat_mae,patch,logistic,adhd200_dx,70,0.005994842503189409,train,0.7561643835616438,0.02220954878079134,0.7451137317672167,0.023939266609582736,0.7416498748244489,0.023424064273989163 +flat_mae,patch,logistic,adhd200_dx,70,0.005994842503189409,test,0.6,0.06195761865078208,0.588206627680312,0.06429508270222113,0.5878378378378378,0.06306332598177536 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,train,0.8410958904109589,0.018081735984292743,0.8368325317548403,0.018809210278254898,0.8341118641997924,0.019008436761689177 +flat_mae,patch,logistic,adhd200_dx,71,0.046415888336127774,test,0.6615384615384615,0.0576063441608124,0.6474358974358974,0.060933759776488065,0.6462355212355213,0.05911085262413096 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.7726027397260274,0.02212685174226147,0.7649163103616852,0.02320309756229465,0.7619527385968126,0.023020645286954387 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.6461538461538462,0.05706082294177766,0.6289401836684041,0.060022546386913726,0.6283783783783784,0.05795222625689346 +flat_mae,patch,logistic,adhd200_dx,73,0.005994842503189409,train,0.7589041095890411,0.021240846935387428,0.7504971414367387,0.022112233544255826,0.7476644074006228,0.021843622282210044 +flat_mae,patch,logistic,adhd200_dx,73,0.005994842503189409,test,0.6,0.06086489840612704,0.588206627680312,0.06233347550294366,0.5878378378378378,0.06154226822861454 +flat_mae,patch,logistic,adhd200_dx,74,0.046415888336127774,train,0.8493150684931506,0.01839064355354047,0.8451301832208293,0.019088958286443938,0.8421108872198816,0.019201919142535974 +flat_mae,patch,logistic,adhd200_dx,74,0.046415888336127774,test,0.7076923076923077,0.056164896699822886,0.7031963470319634,0.05685804472136639,0.7041505791505791,0.05659037727588138 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,train,0.7561643835616438,0.022208301646859576,0.7462922032786373,0.023512931807504238,0.7430848140685107,0.023100129158254278 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,test,0.5692307692307692,0.06481711367264104,0.5565302144249512,0.06652491537281653,0.5564671814671815,0.06574791538933356 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.7589041095890411,0.02104641699269586,0.7488584474885844,0.02239843439536385,0.7455119985345301,0.022012773014064697 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.5384615384615384,0.04550180437283154,0.44221967963386727,0.05867887387274566,0.49034749034749037,0.04629379231389694 +flat_mae,patch,logistic,adhd200_dx,77,0.005994842503189409,train,0.7424657534246575,0.022413476765147794,0.7334855828983347,0.023523695367702012,0.7309488917384136,0.02322257203117533 +flat_mae,patch,logistic,adhd200_dx,77,0.005994842503189409,test,0.6923076923076923,0.05256489705465867,0.6635610766045548,0.06146919282327064,0.6645752895752897,0.05551380181922406 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,train,0.8794520547945206,0.01709804693107363,0.8770516903478687,0.01743794354532061,0.8759846125664041,0.017485013000491764 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,test,0.5538461538461539,0.05704100881287543,0.5250692869740489,0.06113188173763211,0.5299227799227799,0.057970710864969986 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,train,0.7643835616438356,0.02127316301858194,0.7556513887159049,0.022586003248446482,0.7525187763326616,0.022309277917813713 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,test,0.5692307692307692,0.061606885037499406,0.564176245210728,0.06237952793340967,0.5651544401544402,0.06252839864048879 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7698630136986301,0.021820338753934618,0.7618381804623415,0.023031378302762126,0.7588080845087622,0.022867541014918946 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.6307692307692307,0.056602640706165316,0.6036585365853658,0.06253697406822761,0.6061776061776062,0.05860356969650831 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,train,0.7561643835616438,0.022552878835734487,0.747922308701084,0.023739457868005708,0.7452372229346034,0.023552207042920006 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,test,0.5538461538461539,0.057301684765136494,0.5321419707123356,0.06108018716067444,0.5342664092664092,0.05861726847725752 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,train,0.8438356164383561,0.019283171174915213,0.839790237253679,0.020005072502372462,0.8372565182878428,0.020238411014287635 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,test,0.676923076923077,0.056540083702526986,0.6766169154228856,0.05671445603589706,0.6858108108108107,0.056043799900987006 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,train,0.7315068493150685,0.021876031418195357,0.7209480122324159,0.023178088208413154,0.7183702753862123,0.022733161755973917 +flat_mae,patch,logistic,adhd200_dx,83,0.005994842503189409,test,0.7076923076923077,0.05672801505700417,0.7031963470319634,0.05761821594685017,0.7041505791505791,0.05774486798905216 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,train,0.7589041095890411,0.020440694713617365,0.7470547470547471,0.02198585194937071,0.7433595896684374,0.021378051116506175 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,test,0.6461538461538462,0.05858334678652339,0.6461538461538462,0.058845841824604964,0.6587837837837838,0.05861224359950269 +flat_mae,patch,logistic,adhd200_dx,85,0.000774263682681127,train,0.6493150684931507,0.022444135271972195,0.6163447641571409,0.02603822873802437,0.6218782438786102,0.023432287732869025 +flat_mae,patch,logistic,adhd200_dx,85,0.000774263682681127,test,0.6307692307692307,0.05444483881299293,0.5962732919254659,0.06252017817309617,0.6018339768339769,0.056738937068133835 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,train,0.7671232876712328,0.02158203836083595,0.7582310539645432,0.022932611226326113,0.754945960798681,0.022651058529303585 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,test,0.5230769230769231,0.056235674836000275,0.4834657780056396,0.06060270293629884,0.4942084942084942,0.056604371048379475 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,train,0.7561643835616438,0.021786524626071432,0.7457122952038764,0.02311794858033667,0.7423673444464798,0.022636159069792786 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,test,0.5538461538461539,0.06307830392484806,0.543030303030303,0.06444586279533669,0.542953667953668,0.0638729356978593 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,train,0.7534246575342466,0.021119036581630227,0.7442863370282725,0.02212227101142573,0.7413750992245222,0.02178226264559861 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,test,0.7076923076923077,0.055511606547986536,0.6934723256391164,0.05994389446395335,0.6911196911196911,0.058073946292536406 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,train,0.7506849315068493,0.0229773195833931,0.7417205153925708,0.02428014482263704,0.7389479147585027,0.023970589506502826 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,test,0.6153846153846154,0.0573416543661219,0.5966741126830479,0.06056817147972453,0.597007722007722,0.05857618572292959 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7616438356164383,0.022320473121336223,0.752542372881356,0.023373543287193335,0.7493741222446113,0.023044570342014636 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.6615384615384615,0.0576756899143876,0.6425000000000001,0.06267375737144905,0.6418918918918919,0.059631558926346084 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,train,0.852054794520548,0.01799848120195233,0.848085460599334,0.018650753124191486,0.8452555413079319,0.018805485846863337 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,test,0.6307692307692307,0.05536190987573277,0.6036585365853658,0.06111084899660727,0.6061776061776062,0.0570083251147776 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,train,0.7452054794520548,0.020907804840451034,0.7336581691500129,0.02258441895455971,0.7305061977163094,0.02206833336809685 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,test,0.7076923076923077,0.05454130620768297,0.6973780936045086,0.05706615160427397,0.6954633204633205,0.05599943937432204 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7671232876712328,0.021439394434134167,0.7597363876433645,0.02238203729253931,0.7570983696647737,0.02223780191250573 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.676923076923077,0.051205352900653596,0.6500897205844656,0.05876883783915455,0.6510617760617761,0.05386916494128072 +flat_mae,patch,logistic,adhd200_dx,94,0.046415888336127774,train,0.8657534246575342,0.018128637734223307,0.8614950940532335,0.01901514282444224,0.857391463638029,0.019296916538291204 +flat_mae,patch,logistic,adhd200_dx,94,0.046415888336127774,test,0.5692307692307692,0.059096302911023794,0.545,0.0636138611916929,0.5477799227799228,0.06062050875407463 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,train,0.7780821917808219,0.02067639050737788,0.7710429105777943,0.021656879220271933,0.7682420467729132,0.021553704371227833 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,test,0.5384615384615384,0.06237432334717349,0.5248538011695907,0.0643843101149295,0.525096525096525,0.06333677551300453 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,train,0.7342465753424657,0.02309760048268644,0.7246910988250481,0.02423743378640255,0.7222323990962936,0.023887109328639657 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,test,0.6153846153846154,0.0626141296415532,0.6139225469232596,0.06258907329369325,0.6187258687258688,0.06276617063257026 +flat_mae,patch,logistic,adhd200_dx,97,0.046415888336127774,train,0.8684931506849315,0.018558618567596812,0.8652058841632302,0.019178793394437905,0.862688526592172,0.019379075238963938 +flat_mae,patch,logistic,adhd200_dx,97,0.046415888336127774,test,0.5538461538461539,0.056838848286679886,0.5167905665214048,0.062423730983525286,0.5255791505791506,0.05784262177785035 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,train,0.7698630136986301,0.02103752823635336,0.7597178683385579,0.022498283858742512,0.7559382060206387,0.022074565275024372 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,test,0.6615384615384615,0.056275324871111254,0.6425000000000001,0.06086847907218919,0.6418918918918919,0.0581233464928014 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7671232876712328,0.022295013735260008,0.7606557377049181,0.023183529682747665,0.7585333089088355,0.023119908467631355 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.6461538461538462,0.057172269522617905,0.6375757575757576,0.05853808583904013,0.6370656370656371,0.05801514468633901 +flat_mae,patch,logistic,adhd200_dx,100,0.005994842503189409,train,0.7561643835616438,0.022155191212317356,0.7473969875817451,0.023416319111413442,0.7445197533125725,0.023169126080185305 +flat_mae,patch,logistic,adhd200_dx,100,0.005994842503189409,test,0.6461538461538462,0.053269519021713854,0.6167649320687003,0.0601319991094916,0.6196911196911197,0.055392104231222566 diff --git a/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..9d64b3cc2aa97e7d912e4e29392d15513e4b5f91 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:10:14 time: 4.0673 data: 3.3087 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:52 time: 0.2152 data: 0.0773 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:33 time: 0.1909 data: 0.0544 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:24 time: 0.2100 data: 0.0606 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:17 time: 0.1995 data: 0.0566 max mem: 2851 +extract (train) [100/151] eta: 0:00:12 time: 0.1862 data: 0.0505 max mem: 2851 +extract (train) [120/151] eta: 0:00:07 time: 0.2057 data: 0.0562 max mem: 2851 +extract (train) [140/151] eta: 0:00:02 time: 0.1518 data: 0.0359 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1422 data: 0.0332 max mem: 2851 +extract (train) Total time: 0:00:33 (0.2191 s / it) +extract (validation) [ 0/32] eta: 0:02:13 time: 4.1780 data: 3.9503 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.2056 data: 0.0597 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1468 data: 0.0368 max mem: 2851 +extract (validation) Total time: 0:00:10 (0.3208 s / it) +extract (test) [ 0/33] eta: 0:02:28 time: 4.4963 data: 4.3020 max mem: 2851 +extract (test) [20/33] eta: 0:00:05 time: 0.1993 data: 0.0600 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1418 data: 0.0391 max mem: 2851 +extract (test) Total time: 0:00:10 (0.3192 s / it) +feature extraction time: 0:00:53 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.77534 | 0.021056 | 0.76544 | 0.022557 | 0.76151 | 0.022154 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.63077 | 0.059352 | 0.61538 | 0.062303 | 0.61486 | 0.060784 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0586492452167719, "f1": 0.6018132810585641, "f1_std": 0.06125519323340435, "bacc": 0.6013513513513513, "bacc_std": 0.06005574222658638} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05482759206887537, "f1": 0.6697154471544715, "f1_std": 0.06180796902829804, "bacc": 0.6689189189189189, "bacc_std": 0.05785906717548939} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05735842669064447, "f1": 0.5644080416976918, "f1_std": 0.06074002319704903, "bacc": 0.5656370656370656, "bacc_std": 0.058490673115888896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.052139267306573, "f1": 0.7292330311198236, "f1_std": 0.05524387353818188, "bacc": 0.7268339768339769, "bacc_std": 0.054527375389413144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06257087058810869, "f1": 0.5157414083153088, "f1_std": 0.06350041662061709, "bacc": 0.515926640926641, "bacc_std": 0.06352112266063878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.054080536043000026, "f1": 0.656084656084656, "f1_std": 0.05966910851245781, "bacc": 0.6554054054054055, "bacc_std": 0.05625622120570483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05370883589554365, "f1": 0.5578231292517006, "f1_std": 0.05785103863783277, "bacc": 0.5612934362934363, "bacc_std": 0.054793760380854206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.06112163447735533, "f1": 0.6690909090909091, "f1_std": 0.062752556636735, "bacc": 0.6684362934362934, "bacc_std": 0.062134056183557176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 166.81005372000556, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.060053617069056896, "f1": 0.5500119360229172, "f1_std": 0.06036407901007265, "bacc": 0.5516409266409266, "bacc_std": 0.06055554867135271} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.058328074375510026, "f1": 0.5381034060279344, "f1_std": 0.06026306468179886, "bacc": 0.5386100386100386, "bacc_std": 0.05904368809544072} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05797219492666622, "f1": 0.5608108108108107, "f1_std": 0.058780344104028014, "bacc": 0.5608108108108107, "bacc_std": 0.05830446002917777} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05769291486859979, "f1": 0.545, "f1_std": 0.06284396798794614, "bacc": 0.5477799227799228, "bacc_std": 0.059471074744243284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0586999188532044, "f1": 0.6235521235521235, "f1_std": 0.05996010379632466, "bacc": 0.6235521235521235, "bacc_std": 0.05975670714394668} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05594671175514265, "f1": 0.6515594541910331, "f1_std": 0.05866704284252312, "bacc": 0.6505791505791505, "bacc_std": 0.05784178320686545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0591129697720075, "f1": 0.61207925519217, "f1_std": 0.05966993474129564, "bacc": 0.6143822393822393, "bacc_std": 0.06016667754693264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.06034236836683114, "f1": 0.6655231560891939, "f1_std": 0.06319334702556247, "bacc": 0.6640926640926641, "bacc_std": 0.06228086211303479} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.057550998222877986, "f1": 0.6233308138070043, "f1_std": 0.06320557878537558, "bacc": 0.6240347490347491, "bacc_std": 0.059739560277934096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05307470229467745, "f1": 0.6934723256391164, "f1_std": 0.057066399502810186, "bacc": 0.6911196911196911, "bacc_std": 0.05520990689687338} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.062038667079634714, "f1": 0.578226387887527, "f1_std": 0.06286586933383013, "bacc": 0.5786679536679536, "bacc_std": 0.06288708177124878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05535010890877287, "f1": 0.6018132810585641, "f1_std": 0.057948411562869895, "bacc": 0.6013513513513513, "bacc_std": 0.05668944780781541} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.05079346740832867, "f1": 0.7523809523809524, "f1_std": 0.050772382306003526, "bacc": 0.7577220077220077, "bacc_std": 0.05025474430928337} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05706689524185085, "f1": 0.6794871794871795, "f1_std": 0.06061470187629927, "bacc": 0.6776061776061776, "bacc_std": 0.05923593322895811} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05212369448367238, "f1": 0.587737843551797, "f1_std": 0.06256446301222388, "bacc": 0.5974903474903475, "bacc_std": 0.05477962264340211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.056959622364282377, "f1": 0.6036585365853658, "f1_std": 0.06257380327162604, "bacc": 0.6061776061776062, "bacc_std": 0.058583127770218034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06184461536547981, "f1": 0.543030303030303, "f1_std": 0.06291070314330456, "bacc": 0.542953667953668, "bacc_std": 0.062407155670860116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05254046848289771, "f1": 0.6934723256391164, "f1_std": 0.05619535575761905, "bacc": 0.6911196911196911, "bacc_std": 0.054373952352268475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06006878896968062, "f1": 0.606060606060606, "f1_std": 0.061954231447803196, "bacc": 0.6056949806949807, "bacc_std": 0.061137770315890495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.055868857432108184, "f1": 0.5045731707317074, "f1_std": 0.06062855122439942, "bacc": 0.5120656370656371, "bacc_std": 0.05673465591389826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.055873559709009174, "f1": 0.61, "f1_std": 0.06072076438755054, "bacc": 0.6105212355212355, "bacc_std": 0.05780868554189184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05759911241919696, "f1": 0.5833333333333333, "f1_std": 0.061314455996662694, "bacc": 0.5834942084942085, "bacc_std": 0.05919270783521584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0559832942114809, "f1": 0.5289855072463768, "f1_std": 0.06305648306764737, "bacc": 0.5390926640926641, "bacc_std": 0.057546567554262705} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06291524495173867, "f1": 0.5512820512820513, "f1_std": 0.06506064222244805, "bacc": 0.5521235521235521, "bacc_std": 0.06355720824505892} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05753898806834366, "f1": 0.545, "f1_std": 0.06210675813321833, "bacc": 0.5477799227799228, "bacc_std": 0.058934098824058326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05908224335018044, "f1": 0.5608108108108107, "f1_std": 0.06082947129951985, "bacc": 0.5608108108108107, "bacc_std": 0.06054215211269054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05742441185526481, "f1": 0.5833333333333333, "f1_std": 0.06075163897680338, "bacc": 0.5834942084942085, "bacc_std": 0.05910838606871134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05842914082066773, "f1": 0.6153846153846154, "f1_std": 0.06205847141649863, "bacc": 0.6148648648648649, "bacc_std": 0.060184162688304225} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.051177389608112994, "f1": 0.6888384983623079, "f1_std": 0.05751432916598011, "bacc": 0.6867760617760618, "bacc_std": 0.054350330515328206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05833706587100408, "f1": 0.6153846153846154, "f1_std": 0.06115090008016818, "bacc": 0.6148648648648649, "bacc_std": 0.05929701330084024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.053203692535464905, "f1": 0.6299171842650104, "f1_std": 0.061449574119516716, "bacc": 0.6332046332046332, "bacc_std": 0.055796633877430245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.057598972706158, "f1": 0.6690909090909091, "f1_std": 0.059417401247724724, "bacc": 0.6684362934362934, "bacc_std": 0.059054261555825736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.059796348664525226, "f1": 0.6094688776736361, "f1_std": 0.06059961458654289, "bacc": 0.61003861003861, "bacc_std": 0.060637416111886784} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06321664824541566, "f1": 0.5830363506771205, "f1_std": 0.06327567102661882, "bacc": 0.5873552123552124, "bacc_std": 0.0635305176903878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0601049260059233, "f1": 0.6094688776736361, "f1_std": 0.06099336664862284, "bacc": 0.61003861003861, "bacc_std": 0.060960328047696986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05739967630776925, "f1": 0.5775, "f1_std": 0.06160772250941067, "bacc": 0.5791505791505791, "bacc_std": 0.058999035712343294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05727768961655708, "f1": 0.5833333333333333, "f1_std": 0.0612895124600859, "bacc": 0.5834942084942085, "bacc_std": 0.05920994080051619} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.057033439791387905, "f1": 0.6474358974358974, "f1_std": 0.0604042532757117, "bacc": 0.6462355212355213, "bacc_std": 0.05874348996396787} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.060267011396964866, "f1": 0.5115151515151515, "f1_std": 0.06097636931758606, "bacc": 0.5115830115830116, "bacc_std": 0.060466999245817644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.057047473223477946, "f1": 0.6719538572458543, "f1_std": 0.05807424881977654, "bacc": 0.6727799227799228, "bacc_std": 0.05778098905312306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06384931224810658, "f1": 0.5115151515151515, "f1_std": 0.06495081908448788, "bacc": 0.5115830115830116, "bacc_std": 0.06437195164776484} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05900876677420922, "f1": 0.6832358674463938, "f1_std": 0.06079852409508094, "bacc": 0.6819498069498069, "bacc_std": 0.059972950229013645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05901438998119078, "f1": 0.6549227799227799, "f1_std": 0.060418535370449826, "bacc": 0.6549227799227799, "bacc_std": 0.06015491623409578} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.7692307692307693, "acc_std": 0.047696436545842685, "f1": 0.7543461829176115, "f1_std": 0.05298493738714161, "bacc": 0.7495173745173744, "bacc_std": 0.05098117558444406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05256295183230718, "f1": 0.587737843551797, "f1_std": 0.06236710783178109, "bacc": 0.5974903474903475, "bacc_std": 0.05495237644144849} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.061814335481390696, "f1": 0.5469838981014179, "f1_std": 0.06265436741856187, "bacc": 0.5472972972972974, "bacc_std": 0.06268921953812513} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05961990057337146, "f1": 0.578226387887527, "f1_std": 0.060209415800580826, "bacc": 0.5786679536679536, "bacc_std": 0.05999540099142303} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.060752862449912494, "f1": 0.545, "f1_std": 0.06452989749697928, "bacc": 0.5477799227799228, "bacc_std": 0.06191087031419503} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06166368797113244, "f1": 0.5833333333333333, "f1_std": 0.06510801941350151, "bacc": 0.5834942084942085, "bacc_std": 0.06325632920159896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05733372872667661, "f1": 0.6285714285714286, "f1_std": 0.05771159844013348, "bacc": 0.6322393822393823, "bacc_std": 0.05811284934126485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06005576107143945, "f1": 0.6336682185738789, "f1_std": 0.0633257923447842, "bacc": 0.6327220077220077, "bacc_std": 0.061846083445892826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05859301814335842, "f1": 0.6474358974358974, "f1_std": 0.06177314106379864, "bacc": 0.6462355212355213, "bacc_std": 0.060223207961485864} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05632198221363815, "f1": 0.570630081300813, "f1_std": 0.06317748496594133, "bacc": 0.5748069498069498, "bacc_std": 0.05861471146566141} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05737863445837389, "f1": 0.6198830409356726, "f1_std": 0.05952979292205311, "bacc": 0.6192084942084942, "bacc_std": 0.05862044092877534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05723092638522675, "f1": 0.5966741126830479, "f1_std": 0.06061712674956581, "bacc": 0.597007722007722, "bacc_std": 0.058455040365419104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.055638894913710636, "f1": 0.6474358974358974, "f1_std": 0.05896469174076757, "bacc": 0.6462355212355213, "bacc_std": 0.05717426364831067} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.055907920917359966, "f1": 0.6862934362934363, "f1_std": 0.057283361951727896, "bacc": 0.6862934362934363, "bacc_std": 0.05735605429576625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06234981292351753, "f1": 0.5745454545454545, "f1_std": 0.06421730766189782, "bacc": 0.5743243243243243, "bacc_std": 0.06359581929880564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.04965176722097291, "f1": 0.5533826638477801, "f1_std": 0.05885195000662475, "bacc": 0.5661196911196912, "bacc_std": 0.05146637768853338} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05531273968623894, "f1": 0.656084656084656, "f1_std": 0.0607033916050876, "bacc": 0.6554054054054055, "bacc_std": 0.05751941254288143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06379953070629278, "f1": 0.5192307692307693, "f1_std": 0.06674136582427402, "bacc": 0.5207528957528957, "bacc_std": 0.06466464764643581} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06195761865078208, "f1": 0.588206627680312, "f1_std": 0.06429508270222113, "bacc": 0.5878378378378378, "bacc_std": 0.06306332598177536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0576063441608124, "f1": 0.6474358974358974, "f1_std": 0.060933759776488065, "bacc": 0.6462355212355213, "bacc_std": 0.05911085262413096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05706082294177766, "f1": 0.6289401836684041, "f1_std": 0.060022546386913726, "bacc": 0.6283783783783784, "bacc_std": 0.05795222625689346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06086489840612704, "f1": 0.588206627680312, "f1_std": 0.06233347550294366, "bacc": 0.5878378378378378, "bacc_std": 0.06154226822861454} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.056164896699822886, "f1": 0.7031963470319634, "f1_std": 0.05685804472136639, "bacc": 0.7041505791505791, "bacc_std": 0.05659037727588138} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06481711367264104, "f1": 0.5565302144249512, "f1_std": 0.06652491537281653, "bacc": 0.5564671814671815, "bacc_std": 0.06574791538933356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.04550180437283154, "f1": 0.44221967963386727, "f1_std": 0.05867887387274566, "bacc": 0.49034749034749037, "bacc_std": 0.04629379231389694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05256489705465867, "f1": 0.6635610766045548, "f1_std": 0.06146919282327064, "bacc": 0.6645752895752897, "bacc_std": 0.05551380181922406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05704100881287543, "f1": 0.5250692869740489, "f1_std": 0.06113188173763211, "bacc": 0.5299227799227799, "bacc_std": 0.057970710864969986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.061606885037499406, "f1": 0.564176245210728, "f1_std": 0.06237952793340967, "bacc": 0.5651544401544402, "bacc_std": 0.06252839864048879} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.056602640706165316, "f1": 0.6036585365853658, "f1_std": 0.06253697406822761, "bacc": 0.6061776061776062, "bacc_std": 0.05860356969650831} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.057301684765136494, "f1": 0.5321419707123356, "f1_std": 0.06108018716067444, "bacc": 0.5342664092664092, "bacc_std": 0.05861726847725752} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.056540083702526986, "f1": 0.6766169154228856, "f1_std": 0.05671445603589706, "bacc": 0.6858108108108107, "bacc_std": 0.056043799900987006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05672801505700417, "f1": 0.7031963470319634, "f1_std": 0.05761821594685017, "bacc": 0.7041505791505791, "bacc_std": 0.05774486798905216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05858334678652339, "f1": 0.6461538461538462, "f1_std": 0.058845841824604964, "bacc": 0.6587837837837838, "bacc_std": 0.05861224359950269} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05444483881299293, "f1": 0.5962732919254659, "f1_std": 0.06252017817309617, "bacc": 0.6018339768339769, "bacc_std": 0.056738937068133835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.056235674836000275, "f1": 0.4834657780056396, "f1_std": 0.06060270293629884, "bacc": 0.4942084942084942, "bacc_std": 0.056604371048379475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06307830392484806, "f1": 0.543030303030303, "f1_std": 0.06444586279533669, "bacc": 0.542953667953668, "bacc_std": 0.0638729356978593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.055511606547986536, "f1": 0.6934723256391164, "f1_std": 0.05994389446395335, "bacc": 0.6911196911196911, "bacc_std": 0.058073946292536406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0573416543661219, "f1": 0.5966741126830479, "f1_std": 0.06056817147972453, "bacc": 0.597007722007722, "bacc_std": 0.05857618572292959} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0576756899143876, "f1": 0.6425000000000001, "f1_std": 0.06267375737144905, "bacc": 0.6418918918918919, "bacc_std": 0.059631558926346084} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05536190987573277, "f1": 0.6036585365853658, "f1_std": 0.06111084899660727, "bacc": 0.6061776061776062, "bacc_std": 0.0570083251147776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05454130620768297, "f1": 0.6973780936045086, "f1_std": 0.05706615160427397, "bacc": 0.6954633204633205, "bacc_std": 0.05599943937432204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.051205352900653596, "f1": 0.6500897205844656, "f1_std": 0.05876883783915455, "bacc": 0.6510617760617761, "bacc_std": 0.05386916494128072} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.059096302911023794, "f1": 0.545, "f1_std": 0.0636138611916929, "bacc": 0.5477799227799228, "bacc_std": 0.06062050875407463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06237432334717349, "f1": 0.5248538011695907, "f1_std": 0.0643843101149295, "bacc": 0.525096525096525, "bacc_std": 0.06333677551300453} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0626141296415532, "f1": 0.6139225469232596, "f1_std": 0.06258907329369325, "bacc": 0.6187258687258688, "bacc_std": 0.06276617063257026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.056838848286679886, "f1": 0.5167905665214048, "f1_std": 0.062423730983525286, "bacc": 0.5255791505791506, "bacc_std": 0.05784262177785035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.056275324871111254, "f1": 0.6425000000000001, "f1_std": 0.06086847907218919, "bacc": 0.6418918918918919, "bacc_std": 0.0581233464928014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.057172269522617905, "f1": 0.6375757575757576, "f1_std": 0.05853808583904013, "bacc": 0.6370656370656371, "bacc_std": 0.05801514468633901} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.053269519021713854, "f1": 0.6167649320687003, "f1_std": 0.0601319991094916, "bacc": 0.6196911196911197, "bacc_std": 0.055392104231222566} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 1.6846 | 16.679 | 0.78074 | 0.055993 | 0.77234 | 0.059765 | 0.76962 | 0.059477 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 1.6846 | 16.679 | 0.62308 | 0.056537 | 0.60605 | 0.060965 | 0.60789 | 0.058689 | + + +done! total time: 0:04:38 diff --git a/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8bd2a200c992bf37395f1bcc3d399227c23054da --- /dev/null +++ b/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..4b81e5b58c6c199849db68a47e20e50615080ede --- /dev/null +++ b/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,2.782559402207126,test,0.7073170731707317,0.05239512626566523,0.4831932773109243,0.06888004073554593,0.4930555555555556,0.05727106900356372 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,train,0.8997289972899729,0.013800265869819932,0.8428467833834041,0.02410363288067936,0.8091667351466842,0.02626784374802943 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,test,0.8292682926829268,0.05108470063439163,0.7402714932126697,0.08565321873966225,0.717741935483871,0.08143451593160979 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.3593813663804626,train,0.986449864498645,0.005694150662512622,0.9806516564069758,0.008313372074815441,0.9709302325581395,0.012215939502716058 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.3593813663804626,test,0.6341463414634146,0.06690327040376882,0.5199063231850116,0.07952891109892937,0.5209677419354839,0.08221166426934493 +flat_mae,patch,logistic,adni_ad_vs_cn,3,0.046415888336127774,train,0.8997289972899729,0.013586838097604963,0.8395369336545807,0.02494603645227089,0.8010723970745337,0.026961264670046406 +flat_mae,patch,logistic,adni_ad_vs_cn,3,0.046415888336127774,test,0.7560975609756098,0.05341004546500095,0.6117424242424243,0.08882721189948364,0.6016129032258064,0.07632943408544551 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.3593813663804626,train,0.983739837398374,0.006215861738751879,0.9766829555986183,0.009186665779833576,0.9651162790697674,0.013335191753485109 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.3593813663804626,test,0.8048780487804879,0.06203855952019573,0.7354838709677419,0.08346023052299587,0.7354838709677419,0.08640819389933369 +flat_mae,patch,logistic,adni_ad_vs_cn,5,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,5,166.81005372000556,test,0.7317073170731707,0.04874327849146353,0.5512437810945273,0.08188023052232454,0.5516129032258065,0.06589196341642893 +flat_mae,patch,logistic,adni_ad_vs_cn,6,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,6,166.81005372000556,test,0.7073170731707317,0.06018561414416876,0.5729166666666666,0.08574032755728911,0.5693548387096774,0.08006642169758016 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,train,0.983739837398374,0.00664379269285346,0.9768796992481203,0.009664725960705015,0.9691634481058427,0.013266447778093961 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,test,0.6829268292682927,0.07046204272233669,0.6072218128224024,0.08292068316311693,0.6209677419354839,0.09060964084206258 +flat_mae,patch,logistic,adni_ad_vs_cn,8,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,21.54434690031882,test,0.6341463414634146,0.06682341081682772,0.5199063231850116,0.07730292753430686,0.5209677419354839,0.0800374191234173 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,train,0.989159891598916,0.005427485420580502,0.9845864661654136,0.007896432607889862,0.9767441860465116,0.011643849536012862 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,test,0.7560975609756098,0.06595069634660443,0.7054597701149425,0.0759259713480271,0.7370967741935484,0.08399224946900569 +flat_mae,patch,logistic,adni_ad_vs_cn,10,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,10,166.81005372000556,test,0.7804878048780488,0.06085433794038842,0.6917293233082706,0.08644835901751768,0.685483870967742,0.08609003460489856 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,test,0.7560975609756098,0.053771499719038,0.6117424242424243,0.08904144990357406,0.6016129032258064,0.07679332988670598 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.3593813663804626,train,0.983739837398374,0.006033238573545207,0.9766829555986183,0.008892676009828794,0.9651162790697674,0.012943401358361502 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.3593813663804626,test,0.7317073170731707,0.06413318978597382,0.6232247284878863,0.08928893509509123,0.6193548387096774,0.08670305490263011 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.046415888336127774,train,0.9105691056910569,0.013963582875717695,0.8612481626234888,0.023661949329445153,0.8283753800640973,0.026103066398491928 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.046415888336127774,test,0.7804878048780488,0.053952079559148526,0.6660633484162897,0.08704193147214698,0.6516129032258065,0.08142878402534323 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,train,0.9051490514905149,0.013522585543027982,0.849799383613421,0.02433392908844472,0.8127003040512779,0.026742612785953655 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.046415888336127774,test,0.7804878048780488,0.04769434708228007,0.6328358208955224,0.09141296876896433,0.6177419354838709,0.07452015461825628 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,train,0.989159891598916,0.0053842148419328655,0.9845864661654136,0.007832819518674315,0.9767441860465116,0.011551019050425783 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,test,0.8048780487804879,0.05996358204711819,0.7354838709677419,0.08078687825342845,0.7354838709677419,0.08385306328742381 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.3593813663804626,train,0.986449864498645,0.005922437942799919,0.9806516564069758,0.008671406165358676,0.9709302325581395,0.012705695354030077 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.3593813663804626,test,0.7317073170731707,0.058535853653963354,0.5918552036199095,0.08845662546951909,0.5854838709677419,0.07886872587354073 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.3593813663804626,train,0.986449864498645,0.005815925257149491,0.9806516564069758,0.008503128090099002,0.9709302325581395,0.012477188487721894 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.3593813663804626,test,0.7560975609756098,0.060531141045646275,0.6440972222222222,0.09213107663108569,0.635483870967742,0.08636960024217871 +flat_mae,patch,logistic,adni_ad_vs_cn,18,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,166.81005372000556,test,0.8048780487804879,0.05011964269800898,0.6893939393939394,0.09301777459776546,0.667741935483871,0.08328667810172022 +flat_mae,patch,logistic,adni_ad_vs_cn,19,0.3593813663804626,train,0.994579945799458,0.0036401585983810237,0.9923570836785418,0.005191393551228629,0.9883720930232558,0.007809410016294203 +flat_mae,patch,logistic,adni_ad_vs_cn,19,0.3593813663804626,test,0.7073170731707317,0.06398875198599953,0.5729166666666666,0.08514338372950521,0.5693548387096774,0.08025601977221036 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,train,0.983739837398374,0.006505137671263451,0.9766829555986183,0.009636863150453432,0.9651162790697674,0.013955789538931448 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,test,0.7317073170731707,0.06761466564372723,0.6479313036690086,0.08367619158675715,0.6532258064516129,0.08691018220595213 +flat_mae,patch,logistic,adni_ad_vs_cn,21,0.3593813663804626,train,0.989159891598916,0.005069428587637272,0.9845864661654136,0.007352850288359732,0.9767441860465116,0.010875692725803267 +flat_mae,patch,logistic,adni_ad_vs_cn,21,0.3593813663804626,test,0.7560975609756098,0.05872885091506293,0.6440972222222222,0.0900047680735921,0.635483870967742,0.08460128952555071 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.046415888336127774,train,0.8997289972899729,0.01239331156736055,0.8395369336545807,0.022904162549516646,0.8010723970745337,0.024703585093402734 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.046415888336127774,test,0.8048780487804879,0.04771168118101039,0.6893939393939394,0.08582386845924725,0.667741935483871,0.0764420543908948 +flat_mae,patch,logistic,adni_ad_vs_cn,23,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,2.782559402207126,test,0.6829268292682927,0.06699996004509272,0.5839188134270101,0.08440314488067352,0.5870967741935484,0.0880611566770308 +flat_mae,patch,logistic,adni_ad_vs_cn,24,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,24,21.54434690031882,test,0.6829268292682927,0.07404299601430603,0.6072218128224024,0.08662857077199279,0.6209677419354839,0.0941268864311338 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,train,0.994579945799458,0.004212114489896403,0.9923570836785418,0.006023515019202035,0.9883720930232558,0.009036454923091743 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,test,0.7317073170731707,0.05561615281994409,0.5918552036199095,0.08821084923397805,0.5854838709677419,0.07932848572387133 +flat_mae,patch,logistic,adni_ad_vs_cn,26,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,26,1291.5496650148827,test,0.7560975609756098,0.051901407471868964,0.6117424242424243,0.08965691908794363,0.6016129032258064,0.07669398520956974 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,train,0.981029810298103,0.0071091609461548496,0.9729123189697663,0.010425016963038165,0.9633494946174705,0.01418837060262456 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.3593813663804626,test,0.7560975609756098,0.054461836290522134,0.6117424242424243,0.08865372100857678,0.6016129032258064,0.07665078832390687 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.046415888336127774,train,0.9214092140921409,0.011717069109075067,0.8780665671539749,0.020319506357353505,0.8435368559454351,0.023879879100992544 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.046415888336127774,test,0.7560975609756098,0.05813962966664935,0.6440972222222222,0.08705799393553106,0.635483870967742,0.08281000633258839 +flat_mae,patch,logistic,adni_ad_vs_cn,29,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,21.54434690031882,test,0.7804878048780488,0.05456476430689265,0.6660633484162897,0.09148617293959133,0.6516129032258065,0.08411044693781554 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,test,0.7560975609756098,0.062139219257236165,0.6440972222222222,0.09269362976971332,0.635483870967742,0.08614559578560603 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,train,0.9159891598915989,0.01283236721851824,0.8696573648887318,0.02219230673696438,0.8359561180047662,0.025031535883863164 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,test,0.7073170731707317,0.03058423773544825,0.4142857142857143,0.010635254685075101,0.46774193548387094,0.02022506043795773 +flat_mae,patch,logistic,adni_ad_vs_cn,32,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,32,10000.0,test,0.6341463414634146,0.07223958749321432,0.5858585858585859,0.07506157902334169,0.6225806451612903,0.0876911932853733 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.046415888336127774,train,0.9186991869918699,0.01222807952475059,0.8732249198350893,0.021403898498785566,0.837722902457063,0.024528604476695432 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.046415888336127774,test,0.7317073170731707,0.06769556023602467,0.6835087719298245,0.0751779309702426,0.7209677419354839,0.0842295683670817 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.046415888336127774,train,0.9132791327913279,0.012905052305095314,0.8647732478240953,0.02268164267454106,0.8301421645163941,0.025647864846080393 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.046415888336127774,test,0.7317073170731707,0.06434105660109192,0.6232247284878863,0.08761058772260787,0.6193548387096774,0.08491268276334624 +flat_mae,patch,logistic,adni_ad_vs_cn,35,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,35,21.54434690031882,test,0.8048780487804879,0.058759312044418734,0.7354838709677419,0.07984617211229046,0.7354838709677419,0.08308949145355508 +flat_mae,patch,logistic,adni_ad_vs_cn,36,0.046415888336127774,train,0.907859078590786,0.01319632941369628,0.8563215758131013,0.022770031811147066,0.8225614265757252,0.025247442060009475 +flat_mae,patch,logistic,adni_ad_vs_cn,36,0.046415888336127774,test,0.7560975609756098,0.05598663476100334,0.6117424242424243,0.0923085718298026,0.6016129032258064,0.07831701340685703 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,test,0.6585365853658537,0.06010538039088824,0.5017361111111112,0.07727363315624428,0.5032258064516129,0.07197076496304068 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.046415888336127774,train,0.9105691056910569,0.013280538612604519,0.8612481626234888,0.022790631625609688,0.8283753800640973,0.025752319900916858 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.046415888336127774,test,0.6829268292682927,0.06945236620822558,0.5839188134270101,0.08529765626997025,0.5870967741935484,0.08827668704446438 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.3593813663804626,train,0.986449864498645,0.0057209795369730365,0.9808134274809954,0.008212511872249497,0.9749774015942148,0.011137736765544826 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.3593813663804626,test,0.6585365853658537,0.05279626471744206,0.4564393939393939,0.06188833782813355,0.4693548387096774,0.054006905462811845 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.3593813663804626,train,0.986449864498645,0.005994726961741729,0.9808134274809954,0.008637607365142274,0.9749774015942148,0.011579654222038573 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.3593813663804626,test,0.7073170731707317,0.06227715298134802,0.5729166666666666,0.08761052341784796,0.5693548387096774,0.0831137282343884 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.046415888336127774,train,0.9132791327913279,0.012668191472465801,0.8661224489795918,0.02135412974702983,0.8341893335524694,0.023913010066434207 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.046415888336127774,test,0.8536585365853658,0.04912599590972733,0.7864583333333333,0.07732871091397592,0.7677419354838709,0.07998928752418306 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,train,0.9051490514905149,0.013078059551406281,0.851341551849166,0.023039949299062095,0.8167474730873532,0.02561229754101489 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,test,0.7804878048780488,0.05987214217551687,0.6917293233082706,0.08525115021043907,0.685483870967742,0.0860933111862319 +flat_mae,patch,logistic,adni_ad_vs_cn,43,0.3593813663804626,train,0.983739837398374,0.00630843086420227,0.9766829555986183,0.009293919687204076,0.9651162790697674,0.013533784819131601 +flat_mae,patch,logistic,adni_ad_vs_cn,43,0.3593813663804626,test,0.8536585365853658,0.050529522060307176,0.7864583333333333,0.08022977196164363,0.7677419354838709,0.08091626406231499 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.3593813663804626,train,0.991869918699187,0.0048631479732164215,0.9884880564885973,0.007002453289693954,0.9825581395348837,0.010433148849516682 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.3593813663804626,test,0.8292682926829268,0.05030351661128897,0.7402714932126697,0.08451242067331728,0.717741935483871,0.08216584963982505 +flat_mae,patch,logistic,adni_ad_vs_cn,45,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,21.54434690031882,test,0.7317073170731707,0.06649129421976709,0.6676492262343405,0.07885062827347293,0.6870967741935483,0.08669912060111847 +flat_mae,patch,logistic,adni_ad_vs_cn,46,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,166.81005372000556,test,0.8292682926829268,0.05619292001106031,0.7602339181286549,0.07894036077011855,0.7516129032258064,0.08073164420901474 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,test,0.7317073170731707,0.06422773283928987,0.6232247284878863,0.08817551462951352,0.6193548387096774,0.08647206516621596 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.046415888336127774,train,0.907859078590786,0.013542831635851987,0.8577551020408163,0.02288483870813395,0.8266085956118004,0.025438760982416795 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.046415888336127774,test,0.6829268292682927,0.06011957151575972,0.5547201336675021,0.08105152493426108,0.5532258064516129,0.07832934312093076 +flat_mae,patch,logistic,adni_ad_vs_cn,49,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,49,21.54434690031882,test,0.6829268292682927,0.0713446945276223,0.5839188134270101,0.08720517013798747,0.5870967741935484,0.08944463288379555 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.046415888336127774,train,0.907859078590786,0.01270238125290116,0.8533249158249159,0.02333684501308953,0.8144670885035747,0.025752934215631677 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.046415888336127774,test,0.7560975609756098,0.06268055419254517,0.6693548387096775,0.08409322654115442,0.6693548387096775,0.08312816546670905 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.046415888336127774,train,0.9051490514905149,0.01318886525332406,0.849799383613421,0.023676661863085466,0.8127003040512779,0.02595724696065407 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.046415888336127774,test,0.6829268292682927,0.05828400792680525,0.5176470588235295,0.07937358489875375,0.5193548387096775,0.07105808172296633 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,train,0.9105691056910569,0.013406935460230801,0.8612481626234888,0.022888500660178127,0.8283753800640973,0.025719411755606304 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,test,0.6829268292682927,0.061453715843370796,0.5176470588235295,0.08369534247399568,0.5193548387096775,0.07427537836608844 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,2.782559402207126,test,0.7073170731707317,0.06436309479387678,0.603225806451613,0.08483543095764062,0.603225806451613,0.08507835869118449 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,train,0.8482384823848238,0.012744671697556238,0.7250372578241431,0.0298307201469078,0.6865601117593887,0.02541868079239816 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,test,0.7317073170731707,0.04706432369789029,0.5512437810945273,0.08274659084225103,0.5516129032258065,0.06599038329017796 +flat_mae,patch,logistic,adni_ad_vs_cn,55,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,55,166.81005372000556,test,0.7073170731707317,0.0656335254794755,0.603225806451613,0.08549048985668554,0.603225806451613,0.08612193398320361 +flat_mae,patch,logistic,adni_ad_vs_cn,56,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,56,21.54434690031882,test,0.7073170731707317,0.06596844557829641,0.603225806451613,0.08521881332041689,0.603225806451613,0.08551911020466542 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.3593813663804626,train,0.986449864498645,0.006028647614830936,0.9808134274809954,0.008670594618725332,0.9749774015942148,0.011760010486122957 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.3593813663804626,test,0.7560975609756098,0.06310287343169176,0.6693548387096775,0.08616786797144868,0.6693548387096775,0.08716796821437486 +flat_mae,patch,logistic,adni_ad_vs_cn,58,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,21.54434690031882,test,0.6585365853658537,0.06438758309358222,0.5017361111111112,0.0820159786068628,0.5032258064516129,0.07638926673846785 +flat_mae,patch,logistic,adni_ad_vs_cn,59,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,2.782559402207126,test,0.6097560975609756,0.06542441861310333,0.47096774193548385,0.07577677272963898,0.47096774193548385,0.07594570161748944 +flat_mae,patch,logistic,adni_ad_vs_cn,60,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,60,166.81005372000556,test,0.8048780487804879,0.06071905049814916,0.764367816091954,0.06882729894616536,0.8032258064516129,0.07250978619947124 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.046415888336127774,train,0.9186991869918699,0.013196642183839166,0.8757185198491109,0.022233416703653915,0.8458172405292136,0.02557016534335575 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.046415888336127774,test,0.6829268292682927,0.06819468578666478,0.5839188134270101,0.08210044978969859,0.5870967741935484,0.08307985451869204 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.3593813663804626,train,0.989159891598916,0.005087534387808546,0.9845864661654136,0.007382790815368213,0.9767441860465116,0.010914535983147454 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.3593813663804626,test,0.7560975609756098,0.05366959974643277,0.6117424242424243,0.09054081459573457,0.6016129032258064,0.07814685740028234 +flat_mae,patch,logistic,adni_ad_vs_cn,63,0.3593813663804626,train,0.989159891598916,0.004934738345729739,0.9845864661654136,0.007165376955761565,0.9767441860465116,0.010586735171943504 +flat_mae,patch,logistic,adni_ad_vs_cn,63,0.3593813663804626,test,0.8536585365853658,0.04908916967536744,0.7864583333333333,0.07696472553404875,0.7677419354838709,0.07851549844685417 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.005994842503189409,train,0.8401084010840109,0.014706931312457516,0.7228777637759194,0.03108824313973837,0.6893540964746487,0.027439947719544607 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.005994842503189409,test,0.7073170731707317,0.047714673481406525,0.4831932773109243,0.07009410268344919,0.5016129032258064,0.05486745205023569 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.3593813663804626,train,0.986449864498645,0.005750112996514331,0.9806516564069758,0.008414269021940387,0.9709302325581395,0.012335998230894136 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.3593813663804626,test,0.7073170731707317,0.06675630697633282,0.603225806451613,0.08723338061021052,0.603225806451613,0.08705867051812205 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,train,0.9132791327913279,0.0127923050933531,0.8647732478240953,0.022378041294235877,0.8301421645163941,0.02545841749605366 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,test,0.8048780487804879,0.05440378263085126,0.7152777777777778,0.08764719550837845,0.7016129032258065,0.08487736608586094 +flat_mae,patch,logistic,adni_ad_vs_cn,67,0.046415888336127774,train,0.8997289972899729,0.013480379799435128,0.8428467833834041,0.023680617982994524,0.8091667351466842,0.025863864937416572 +flat_mae,patch,logistic,adni_ad_vs_cn,67,0.046415888336127774,test,0.8048780487804879,0.049078964890928785,0.6893939393939394,0.08897242067704979,0.667741935483871,0.08025904055002406 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.046415888336127774,train,0.907859078590786,0.013355280810685873,0.8577551020408163,0.022632905174324904,0.8266085956118004,0.025092578268722514 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.046415888336127774,test,0.7560975609756098,0.06284736900301263,0.6693548387096775,0.08399149305131301,0.6693548387096775,0.08584852327798914 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,test,0.7804878048780488,0.06386176325989364,0.7280766396462786,0.07484016202073553,0.7532258064516129,0.07980238202521941 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.3593813663804626,train,0.991869918699187,0.004459107281997879,0.9884880564885973,0.006403188282784408,0.9825581395348837,0.00956634062242574 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.3593813663804626,test,0.7804878048780488,0.055169933789058635,0.6660633484162897,0.09199724296426812,0.6516129032258065,0.08539712427562972 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.3593813663804626,train,0.994579945799458,0.003413253043705045,0.9923570836785418,0.004862093553410207,0.9883720930232558,0.00732261844841375 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.3593813663804626,test,0.6341463414634146,0.05769746117100831,0.44343891402714936,0.06278514411789608,0.45322580645161287,0.05821722526347105 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.3593813663804626,train,0.989159891598916,0.005240539386429103,0.9845864661654136,0.0076170231318542616,0.9767441860465116,0.011242785079025276 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.3593813663804626,test,0.7560975609756098,0.032471100957351465,0.5119047619047619,0.07533385984010389,0.5338709677419355,0.04845950227852332 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.046415888336127774,train,0.907859078590786,0.01348505128131059,0.8577551020408163,0.023214120639250622,0.8266085956118004,0.026381705420095485 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.046415888336127774,test,0.7560975609756098,0.05723504249349867,0.6440972222222222,0.08585314397662934,0.635483870967742,0.08066222400140774 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,train,0.989159891598916,0.005365084393682215,0.9845864661654136,0.007783052631202619,0.9767441860465116,0.011509977565515962 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,test,0.6585365853658537,0.061238049860716535,0.5017361111111112,0.07799426485537997,0.5032258064516129,0.07267502816678274 +flat_mae,patch,logistic,adni_ad_vs_cn,75,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,75,21.54434690031882,test,0.6829268292682927,0.0645348708361379,0.5547201336675021,0.08702029445138852,0.5532258064516129,0.08330504423755765 +flat_mae,patch,logistic,adni_ad_vs_cn,76,0.046415888336127774,train,0.9051490514905149,0.013863022777633755,0.8542933537913061,0.023379486398194375,0.8248418111595037,0.026001490897623165 +flat_mae,patch,logistic,adni_ad_vs_cn,76,0.046415888336127774,test,0.7073170731707317,0.05417514066023472,0.5340909090909092,0.08397366136072622,0.535483870967742,0.07072546761238581 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.046415888336127774,train,0.8943089430894309,0.014055063794469623,0.8308632543926662,0.025588332022846248,0.7934916591338648,0.02704284456586975 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.046415888336127774,test,0.7804878048780488,0.039069333875743885,0.5886287625418061,0.09697222241653465,0.5838709677419355,0.06934543942309773 +flat_mae,patch,logistic,adni_ad_vs_cn,78,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,10000.0,test,0.7317073170731707,0.06922119663121769,0.6479313036690086,0.08744689804953423,0.6532258064516129,0.09049729478942424 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,test,0.8048780487804879,0.06133358335653286,0.764367816091954,0.07055236525156283,0.8032258064516129,0.07501692601237053 +flat_mae,patch,logistic,adni_ad_vs_cn,80,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,21.54434690031882,test,0.5853658536585366,0.07481500484150796,0.4558938329430133,0.07657158039908503,0.45483870967741935,0.07950177217770975 +flat_mae,patch,logistic,adni_ad_vs_cn,81,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,81,166.81005372000556,test,0.7073170731707317,0.06004051774897136,0.5729166666666666,0.08671244037870647,0.5693548387096774,0.07998055460863399 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.046415888336127774,train,0.9024390243902439,0.013578804360109128,0.8493877551020408,0.022946973430868788,0.8190278576711316,0.025385276344031298 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.046415888336127774,test,0.7073170731707317,0.04572176152011295,0.4831932773109243,0.07505254285435471,0.5016129032258064,0.057532174321904744 +flat_mae,patch,logistic,adni_ad_vs_cn,83,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,2.782559402207126,test,0.7560975609756098,0.062046326075095,0.6693548387096775,0.08275222960649664,0.6693548387096775,0.08550985947010609 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.005994842503189409,train,0.8319783197831978,0.012534600433139016,0.691504854368932,0.030343580984318764,0.6597707289013066,0.024701269932676306 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.005994842503189409,test,0.7804878048780488,0.04049012629254444,0.5886287625418061,0.09260849711363478,0.5838709677419355,0.06616432071825584 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.005994842503189409,train,0.8482384823848238,0.0131392343104422,0.7250372578241431,0.03081919861175354,0.6865601117593887,0.026470275888219465 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.005994842503189409,test,0.8780487804878049,0.04659228655016446,0.8144796380090498,0.08394520219782355,0.7838709677419355,0.08384860272527045 +flat_mae,patch,logistic,adni_ad_vs_cn,86,0.3593813663804626,train,0.994579945799458,0.0038144052619373085,0.9923570836785418,0.0054481275458085196,0.9883720930232558,0.008183229893342277 +flat_mae,patch,logistic,adni_ad_vs_cn,86,0.3593813663804626,test,0.6829268292682927,0.06598902077301795,0.5839188134270101,0.08222714614733273,0.5870967741935484,0.08497885021810578 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.005994842503189409,train,0.8346883468834688,0.013376228178814594,0.6944244579899811,0.03316177817476307,0.6615375133536034,0.027144383805822903 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.005994842503189409,test,0.8048780487804879,0.03149161149082581,0.6095238095238095,0.09722792853269492,0.6,0.06455780355619295 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.3593813663804626,train,0.989159891598916,0.005449610469796486,0.9847141673570836,0.007762138796174625,0.9807913550825869,0.010181144480101248 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.3593813663804626,test,0.7560975609756098,0.06194630285848077,0.6693548387096775,0.08495625930361349,0.6693548387096775,0.08677949953798093 +flat_mae,patch,logistic,adni_ad_vs_cn,89,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,89,1291.5496650148827,test,0.6829268292682927,0.06984539738587277,0.5839188134270101,0.0890608755784303,0.5870967741935484,0.0920400918728719 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.046415888336127774,train,0.9105691056910569,0.012663802086467888,0.8568842921784099,0.023399192640178493,0.8162338729558715,0.02609390109930107 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.046415888336127774,test,0.7560975609756098,0.059925216135212005,0.6440972222222222,0.08962622517249541,0.635483870967742,0.08485483619574344 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.046415888336127774,train,0.9132791327913279,0.012390413426159158,0.861952861952862,0.022677030152454692,0.8220478264442436,0.02556560339952054 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.046415888336127774,test,0.7317073170731707,0.03751784029330109,0.4972129319955407,0.07278595793469829,0.5177419354838709,0.05072942845676924 +flat_mae,patch,logistic,adni_ad_vs_cn,92,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,92,166.81005372000556,test,0.5365853658536586,0.07156765892278635,0.42593957258658804,0.07153374823780329,0.42258064516129035,0.07850204097375268 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.046415888336127774,train,0.9051490514905149,0.012764585521942245,0.849799383613421,0.0227168032146466,0.8127003040512779,0.0250288499551646 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.046415888336127774,test,0.7073170731707317,0.06833336767215709,0.603225806451613,0.08745777039161112,0.603225806451613,0.08792361947510177 +flat_mae,patch,logistic,adni_ad_vs_cn,94,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,94,21.54434690031882,test,0.6341463414634146,0.06840088988501007,0.5467943994104643,0.07624683993649593,0.5548387096774194,0.0830766332533759 +flat_mae,patch,logistic,adni_ad_vs_cn,95,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,95,21.54434690031882,test,0.7804878048780488,0.06253610973526368,0.7119437939110069,0.07873588252754389,0.7193548387096774,0.0829862405379737 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,test,0.6585365853658537,0.06916144283841091,0.5876436781609196,0.07718752959111876,0.6048387096774194,0.08668565161334113 +flat_mae,patch,logistic,adni_ad_vs_cn,97,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,97,166.81005372000556,test,0.7560975609756098,0.05924921905272778,0.6440972222222222,0.09129882221750085,0.635483870967742,0.08745588071593703 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,train,0.8997289972899729,0.014014604286437247,0.8428467833834041,0.02444445867086944,0.8091667351466842,0.026596909047181944 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,test,0.8048780487804879,0.050473333615937935,0.6893939393939394,0.09259993930507154,0.667741935483871,0.0822416997636893 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,train,0.8970189701897019,0.013299954915635103,0.8377684191040355,0.023643840591254864,0.8033527816583121,0.025725648070075238 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.046415888336127774,test,0.8048780487804879,0.05016216415457998,0.6893939393939394,0.09325870554743097,0.667741935483871,0.08198745690976576 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.005994842503189409,train,0.8455284552845529,0.014614535690938351,0.7255073409461664,0.03267890540272744,0.688840496343167,0.02844079878432992 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.005994842503189409,test,0.7804878048780488,0.04023189769212457,0.5886287625418061,0.0956869602060133,0.5838709677419355,0.0679063763731595 diff --git a/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..36027e3241ac3420693df6d802b5fad7d56821bb --- /dev/null +++ b/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:46 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:13:47 time: 5.0436 data: 3.8992 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:58 time: 0.1759 data: 0.0498 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:35 time: 0.1659 data: 0.0446 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:25 time: 0.1489 data: 0.0390 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:18 time: 0.1703 data: 0.0499 max mem: 2851 +extract (train) [100/164] eta: 0:00:13 time: 0.1726 data: 0.0512 max mem: 2851 +extract (train) [120/164] eta: 0:00:09 time: 0.1748 data: 0.0518 max mem: 2851 +extract (train) [140/164] eta: 0:00:04 time: 0.1646 data: 0.0470 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1420 data: 0.0386 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1414 data: 0.0384 max mem: 2851 +extract (train) Total time: 0:00:32 (0.1959 s / it) +extract (validation) [ 0/21] eta: 0:01:17 time: 3.7047 data: 3.5630 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1690 data: 0.0550 max mem: 2851 +extract (validation) Total time: 0:00:07 (0.3542 s / it) +extract (test) [ 0/21] eta: 0:01:18 time: 3.7257 data: 3.5965 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1386 data: 0.0408 max mem: 2851 +extract (test) Total time: 0:00:06 (0.3234 s / it) +feature extraction time: 0:00:46 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 2.7826 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 2.7826 | test | 0.70732 | 0.052395 | 0.48319 | 0.06888 | 0.49306 | 0.057271 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05108470063439163, "f1": 0.7402714932126697, "f1_std": 0.08565321873966225, "bacc": 0.717741935483871, "bacc_std": 0.08143451593160979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06690327040376882, "f1": 0.5199063231850116, "f1_std": 0.07952891109892937, "bacc": 0.5209677419354839, "bacc_std": 0.08221166426934493} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05341004546500095, "f1": 0.6117424242424243, "f1_std": 0.08882721189948364, "bacc": 0.6016129032258064, "bacc_std": 0.07632943408544551} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06203855952019573, "f1": 0.7354838709677419, "f1_std": 0.08346023052299587, "bacc": 0.7354838709677419, "bacc_std": 0.08640819389933369} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04874327849146353, "f1": 0.5512437810945273, "f1_std": 0.08188023052232454, "bacc": 0.5516129032258065, "bacc_std": 0.06589196341642893} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06018561414416876, "f1": 0.5729166666666666, "f1_std": 0.08574032755728911, "bacc": 0.5693548387096774, "bacc_std": 0.08006642169758016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07046204272233669, "f1": 0.6072218128224024, "f1_std": 0.08292068316311693, "bacc": 0.6209677419354839, "bacc_std": 0.09060964084206258} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06682341081682772, "f1": 0.5199063231850116, "f1_std": 0.07730292753430686, "bacc": 0.5209677419354839, "bacc_std": 0.0800374191234173} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06595069634660443, "f1": 0.7054597701149425, "f1_std": 0.0759259713480271, "bacc": 0.7370967741935484, "bacc_std": 0.08399224946900569} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06085433794038842, "f1": 0.6917293233082706, "f1_std": 0.08644835901751768, "bacc": 0.685483870967742, "bacc_std": 0.08609003460489856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.053771499719038, "f1": 0.6117424242424243, "f1_std": 0.08904144990357406, "bacc": 0.6016129032258064, "bacc_std": 0.07679332988670598} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06413318978597382, "f1": 0.6232247284878863, "f1_std": 0.08928893509509123, "bacc": 0.6193548387096774, "bacc_std": 0.08670305490263011} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.053952079559148526, "f1": 0.6660633484162897, "f1_std": 0.08704193147214698, "bacc": 0.6516129032258065, "bacc_std": 0.08142878402534323} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04769434708228007, "f1": 0.6328358208955224, "f1_std": 0.09141296876896433, "bacc": 0.6177419354838709, "bacc_std": 0.07452015461825628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05996358204711819, "f1": 0.7354838709677419, "f1_std": 0.08078687825342845, "bacc": 0.7354838709677419, "bacc_std": 0.08385306328742381} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.058535853653963354, "f1": 0.5918552036199095, "f1_std": 0.08845662546951909, "bacc": 0.5854838709677419, "bacc_std": 0.07886872587354073} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060531141045646275, "f1": 0.6440972222222222, "f1_std": 0.09213107663108569, "bacc": 0.635483870967742, "bacc_std": 0.08636960024217871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05011964269800898, "f1": 0.6893939393939394, "f1_std": 0.09301777459776546, "bacc": 0.667741935483871, "bacc_std": 0.08328667810172022} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06398875198599953, "f1": 0.5729166666666666, "f1_std": 0.08514338372950521, "bacc": 0.5693548387096774, "bacc_std": 0.08025601977221036} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06761466564372723, "f1": 0.6479313036690086, "f1_std": 0.08367619158675715, "bacc": 0.6532258064516129, "bacc_std": 0.08691018220595213} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05872885091506293, "f1": 0.6440972222222222, "f1_std": 0.0900047680735921, "bacc": 0.635483870967742, "bacc_std": 0.08460128952555071} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04771168118101039, "f1": 0.6893939393939394, "f1_std": 0.08582386845924725, "bacc": 0.667741935483871, "bacc_std": 0.0764420543908948} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06699996004509272, "f1": 0.5839188134270101, "f1_std": 0.08440314488067352, "bacc": 0.5870967741935484, "bacc_std": 0.0880611566770308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07404299601430603, "f1": 0.6072218128224024, "f1_std": 0.08662857077199279, "bacc": 0.6209677419354839, "bacc_std": 0.0941268864311338} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05561615281994409, "f1": 0.5918552036199095, "f1_std": 0.08821084923397805, "bacc": 0.5854838709677419, "bacc_std": 0.07932848572387133} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 1291.5496650148827, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.051901407471868964, "f1": 0.6117424242424243, "f1_std": 0.08965691908794363, "bacc": 0.6016129032258064, "bacc_std": 0.07669398520956974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.054461836290522134, "f1": 0.6117424242424243, "f1_std": 0.08865372100857678, "bacc": 0.6016129032258064, "bacc_std": 0.07665078832390687} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05813962966664935, "f1": 0.6440972222222222, "f1_std": 0.08705799393553106, "bacc": 0.635483870967742, "bacc_std": 0.08281000633258839} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05456476430689265, "f1": 0.6660633484162897, "f1_std": 0.09148617293959133, "bacc": 0.6516129032258065, "bacc_std": 0.08411044693781554} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.062139219257236165, "f1": 0.6440972222222222, "f1_std": 0.09269362976971332, "bacc": 0.635483870967742, "bacc_std": 0.08614559578560603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.03058423773544825, "f1": 0.4142857142857143, "f1_std": 0.010635254685075101, "bacc": 0.46774193548387094, "bacc_std": 0.02022506043795773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 10000.0, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07223958749321432, "f1": 0.5858585858585859, "f1_std": 0.07506157902334169, "bacc": 0.6225806451612903, "bacc_std": 0.0876911932853733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06769556023602467, "f1": 0.6835087719298245, "f1_std": 0.0751779309702426, "bacc": 0.7209677419354839, "bacc_std": 0.0842295683670817} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06434105660109192, "f1": 0.6232247284878863, "f1_std": 0.08761058772260787, "bacc": 0.6193548387096774, "bacc_std": 0.08491268276334624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.058759312044418734, "f1": 0.7354838709677419, "f1_std": 0.07984617211229046, "bacc": 0.7354838709677419, "bacc_std": 0.08308949145355508} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05598663476100334, "f1": 0.6117424242424243, "f1_std": 0.0923085718298026, "bacc": 0.6016129032258064, "bacc_std": 0.07831701340685703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06010538039088824, "f1": 0.5017361111111112, "f1_std": 0.07727363315624428, "bacc": 0.5032258064516129, "bacc_std": 0.07197076496304068} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06945236620822558, "f1": 0.5839188134270101, "f1_std": 0.08529765626997025, "bacc": 0.5870967741935484, "bacc_std": 0.08827668704446438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05279626471744206, "f1": 0.4564393939393939, "f1_std": 0.06188833782813355, "bacc": 0.4693548387096774, "bacc_std": 0.054006905462811845} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06227715298134802, "f1": 0.5729166666666666, "f1_std": 0.08761052341784796, "bacc": 0.5693548387096774, "bacc_std": 0.0831137282343884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04912599590972733, "f1": 0.7864583333333333, "f1_std": 0.07732871091397592, "bacc": 0.7677419354838709, "bacc_std": 0.07998928752418306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05987214217551687, "f1": 0.6917293233082706, "f1_std": 0.08525115021043907, "bacc": 0.685483870967742, "bacc_std": 0.0860933111862319} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.050529522060307176, "f1": 0.7864583333333333, "f1_std": 0.08022977196164363, "bacc": 0.7677419354838709, "bacc_std": 0.08091626406231499} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05030351661128897, "f1": 0.7402714932126697, "f1_std": 0.08451242067331728, "bacc": 0.717741935483871, "bacc_std": 0.08216584963982505} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06649129421976709, "f1": 0.6676492262343405, "f1_std": 0.07885062827347293, "bacc": 0.6870967741935483, "bacc_std": 0.08669912060111847} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05619292001106031, "f1": 0.7602339181286549, "f1_std": 0.07894036077011855, "bacc": 0.7516129032258064, "bacc_std": 0.08073164420901474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06422773283928987, "f1": 0.6232247284878863, "f1_std": 0.08817551462951352, "bacc": 0.6193548387096774, "bacc_std": 0.08647206516621596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06011957151575972, "f1": 0.5547201336675021, "f1_std": 0.08105152493426108, "bacc": 0.5532258064516129, "bacc_std": 0.07832934312093076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0713446945276223, "f1": 0.5839188134270101, "f1_std": 0.08720517013798747, "bacc": 0.5870967741935484, "bacc_std": 0.08944463288379555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06268055419254517, "f1": 0.6693548387096775, "f1_std": 0.08409322654115442, "bacc": 0.6693548387096775, "bacc_std": 0.08312816546670905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05828400792680525, "f1": 0.5176470588235295, "f1_std": 0.07937358489875375, "bacc": 0.5193548387096775, "bacc_std": 0.07105808172296633} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.061453715843370796, "f1": 0.5176470588235295, "f1_std": 0.08369534247399568, "bacc": 0.5193548387096775, "bacc_std": 0.07427537836608844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06436309479387678, "f1": 0.603225806451613, "f1_std": 0.08483543095764062, "bacc": 0.603225806451613, "bacc_std": 0.08507835869118449} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04706432369789029, "f1": 0.5512437810945273, "f1_std": 0.08274659084225103, "bacc": 0.5516129032258065, "bacc_std": 0.06599038329017796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0656335254794755, "f1": 0.603225806451613, "f1_std": 0.08549048985668554, "bacc": 0.603225806451613, "bacc_std": 0.08612193398320361} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06596844557829641, "f1": 0.603225806451613, "f1_std": 0.08521881332041689, "bacc": 0.603225806451613, "bacc_std": 0.08551911020466542} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06310287343169176, "f1": 0.6693548387096775, "f1_std": 0.08616786797144868, "bacc": 0.6693548387096775, "bacc_std": 0.08716796821437486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06438758309358222, "f1": 0.5017361111111112, "f1_std": 0.0820159786068628, "bacc": 0.5032258064516129, "bacc_std": 0.07638926673846785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06542441861310333, "f1": 0.47096774193548385, "f1_std": 0.07577677272963898, "bacc": 0.47096774193548385, "bacc_std": 0.07594570161748944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06071905049814916, "f1": 0.764367816091954, "f1_std": 0.06882729894616536, "bacc": 0.8032258064516129, "bacc_std": 0.07250978619947124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06819468578666478, "f1": 0.5839188134270101, "f1_std": 0.08210044978969859, "bacc": 0.5870967741935484, "bacc_std": 0.08307985451869204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05366959974643277, "f1": 0.6117424242424243, "f1_std": 0.09054081459573457, "bacc": 0.6016129032258064, "bacc_std": 0.07814685740028234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 0.3593813663804626, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04908916967536744, "f1": 0.7864583333333333, "f1_std": 0.07696472553404875, "bacc": 0.7677419354838709, "bacc_std": 0.07851549844685417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.047714673481406525, "f1": 0.4831932773109243, "f1_std": 0.07009410268344919, "bacc": 0.5016129032258064, "bacc_std": 0.05486745205023569} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06675630697633282, "f1": 0.603225806451613, "f1_std": 0.08723338061021052, "bacc": 0.603225806451613, "bacc_std": 0.08705867051812205} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05440378263085126, "f1": 0.7152777777777778, "f1_std": 0.08764719550837845, "bacc": 0.7016129032258065, "bacc_std": 0.08487736608586094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.049078964890928785, "f1": 0.6893939393939394, "f1_std": 0.08897242067704979, "bacc": 0.667741935483871, "bacc_std": 0.08025904055002406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06284736900301263, "f1": 0.6693548387096775, "f1_std": 0.08399149305131301, "bacc": 0.6693548387096775, "bacc_std": 0.08584852327798914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06386176325989364, "f1": 0.7280766396462786, "f1_std": 0.07484016202073553, "bacc": 0.7532258064516129, "bacc_std": 0.07980238202521941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.055169933789058635, "f1": 0.6660633484162897, "f1_std": 0.09199724296426812, "bacc": 0.6516129032258065, "bacc_std": 0.08539712427562972} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.05769746117100831, "f1": 0.44343891402714936, "f1_std": 0.06278514411789608, "bacc": 0.45322580645161287, "bacc_std": 0.05821722526347105} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.032471100957351465, "f1": 0.5119047619047619, "f1_std": 0.07533385984010389, "bacc": 0.5338709677419355, "bacc_std": 0.04845950227852332} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05723504249349867, "f1": 0.6440972222222222, "f1_std": 0.08585314397662934, "bacc": 0.635483870967742, "bacc_std": 0.08066222400140774} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.061238049860716535, "f1": 0.5017361111111112, "f1_std": 0.07799426485537997, "bacc": 0.5032258064516129, "bacc_std": 0.07267502816678274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0645348708361379, "f1": 0.5547201336675021, "f1_std": 0.08702029445138852, "bacc": 0.5532258064516129, "bacc_std": 0.08330504423755765} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05417514066023472, "f1": 0.5340909090909092, "f1_std": 0.08397366136072622, "bacc": 0.535483870967742, "bacc_std": 0.07072546761238581} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.039069333875743885, "f1": 0.5886287625418061, "f1_std": 0.09697222241653465, "bacc": 0.5838709677419355, "bacc_std": 0.06934543942309773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 10000.0, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06922119663121769, "f1": 0.6479313036690086, "f1_std": 0.08744689804953423, "bacc": 0.6532258064516129, "bacc_std": 0.09049729478942424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06133358335653286, "f1": 0.764367816091954, "f1_std": 0.07055236525156283, "bacc": 0.8032258064516129, "bacc_std": 0.07501692601237053} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 21.54434690031882, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.07481500484150796, "f1": 0.4558938329430133, "f1_std": 0.07657158039908503, "bacc": 0.45483870967741935, "bacc_std": 0.07950177217770975} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06004051774897136, "f1": 0.5729166666666666, "f1_std": 0.08671244037870647, "bacc": 0.5693548387096774, "bacc_std": 0.07998055460863399} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04572176152011295, "f1": 0.4831932773109243, "f1_std": 0.07505254285435471, "bacc": 0.5016129032258064, "bacc_std": 0.057532174321904744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.062046326075095, "f1": 0.6693548387096775, "f1_std": 0.08275222960649664, "bacc": 0.6693548387096775, "bacc_std": 0.08550985947010609} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04049012629254444, "f1": 0.5886287625418061, "f1_std": 0.09260849711363478, "bacc": 0.5838709677419355, "bacc_std": 0.06616432071825584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.04659228655016446, "f1": 0.8144796380090498, "f1_std": 0.08394520219782355, "bacc": 0.7838709677419355, "bacc_std": 0.08384860272527045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06598902077301795, "f1": 0.5839188134270101, "f1_std": 0.08222714614733273, "bacc": 0.5870967741935484, "bacc_std": 0.08497885021810578} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.03149161149082581, "f1": 0.6095238095238095, "f1_std": 0.09722792853269492, "bacc": 0.6, "bacc_std": 0.06455780355619295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06194630285848077, "f1": 0.6693548387096775, "f1_std": 0.08495625930361349, "bacc": 0.6693548387096775, "bacc_std": 0.08677949953798093} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 1291.5496650148827, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06984539738587277, "f1": 0.5839188134270101, "f1_std": 0.0890608755784303, "bacc": 0.5870967741935484, "bacc_std": 0.0920400918728719} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.059925216135212005, "f1": 0.6440972222222222, "f1_std": 0.08962622517249541, "bacc": 0.635483870967742, "bacc_std": 0.08485483619574344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.03751784029330109, "f1": 0.4972129319955407, "f1_std": 0.07278595793469829, "bacc": 0.5177419354838709, "bacc_std": 0.05072942845676924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 166.81005372000556, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.07156765892278635, "f1": 0.42593957258658804, "f1_std": 0.07153374823780329, "bacc": 0.42258064516129035, "bacc_std": 0.07850204097375268} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06833336767215709, "f1": 0.603225806451613, "f1_std": 0.08745777039161112, "bacc": 0.603225806451613, "bacc_std": 0.08792361947510177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06840088988501007, "f1": 0.5467943994104643, "f1_std": 0.07624683993649593, "bacc": 0.5548387096774194, "bacc_std": 0.0830766332533759} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06253610973526368, "f1": 0.7119437939110069, "f1_std": 0.07873588252754389, "bacc": 0.7193548387096774, "bacc_std": 0.0829862405379737} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06916144283841091, "f1": 0.5876436781609196, "f1_std": 0.07718752959111876, "bacc": 0.6048387096774194, "bacc_std": 0.08668565161334113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05924921905272778, "f1": 0.6440972222222222, "f1_std": 0.09129882221750085, "bacc": 0.635483870967742, "bacc_std": 0.08745588071593703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.050473333615937935, "f1": 0.6893939393939394, "f1_std": 0.09259993930507154, "bacc": 0.667741935483871, "bacc_std": 0.0822416997636893} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05016216415457998, "f1": 0.6893939393939394, "f1_std": 0.09325870554743097, "bacc": 0.667741935483871, "bacc_std": 0.08198745690976576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04023189769212457, "f1": 0.5886287625418061, "f1_std": 0.0956869602060133, "bacc": 0.5838709677419355, "bacc_std": 0.0679063763731595} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 253.72 | 1411.8 | 0.95957 | 0.050176 | 0.93488 | 0.084595 | 0.92034 | 0.099077 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 253.72 | 1411.8 | 0.73659 | 0.061917 | 0.61789 | 0.086627 | 0.6175 | 0.083039 | + + +done! total time: 0:04:39 diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..28501b0a5ebca1978392b2ae2329ecbaf9472604 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..1b42d06a6bd6917fef1674a29978d63bc7d00ff3 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 13, "eval/id_best": 39, "eval/lr_best": 0.0036, "eval/wd_best": 0.05, "eval/train/loss": 0.000264921342022717, "eval/train/acc": 0.9998947313016474, "eval/train/acc_std": 7.182658477146743e-05, "eval/train/f1": 0.9998855615993159, "eval/train/f1_std": 8.073363373038127e-05, "eval/validation/loss": 0.15324951708316803, "eval/validation/acc": 0.9915674603174603, "eval/validation/acc_std": 0.0014457986134170575, "eval/validation/f1": 0.9905728077034007, "eval/validation/f1_std": 0.0017972191505517602, "eval/test/loss": 0.2622224688529968, "eval/test/acc": 0.9880952380952381, "eval/test/acc_std": 0.0014792230754789718, "eval/test/f1": 0.985845188625798, "eval/test/f1_std": 0.0019358341859476485} diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..27071e6c2ae48206833ab8b755fc7e8e85933f7e --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 13, "eval/best/id_best": 39, "eval/best/lr_best": 0.0036, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.000264921342022717, "eval/best/train/acc": 0.9998947313016474, "eval/best/train/acc_std": 7.182658477146743e-05, "eval/best/train/f1": 0.9998855615993159, "eval/best/train/f1_std": 8.073363373038127e-05, "eval/best/validation/loss": 0.15324951708316803, "eval/best/validation/acc": 0.9915674603174603, "eval/best/validation/acc_std": 0.0014457986134170575, "eval/best/validation/f1": 0.9905728077034007, "eval/best/validation/f1_std": 0.0017972191505517602, "eval/best/test/loss": 0.2622224688529968, "eval/best/test/acc": 0.9880952380952381, "eval/best/test/acc_std": 0.0014792230754789718, "eval/best/test/f1": 0.985845188625798, "eval/best/test/f1_std": 0.0019358341859476485} diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..02f43ef23e3940096c482e0a664bdd7eadd3eaec --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 39, "eval/last/lr_best": 0.0036, "eval/last/wd_best": 0.05, "eval/last/train/loss": 1.4975109934312059e-06, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.1336444765329361, "eval/last/validation/acc": 0.9915674603174603, "eval/last/validation/acc_std": 0.0015000875903501724, "eval/last/validation/f1": 0.9906587083498731, "eval/last/validation/f1_std": 0.0018038708999695713, "eval/last/test/loss": 0.2371741682291031, "eval/last/test/acc": 0.9878968253968254, "eval/last/test/acc_std": 0.0014541534418005275, "eval/last/test/f1": 0.9850852057062511, "eval/last/test/f1_std": 0.0019823753310718954} diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..93bfbf1cd549a8e3700851e9d3650d49da68a362 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,13,0.0036,0.05,39,"[12, 1.0]",train,0.000264921342022717,0.9998947313016474,7.182658477146743e-05,0.9998855615993159,8.073363373038127e-05 +flat_mae,patch,attn,hcpya_task21,best,13,0.0036,0.05,39,"[12, 1.0]",validation,0.15324951708316803,0.9915674603174603,0.0014457986134170575,0.9905728077034007,0.0017972191505517602 +flat_mae,patch,attn,hcpya_task21,best,13,0.0036,0.05,39,"[12, 1.0]",test,0.2622224688529968,0.9880952380952381,0.0014792230754789718,0.985845188625798,0.0019358341859476485 diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..93bfbf1cd549a8e3700851e9d3650d49da68a362 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,13,0.0036,0.05,39,"[12, 1.0]",train,0.000264921342022717,0.9998947313016474,7.182658477146743e-05,0.9998855615993159,8.073363373038127e-05 +flat_mae,patch,attn,hcpya_task21,best,13,0.0036,0.05,39,"[12, 1.0]",validation,0.15324951708316803,0.9915674603174603,0.0014457986134170575,0.9905728077034007,0.0017972191505517602 +flat_mae,patch,attn,hcpya_task21,best,13,0.0036,0.05,39,"[12, 1.0]",test,0.2622224688529968,0.9880952380952381,0.0014792230754789718,0.985845188625798,0.0019358341859476485 diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..dbe18e0e9d06aebf5affc1cf3646eb4749dd9938 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0036,0.05,39,"[12, 1.0]",train,1.4975109934312059e-06,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.0036,0.05,39,"[12, 1.0]",validation,0.1336444765329361,0.9915674603174603,0.0015000875903501724,0.9906587083498731,0.0018038708999695713 +flat_mae,patch,attn,hcpya_task21,last,19,0.0036,0.05,39,"[12, 1.0]",test,0.2371741682291031,0.9878968253968254,0.0014541534418005275,0.9850852057062511,0.0019823753310718954 diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e55a9713df6c1a5f1b7faee183edee6fb36424c9 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,886 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:40:26 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:22 lr: nan time: 3.5058 data: 2.8706 max mem: 21740 +train: [0] [ 20/400] eta: 0:04:06 lr: 0.000003 loss: 3.0699 (3.0736) grad: 0.2645 (0.2716) time: 0.5055 data: 0.0049 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:20 lr: 0.000006 loss: 3.0477 (3.0297) grad: 0.2642 (0.2718) time: 0.4632 data: 0.0034 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:59 lr: 0.000009 loss: 2.9063 (2.9760) grad: 0.2630 (0.2677) time: 0.4664 data: 0.0034 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:44 lr: 0.000012 loss: 2.8014 (2.9158) grad: 0.2474 (0.2607) time: 0.4697 data: 0.0033 max mem: 22446 +train: [0] [100/400] eta: 0:02:31 lr: 0.000015 loss: 2.6392 (2.8484) grad: 0.2330 (0.2565) time: 0.4698 data: 0.0034 max mem: 22446 +train: [0] [120/400] eta: 0:02:19 lr: 0.000018 loss: 2.5225 (2.7822) grad: 0.2362 (0.2521) time: 0.4665 data: 0.0034 max mem: 22446 +train: [0] [140/400] eta: 0:02:08 lr: 0.000021 loss: 2.3724 (2.7164) grad: 0.2282 (0.2502) time: 0.4739 data: 0.0034 max mem: 22446 +train: [0] [160/400] eta: 0:01:58 lr: 0.000024 loss: 2.2567 (2.6562) grad: 0.2227 (0.2453) time: 0.4730 data: 0.0033 max mem: 22446 +train: [0] [180/400] eta: 0:01:47 lr: 0.000027 loss: 2.1820 (2.5968) grad: 0.2045 (0.2412) time: 0.4683 data: 0.0032 max mem: 22446 +train: [0] [200/400] eta: 0:01:37 lr: 0.000030 loss: 2.0987 (2.5404) grad: 0.2045 (0.2383) time: 0.4691 data: 0.0034 max mem: 22446 +train: [0] [220/400] eta: 0:01:27 lr: 0.000033 loss: 1.9837 (2.4873) grad: 0.1963 (0.2345) time: 0.4611 data: 0.0034 max mem: 22446 +train: [0] [240/400] eta: 0:01:17 lr: 0.000036 loss: 1.8967 (2.4336) grad: 0.2049 (0.2324) time: 0.4677 data: 0.0034 max mem: 22446 +train: [0] [260/400] eta: 0:01:07 lr: 0.000039 loss: 1.8142 (2.3854) grad: 0.2058 (0.2302) time: 0.4540 data: 0.0032 max mem: 22446 +train: [0] [280/400] eta: 0:00:57 lr: 0.000042 loss: 1.7966 (2.3424) grad: 0.1911 (0.2269) time: 0.4824 data: 0.0035 max mem: 22446 +train: [0] [300/400] eta: 0:00:49 lr: 0.000045 loss: 1.7399 (2.3003) grad: 0.1764 (0.2236) time: 0.6452 data: 0.1855 max mem: 22446 +train: [0] [320/400] eta: 0:00:39 lr: 0.000048 loss: 1.6857 (2.2597) grad: 0.1747 (0.2208) time: 0.4605 data: 0.0030 max mem: 22446 +train: [0] [340/400] eta: 0:00:29 lr: 0.000051 loss: 1.6060 (2.2208) grad: 0.1841 (0.2189) time: 0.4758 data: 0.0030 max mem: 22446 +train: [0] [360/400] eta: 0:00:19 lr: 0.000054 loss: 1.5894 (2.1848) grad: 0.1773 (0.2166) time: 0.4668 data: 0.0034 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.5472 (2.1501) grad: 0.1737 (0.2142) time: 0.4524 data: 0.0033 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.4950 (2.1159) grad: 0.1734 (0.2125) time: 0.4680 data: 0.0032 max mem: 22446 +train: [0] Total time: 0:03:14 (0.4859 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.4950 (2.1159) grad: 0.1734 (0.2125) +eval (validation): [0] [ 0/63] eta: 0:03:30 time: 3.3378 data: 3.0517 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:21 time: 0.3686 data: 0.0045 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:10 time: 0.3616 data: 0.0028 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3353 data: 0.0033 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3306 data: 0.0031 max mem: 22446 +eval (validation): [0] Total time: 0:00:25 (0.4062 s / it) +cv: [0] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.052 acc: 0.983 f1: 0.980 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:23:12 lr: nan time: 3.4820 data: 3.0737 max mem: 22446 +train: [1] [ 20/400] eta: 0:04:00 lr: 0.000063 loss: 1.4351 (1.4466) grad: 0.1680 (0.1743) time: 0.4916 data: 0.0037 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:17 lr: 0.000066 loss: 1.4154 (1.4239) grad: 0.1694 (0.1724) time: 0.4577 data: 0.0031 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:57 lr: 0.000069 loss: 1.3900 (1.4041) grad: 0.1660 (0.1689) time: 0.4659 data: 0.0033 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:43 lr: 0.000072 loss: 1.3512 (1.3875) grad: 0.1577 (0.1669) time: 0.4743 data: 0.0035 max mem: 22446 +train: [1] [100/400] eta: 0:02:30 lr: 0.000075 loss: 1.3292 (1.3754) grad: 0.1603 (0.1664) time: 0.4661 data: 0.0035 max mem: 22446 +train: [1] [120/400] eta: 0:02:19 lr: 0.000078 loss: 1.2936 (1.3564) grad: 0.1548 (0.1647) time: 0.4738 data: 0.0035 max mem: 22446 +train: [1] [140/400] eta: 0:02:07 lr: 0.000081 loss: 1.2427 (1.3410) grad: 0.1526 (0.1627) time: 0.4668 data: 0.0035 max mem: 22446 +train: [1] [160/400] eta: 0:01:57 lr: 0.000084 loss: 1.2242 (1.3247) grad: 0.1505 (0.1614) time: 0.4631 data: 0.0033 max mem: 22446 +train: [1] [180/400] eta: 0:01:46 lr: 0.000087 loss: 1.1932 (1.3104) grad: 0.1510 (0.1608) time: 0.4635 data: 0.0034 max mem: 22446 +train: [1] [200/400] eta: 0:01:36 lr: 0.000090 loss: 1.1822 (1.2955) grad: 0.1463 (0.1595) time: 0.4700 data: 0.0034 max mem: 22446 +train: [1] [220/400] eta: 0:01:27 lr: 0.000093 loss: 1.1384 (1.2801) grad: 0.1497 (0.1595) time: 0.4763 data: 0.0034 max mem: 22446 +train: [1] [240/400] eta: 0:01:17 lr: 0.000096 loss: 1.1151 (1.2662) grad: 0.1527 (0.1585) time: 0.4660 data: 0.0034 max mem: 22446 +train: [1] [260/400] eta: 0:01:07 lr: 0.000099 loss: 1.1130 (1.2541) grad: 0.1495 (0.1577) time: 0.4769 data: 0.0034 max mem: 22446 +train: [1] [280/400] eta: 0:00:57 lr: 0.000102 loss: 1.0772 (1.2410) grad: 0.1397 (0.1572) time: 0.4742 data: 0.0034 max mem: 22446 +train: [1] [300/400] eta: 0:00:48 lr: 0.000105 loss: 1.0537 (1.2284) grad: 0.1388 (0.1558) time: 0.6064 data: 0.1670 max mem: 22446 +train: [1] [320/400] eta: 0:00:38 lr: 0.000108 loss: 1.0391 (1.2166) grad: 0.1388 (0.1548) time: 0.4529 data: 0.0024 max mem: 22446 +train: [1] [340/400] eta: 0:00:29 lr: 0.000111 loss: 1.0158 (1.2038) grad: 0.1349 (0.1534) time: 0.4644 data: 0.0032 max mem: 22446 +train: [1] [360/400] eta: 0:00:19 lr: 0.000114 loss: 1.0072 (1.1931) grad: 0.1312 (0.1521) time: 0.4545 data: 0.0033 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 0.9917 (1.1824) grad: 0.1340 (0.1514) time: 0.4584 data: 0.0033 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 0.9745 (1.1719) grad: 0.1347 (0.1505) time: 0.4642 data: 0.0033 max mem: 22446 +train: [1] Total time: 0:03:12 (0.4822 s / it) +train: [1] Summary: lr: 0.000120 loss: 0.9745 (1.1719) grad: 0.1347 (0.1505) +eval (validation): [1] [ 0/63] eta: 0:03:23 time: 3.2289 data: 2.9883 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:21 time: 0.3573 data: 0.0034 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3447 data: 0.0031 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3324 data: 0.0032 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3316 data: 0.0031 max mem: 22446 +eval (validation): [1] Total time: 0:00:24 (0.3934 s / it) +cv: [1] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.039 acc: 0.987 f1: 0.984 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:23:09 lr: nan time: 3.4728 data: 3.0627 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:51 lr: 0.000123 loss: 0.9118 (0.9273) grad: 0.1417 (0.1453) time: 0.4650 data: 0.0031 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:12 lr: 0.000126 loss: 0.9252 (0.9334) grad: 0.1467 (0.1451) time: 0.4576 data: 0.0031 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:53 lr: 0.000129 loss: 0.9196 (0.9209) grad: 0.1445 (0.1446) time: 0.4628 data: 0.0034 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:39 lr: 0.000132 loss: 0.8938 (0.9175) grad: 0.1489 (0.1487) time: 0.4653 data: 0.0035 max mem: 22446 +train: [2] [100/400] eta: 0:02:27 lr: 0.000135 loss: 0.8938 (0.9156) grad: 0.1510 (0.1505) time: 0.4641 data: 0.0036 max mem: 22446 +train: [2] [120/400] eta: 0:02:17 lr: 0.000138 loss: 0.8692 (0.9133) grad: 0.1494 (0.1517) time: 0.4725 data: 0.0036 max mem: 22446 +train: [2] [140/400] eta: 0:02:06 lr: 0.000141 loss: 0.8496 (0.9057) grad: 0.1595 (0.1545) time: 0.4690 data: 0.0035 max mem: 22446 +train: [2] [160/400] eta: 0:01:56 lr: 0.000144 loss: 0.8540 (0.9033) grad: 0.1603 (0.1569) time: 0.4661 data: 0.0033 max mem: 22446 +train: [2] [180/400] eta: 0:01:46 lr: 0.000147 loss: 0.8536 (0.8987) grad: 0.1597 (0.1588) time: 0.4676 data: 0.0033 max mem: 22446 +train: [2] [200/400] eta: 0:01:36 lr: 0.000150 loss: 0.8355 (0.8911) grad: 0.1555 (0.1586) time: 0.4699 data: 0.0033 max mem: 22446 +train: [2] [220/400] eta: 0:01:26 lr: 0.000153 loss: 0.8292 (0.8910) grad: 0.1549 (0.1601) time: 0.4619 data: 0.0033 max mem: 22446 +train: [2] [240/400] eta: 0:01:16 lr: 0.000156 loss: 0.8684 (0.8880) grad: 0.1791 (0.1635) time: 0.4595 data: 0.0033 max mem: 22446 +train: [2] [260/400] eta: 0:01:06 lr: 0.000159 loss: 0.8390 (0.8852) grad: 0.1952 (0.1680) time: 0.4747 data: 0.0034 max mem: 22446 +train: [2] [280/400] eta: 0:00:57 lr: 0.000162 loss: 0.8424 (0.8837) grad: 0.1971 (0.1705) time: 0.4657 data: 0.0034 max mem: 22446 +train: [2] [300/400] eta: 0:00:48 lr: 0.000165 loss: 0.8562 (0.8811) grad: 0.2095 (0.1745) time: 0.6344 data: 0.1722 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 0.8141 (0.8802) grad: 0.2304 (0.1790) time: 0.4606 data: 0.0055 max mem: 22446 +train: [2] [340/400] eta: 0:00:29 lr: 0.000171 loss: 0.8534 (0.8792) grad: 0.2612 (0.1838) time: 0.4661 data: 0.0033 max mem: 22446 +train: [2] [360/400] eta: 0:00:19 lr: 0.000174 loss: 0.9001 (0.8811) grad: 0.2554 (0.1876) time: 0.4773 data: 0.0036 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.8331 (0.8789) grad: 0.2317 (0.1920) time: 0.4669 data: 0.0034 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.7825 (0.8744) grad: 0.2345 (0.1942) time: 0.4600 data: 0.0033 max mem: 22446 +train: [2] Total time: 0:03:12 (0.4821 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.7825 (0.8744) grad: 0.2345 (0.1942) +eval (validation): [2] [ 0/63] eta: 0:03:28 time: 3.3016 data: 3.0558 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:21 time: 0.3704 data: 0.0028 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3396 data: 0.0030 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3408 data: 0.0032 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3328 data: 0.0031 max mem: 22446 +eval (validation): [2] Total time: 0:00:25 (0.4013 s / it) +cv: [2] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.040 acc: 0.988 f1: 0.988 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:07 lr: nan time: 3.3185 data: 2.9629 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:43 lr: 0.000183 loss: 0.6560 (0.7025) grad: 0.2134 (0.2613) time: 0.4507 data: 0.0036 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:09 lr: 0.000186 loss: 0.7090 (0.7275) grad: 0.2137 (0.2420) time: 0.4633 data: 0.0032 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:50 lr: 0.000189 loss: 0.7315 (0.7420) grad: 0.2265 (0.2416) time: 0.4508 data: 0.0035 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:37 lr: 0.000192 loss: 0.7188 (0.7590) grad: 0.2526 (0.2441) time: 0.4567 data: 0.0033 max mem: 22446 +train: [3] [100/400] eta: 0:02:25 lr: 0.000195 loss: 0.7351 (0.7574) grad: 0.2648 (0.2497) time: 0.4661 data: 0.0033 max mem: 22446 +train: [3] [120/400] eta: 0:02:15 lr: 0.000198 loss: 0.7607 (0.7727) grad: 0.2636 (0.2527) time: 0.4709 data: 0.0034 max mem: 22446 +train: [3] [140/400] eta: 0:02:04 lr: 0.000201 loss: 0.7743 (0.7920) grad: 0.2414 (0.2532) time: 0.4623 data: 0.0033 max mem: 22446 +train: [3] [160/400] eta: 0:01:54 lr: 0.000204 loss: 0.7743 (0.7926) grad: 0.2545 (0.2556) time: 0.4550 data: 0.0033 max mem: 22446 +train: [3] [180/400] eta: 0:01:44 lr: 0.000207 loss: 0.7962 (0.7995) grad: 0.2681 (0.2594) time: 0.4724 data: 0.0033 max mem: 22446 +train: [3] [200/400] eta: 0:01:34 lr: 0.000210 loss: 0.8204 (0.8041) grad: 0.2808 (0.2644) time: 0.4582 data: 0.0033 max mem: 22446 +train: [3] [220/400] eta: 0:01:25 lr: 0.000213 loss: 0.7549 (0.7978) grad: 0.2932 (0.2682) time: 0.4558 data: 0.0033 max mem: 22446 +train: [3] [240/400] eta: 0:01:15 lr: 0.000216 loss: 0.7406 (0.8093) grad: 0.2918 (0.2708) time: 0.4472 data: 0.0033 max mem: 22446 +train: [3] [260/400] eta: 0:01:05 lr: 0.000219 loss: 0.7874 (0.8082) grad: 0.3005 (0.2738) time: 0.4745 data: 0.0034 max mem: 22446 +train: [3] [280/400] eta: 0:00:56 lr: 0.000222 loss: 0.7494 (0.8032) grad: 0.2958 (0.2739) time: 0.4575 data: 0.0034 max mem: 22446 +train: [3] [300/400] eta: 0:00:47 lr: 0.000225 loss: 0.6639 (0.8032) grad: 0.2782 (0.2749) time: 0.6039 data: 0.1625 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 0.6540 (0.8004) grad: 0.2837 (0.2755) time: 0.4587 data: 0.0034 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.6004 (0.7872) grad: 0.2556 (0.2729) time: 0.4621 data: 0.0035 max mem: 22446 +train: [3] [360/400] eta: 0:00:19 lr: 0.000234 loss: 0.5423 (0.7760) grad: 0.2260 (0.2768) time: 0.4671 data: 0.0034 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.6417 (0.7758) grad: 0.2978 (0.2773) time: 0.4646 data: 0.0035 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.6417 (0.7699) grad: 0.2919 (0.2782) time: 0.4508 data: 0.0033 max mem: 22446 +train: [3] Total time: 0:03:09 (0.4749 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.6417 (0.7699) grad: 0.2919 (0.2782) +eval (validation): [3] [ 0/63] eta: 0:03:26 time: 3.2715 data: 2.9691 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:22 time: 0.3759 data: 0.0038 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3344 data: 0.0034 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3299 data: 0.0031 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3234 data: 0.0030 max mem: 22446 +eval (validation): [3] Total time: 0:00:24 (0.3966 s / it) +cv: [3] best hparam: (1.6, 1.0) (027) ('027_lr1.6e+00_wd1.0e+00') loss: 0.041 acc: 0.987 f1: 0.985 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:22:57 lr: nan time: 3.4431 data: 3.0989 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:46 lr: 0.000243 loss: 0.6430 (0.7179) grad: 0.2564 (0.2662) time: 0.4535 data: 0.0026 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:10 lr: 0.000246 loss: 0.7416 (0.7458) grad: 0.2658 (0.2701) time: 0.4579 data: 0.0033 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:51 lr: 0.000249 loss: 0.6439 (0.7035) grad: 0.2658 (0.2793) time: 0.4591 data: 0.0036 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:38 lr: 0.000252 loss: 0.5556 (0.6804) grad: 0.2600 (0.2842) time: 0.4585 data: 0.0035 max mem: 22446 +train: [4] [100/400] eta: 0:02:26 lr: 0.000255 loss: 0.5622 (0.6649) grad: 0.2757 (0.2824) time: 0.4597 data: 0.0034 max mem: 22446 +train: [4] [120/400] eta: 0:02:15 lr: 0.000258 loss: 0.5798 (0.6726) grad: 0.2925 (0.2899) time: 0.4633 data: 0.0033 max mem: 22446 +train: [4] [140/400] eta: 0:02:05 lr: 0.000261 loss: 0.6229 (0.6706) grad: 0.3114 (0.2963) time: 0.4767 data: 0.0035 max mem: 22446 +train: [4] [160/400] eta: 0:01:54 lr: 0.000264 loss: 0.6841 (0.6884) grad: 0.3278 (0.3040) time: 0.4383 data: 0.0032 max mem: 22446 +train: [4] [180/400] eta: 0:01:44 lr: 0.000267 loss: 0.7320 (0.6873) grad: 0.3274 (0.3071) time: 0.4736 data: 0.0034 max mem: 22446 +train: [4] [200/400] eta: 0:01:35 lr: 0.000270 loss: 0.7320 (0.6958) grad: 0.3518 (0.3303) time: 0.4732 data: 0.0035 max mem: 22446 +train: [4] [220/400] eta: 0:01:25 lr: 0.000273 loss: 0.7679 (0.7053) grad: 0.3563 (0.3374) time: 0.4624 data: 0.0035 max mem: 22446 +train: [4] [240/400] eta: 0:01:15 lr: 0.000276 loss: 0.6506 (0.7131) grad: 0.3563 (0.3481) time: 0.4583 data: 0.0033 max mem: 22446 +train: [4] [260/400] eta: 0:01:06 lr: 0.000279 loss: 0.7226 (0.7453) grad: 0.4038 (0.3578) time: 0.4863 data: 0.0035 max mem: 22446 +train: [4] [280/400] eta: 0:00:56 lr: 0.000282 loss: 1.0113 (0.7664) grad: 0.4726 (0.3718) time: 0.4592 data: 0.0035 max mem: 22446 +train: [4] [300/400] eta: 0:00:48 lr: 0.000285 loss: 0.8014 (0.7845) grad: 0.4889 (0.3789) time: 0.6185 data: 0.1685 max mem: 22446 +train: [4] [320/400] eta: 0:00:38 lr: 0.000288 loss: 0.6369 (0.7754) grad: 0.4529 (0.3883) time: 0.4683 data: 0.0034 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 0.6462 (0.7744) grad: 0.4291 (0.3898) time: 0.4758 data: 0.0032 max mem: 22446 +train: [4] [360/400] eta: 0:00:19 lr: 0.000294 loss: 0.7625 (0.7884) grad: 0.4730 (0.3975) time: 0.4732 data: 0.0034 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.0632 (0.8118) grad: 0.5163 (0.4034) time: 0.4770 data: 0.0035 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.1322 (0.8190) grad: 0.4333 (0.4032) time: 0.4608 data: 0.0034 max mem: 22446 +train: [4] Total time: 0:03:12 (0.4804 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.1322 (0.8190) grad: 0.4333 (0.4032) +eval (validation): [4] [ 0/63] eta: 0:03:16 time: 3.1144 data: 2.8371 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:21 time: 0.3740 data: 0.0045 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3547 data: 0.0029 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3306 data: 0.0031 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3300 data: 0.0032 max mem: 22446 +eval (validation): [4] Total time: 0:00:25 (0.4005 s / it) +cv: [4] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.035 acc: 0.989 f1: 0.987 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:22:26 lr: nan time: 3.3659 data: 3.0158 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:43 lr: 0.000300 loss: 0.6198 (0.6762) grad: 0.3977 (0.3969) time: 0.4487 data: 0.0035 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:09 lr: 0.000300 loss: 0.7446 (0.7313) grad: 0.4047 (0.4028) time: 0.4597 data: 0.0034 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:51 lr: 0.000300 loss: 0.7473 (0.7619) grad: 0.4024 (0.4022) time: 0.4650 data: 0.0035 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:38 lr: 0.000300 loss: 0.7849 (0.7676) grad: 0.4108 (0.4277) time: 0.4636 data: 0.0034 max mem: 22446 +train: [5] [100/400] eta: 0:02:26 lr: 0.000300 loss: 0.8067 (0.8150) grad: 0.4606 (0.4337) time: 0.4575 data: 0.0034 max mem: 22446 +train: [5] [120/400] eta: 0:02:15 lr: 0.000300 loss: 0.9279 (0.8314) grad: 0.4473 (0.4517) time: 0.4700 data: 0.0034 max mem: 22446 +train: [5] [140/400] eta: 0:02:05 lr: 0.000300 loss: 0.9229 (0.8608) grad: 0.4598 (0.4576) time: 0.4686 data: 0.0034 max mem: 22446 +train: [5] [160/400] eta: 0:01:54 lr: 0.000299 loss: 0.8974 (0.8804) grad: 0.4477 (0.4525) time: 0.4543 data: 0.0032 max mem: 22446 +train: [5] [180/400] eta: 0:01:45 lr: 0.000299 loss: 0.8250 (0.8828) grad: 0.4477 (0.4566) time: 0.4654 data: 0.0034 max mem: 22446 +train: [5] [200/400] eta: 0:01:35 lr: 0.000299 loss: 0.8250 (0.8909) grad: 0.4671 (0.4604) time: 0.4624 data: 0.0034 max mem: 22446 +train: [5] [220/400] eta: 0:01:25 lr: 0.000299 loss: 0.8907 (0.9019) grad: 0.4717 (0.4619) time: 0.4648 data: 0.0035 max mem: 22446 +train: [5] [240/400] eta: 0:01:15 lr: 0.000299 loss: 0.7697 (0.8909) grad: 0.4669 (0.4625) time: 0.4473 data: 0.0028 max mem: 22446 +train: [5] [260/400] eta: 0:01:06 lr: 0.000299 loss: 0.7697 (0.9182) grad: 0.4710 (0.4675) time: 0.4800 data: 0.0035 max mem: 22446 +train: [5] [280/400] eta: 0:00:56 lr: 0.000298 loss: 1.0079 (0.9286) grad: 0.4953 (0.4695) time: 0.4667 data: 0.0035 max mem: 22446 +train: [5] [300/400] eta: 0:00:48 lr: 0.000298 loss: 0.7826 (0.9221) grad: 0.5007 (0.4752) time: 0.6112 data: 0.1675 max mem: 22446 +train: [5] [320/400] eta: 0:00:38 lr: 0.000298 loss: 0.7234 (0.9143) grad: 0.4612 (0.4753) time: 0.4695 data: 0.0028 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 0.7678 (0.9150) grad: 0.4382 (0.4738) time: 0.4578 data: 0.0033 max mem: 22446 +train: [5] [360/400] eta: 0:00:19 lr: 0.000297 loss: 0.6108 (0.9075) grad: 0.4283 (0.4719) time: 0.4599 data: 0.0035 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.7005 (0.9076) grad: 0.4398 (0.4743) time: 0.4710 data: 0.0035 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.7005 (0.8965) grad: 0.4328 (0.4713) time: 0.4591 data: 0.0036 max mem: 22446 +train: [5] Total time: 0:03:11 (0.4777 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.7005 (0.8965) grad: 0.4328 (0.4713) +eval (validation): [5] [ 0/63] eta: 0:03:21 time: 3.2004 data: 2.9102 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:21 time: 0.3685 data: 0.0036 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3524 data: 0.0030 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3329 data: 0.0032 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3314 data: 0.0031 max mem: 22446 +eval (validation): [5] Total time: 0:00:25 (0.4002 s / it) +cv: [5] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 0.035 acc: 0.989 f1: 0.987 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:22:25 lr: nan time: 3.3640 data: 3.0063 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:51 lr: 0.000296 loss: 0.6909 (0.6646) grad: 0.4344 (0.4444) time: 0.4715 data: 0.0036 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:15 lr: 0.000296 loss: 0.5544 (0.6931) grad: 0.4128 (0.4048) time: 0.4754 data: 0.0035 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:55 lr: 0.000296 loss: 0.6195 (0.7013) grad: 0.3954 (0.4081) time: 0.4595 data: 0.0034 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:40 lr: 0.000295 loss: 0.7359 (0.7433) grad: 0.4516 (0.4154) time: 0.4587 data: 0.0033 max mem: 22446 +train: [6] [100/400] eta: 0:02:28 lr: 0.000295 loss: 0.6910 (0.7201) grad: 0.4572 (0.4129) time: 0.4605 data: 0.0032 max mem: 22446 +train: [6] [120/400] eta: 0:02:16 lr: 0.000295 loss: 0.5495 (0.6992) grad: 0.3795 (0.4066) time: 0.4640 data: 0.0034 max mem: 22446 +train: [6] [140/400] eta: 0:02:06 lr: 0.000294 loss: 0.4983 (0.6890) grad: 0.3795 (0.4090) time: 0.4672 data: 0.0034 max mem: 22446 +train: [6] [160/400] eta: 0:01:55 lr: 0.000294 loss: 0.5169 (0.6809) grad: 0.4180 (0.4105) time: 0.4644 data: 0.0033 max mem: 22446 +train: [6] [180/400] eta: 0:01:46 lr: 0.000293 loss: 0.6503 (0.6794) grad: 0.4144 (0.4172) time: 0.4734 data: 0.0034 max mem: 22446 +train: [6] [200/400] eta: 0:01:36 lr: 0.000293 loss: 0.6435 (0.7065) grad: 0.4140 (0.4201) time: 0.4721 data: 0.0034 max mem: 22446 +train: [6] [220/400] eta: 0:01:26 lr: 0.000292 loss: 0.4866 (0.6953) grad: 0.4371 (0.4215) time: 0.4562 data: 0.0035 max mem: 22446 +train: [6] [240/400] eta: 0:01:16 lr: 0.000292 loss: 0.4791 (0.6850) grad: 0.3748 (0.4171) time: 0.4540 data: 0.0034 max mem: 22446 +train: [6] [260/400] eta: 0:01:06 lr: 0.000291 loss: 0.5246 (0.6757) grad: 0.3908 (0.4156) time: 0.4836 data: 0.0034 max mem: 22446 +train: [6] [280/400] eta: 0:00:57 lr: 0.000291 loss: 0.5641 (0.6769) grad: 0.3936 (0.4140) time: 0.4651 data: 0.0034 max mem: 22446 +train: [6] [300/400] eta: 0:00:48 lr: 0.000290 loss: 0.5464 (0.6710) grad: 0.3442 (0.4113) time: 0.6204 data: 0.1661 max mem: 22446 +train: [6] [320/400] eta: 0:00:38 lr: 0.000290 loss: 0.4543 (0.6571) grad: 0.3442 (0.4072) time: 0.4649 data: 0.0031 max mem: 22446 +train: [6] [340/400] eta: 0:00:28 lr: 0.000289 loss: 0.4457 (0.6514) grad: 0.3238 (0.4048) time: 0.4574 data: 0.0034 max mem: 22446 +train: [6] [360/400] eta: 0:00:19 lr: 0.000288 loss: 0.5066 (0.6439) grad: 0.3417 (0.4027) time: 0.4712 data: 0.0035 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.4752 (0.6356) grad: 0.3400 (0.3984) time: 0.4691 data: 0.0036 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.3852 (0.6232) grad: 0.3154 (0.3945) time: 0.4524 data: 0.0035 max mem: 22446 +train: [6] Total time: 0:03:12 (0.4806 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.3852 (0.6232) grad: 0.3154 (0.3945) +eval (validation): [6] [ 0/63] eta: 0:03:23 time: 3.2258 data: 2.9260 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:21 time: 0.3701 data: 0.0038 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3440 data: 0.0030 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3226 data: 0.0030 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3231 data: 0.0029 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3953 s / it) +cv: [6] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.033 acc: 0.990 f1: 0.988 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:22:13 lr: nan time: 3.3339 data: 2.9813 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:43 lr: 0.000286 loss: 0.3906 (0.4870) grad: 0.3479 (0.3521) time: 0.4515 data: 0.0030 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:09 lr: 0.000286 loss: 0.3771 (0.4119) grad: 0.3221 (0.3275) time: 0.4607 data: 0.0033 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:51 lr: 0.000285 loss: 0.3936 (0.4529) grad: 0.3127 (0.3286) time: 0.4606 data: 0.0034 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:38 lr: 0.000284 loss: 0.3800 (0.4287) grad: 0.3079 (0.3140) time: 0.4607 data: 0.0035 max mem: 22446 +train: [7] [100/400] eta: 0:02:25 lr: 0.000284 loss: 0.3287 (0.4269) grad: 0.3054 (0.3207) time: 0.4556 data: 0.0035 max mem: 22446 +train: [7] [120/400] eta: 0:02:15 lr: 0.000283 loss: 0.3253 (0.4203) grad: 0.3271 (0.3239) time: 0.4701 data: 0.0035 max mem: 22446 +train: [7] [140/400] eta: 0:02:05 lr: 0.000282 loss: 0.3283 (0.4238) grad: 0.3259 (0.3206) time: 0.4663 data: 0.0033 max mem: 22446 +train: [7] [160/400] eta: 0:01:54 lr: 0.000282 loss: 0.3382 (0.4186) grad: 0.2585 (0.3157) time: 0.4492 data: 0.0033 max mem: 22446 +train: [7] [180/400] eta: 0:01:44 lr: 0.000281 loss: 0.3382 (0.4289) grad: 0.2861 (0.3178) time: 0.4734 data: 0.0034 max mem: 22446 +train: [7] [200/400] eta: 0:01:35 lr: 0.000280 loss: 0.3393 (0.4212) grad: 0.2977 (0.3170) time: 0.4677 data: 0.0035 max mem: 22446 +train: [7] [220/400] eta: 0:01:25 lr: 0.000279 loss: 0.2758 (0.4227) grad: 0.2977 (0.3144) time: 0.4603 data: 0.0035 max mem: 22446 +train: [7] [240/400] eta: 0:01:15 lr: 0.000278 loss: 0.3149 (0.4257) grad: 0.2759 (0.3141) time: 0.4620 data: 0.0035 max mem: 22446 +train: [7] [260/400] eta: 0:01:06 lr: 0.000278 loss: 0.2897 (0.4193) grad: 0.3079 (0.3154) time: 0.4876 data: 0.0034 max mem: 22446 +train: [7] [280/400] eta: 0:00:56 lr: 0.000277 loss: 0.3612 (0.4339) grad: 0.3099 (0.3152) time: 0.4652 data: 0.0038 max mem: 22446 +train: [7] [300/400] eta: 0:00:48 lr: 0.000276 loss: 0.3996 (0.4400) grad: 0.3180 (0.3180) time: 0.6087 data: 0.1696 max mem: 22446 +train: [7] [320/400] eta: 0:00:38 lr: 0.000275 loss: 0.3916 (0.4427) grad: 0.3320 (0.3166) time: 0.4774 data: 0.0037 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.3079 (0.4358) grad: 0.2880 (0.3145) time: 0.4697 data: 0.0033 max mem: 22446 +train: [7] [360/400] eta: 0:00:19 lr: 0.000273 loss: 0.2460 (0.4283) grad: 0.2408 (0.3107) time: 0.4820 data: 0.0037 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.2439 (0.4194) grad: 0.2379 (0.3071) time: 0.4609 data: 0.0036 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.2608 (0.4126) grad: 0.2469 (0.3046) time: 0.4556 data: 0.0034 max mem: 22446 +train: [7] Total time: 0:03:11 (0.4797 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.2608 (0.4126) grad: 0.2469 (0.3046) +eval (validation): [7] [ 0/63] eta: 0:03:23 time: 3.2224 data: 2.9759 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:21 time: 0.3595 data: 0.0037 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3656 data: 0.0031 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3380 data: 0.0033 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3409 data: 0.0032 max mem: 22446 +eval (validation): [7] Total time: 0:00:25 (0.4050 s / it) +cv: [7] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.036 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:22:31 lr: nan time: 3.3787 data: 3.0357 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:49 lr: 0.000270 loss: 0.1799 (0.2003) grad: 0.2498 (0.2271) time: 0.4656 data: 0.0030 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:13 lr: 0.000270 loss: 0.2095 (0.2363) grad: 0.2464 (0.2380) time: 0.4688 data: 0.0035 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:56 lr: 0.000269 loss: 0.2195 (0.2418) grad: 0.2352 (0.2351) time: 0.4787 data: 0.0034 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:42 lr: 0.000268 loss: 0.2054 (0.2431) grad: 0.2435 (0.2577) time: 0.4700 data: 0.0035 max mem: 22446 +train: [8] [100/400] eta: 0:02:29 lr: 0.000267 loss: 0.2054 (0.2519) grad: 0.2401 (0.2526) time: 0.4705 data: 0.0035 max mem: 22446 +train: [8] [120/400] eta: 0:02:19 lr: 0.000266 loss: 0.2763 (0.2672) grad: 0.2435 (0.2569) time: 0.4893 data: 0.0038 max mem: 22446 +train: [8] [140/400] eta: 0:02:08 lr: 0.000265 loss: 0.2510 (0.2746) grad: 0.2591 (0.2595) time: 0.4676 data: 0.0034 max mem: 22446 +train: [8] [160/400] eta: 0:01:57 lr: 0.000264 loss: 0.2416 (0.2762) grad: 0.2368 (0.2621) time: 0.4717 data: 0.0034 max mem: 22446 +train: [8] [180/400] eta: 0:01:47 lr: 0.000263 loss: 0.2191 (0.2723) grad: 0.2043 (0.2572) time: 0.4754 data: 0.0033 max mem: 22446 +train: [8] [200/400] eta: 0:01:37 lr: 0.000262 loss: 0.2076 (0.2743) grad: 0.2186 (0.2597) time: 0.4671 data: 0.0034 max mem: 22446 +train: [8] [220/400] eta: 0:01:27 lr: 0.000260 loss: 0.2601 (0.2800) grad: 0.2343 (0.2625) time: 0.4625 data: 0.0033 max mem: 22446 +train: [8] [240/400] eta: 0:01:17 lr: 0.000259 loss: 0.3043 (0.2829) grad: 0.2697 (0.2674) time: 0.4566 data: 0.0032 max mem: 22446 +train: [8] [260/400] eta: 0:01:07 lr: 0.000258 loss: 0.2820 (0.2836) grad: 0.3005 (0.2675) time: 0.4935 data: 0.0034 max mem: 22446 +train: [8] [280/400] eta: 0:00:57 lr: 0.000257 loss: 0.2191 (0.2836) grad: 0.2478 (0.2657) time: 0.4653 data: 0.0034 max mem: 22446 +train: [8] [300/400] eta: 0:00:49 lr: 0.000256 loss: 0.2484 (0.2857) grad: 0.2430 (0.2638) time: 0.6242 data: 0.1672 max mem: 22446 +train: [8] [320/400] eta: 0:00:39 lr: 0.000255 loss: 0.2484 (0.2831) grad: 0.1897 (0.2577) time: 0.4887 data: 0.0033 max mem: 22446 +train: [8] [340/400] eta: 0:00:29 lr: 0.000254 loss: 0.2001 (0.2790) grad: 0.1718 (0.2542) time: 0.4673 data: 0.0033 max mem: 22446 +train: [8] [360/400] eta: 0:00:19 lr: 0.000253 loss: 0.2001 (0.2744) grad: 0.1821 (0.2504) time: 0.4894 data: 0.0036 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.1673 (0.2720) grad: 0.1852 (0.2490) time: 0.4577 data: 0.0034 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.1996 (0.2695) grad: 0.1984 (0.2471) time: 0.4674 data: 0.0034 max mem: 22446 +train: [8] Total time: 0:03:14 (0.4875 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.1996 (0.2695) grad: 0.1984 (0.2471) +eval (validation): [8] [ 0/63] eta: 0:03:16 time: 3.1209 data: 2.8431 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:20 time: 0.3563 data: 0.0028 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3321 data: 0.0028 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3323 data: 0.0032 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3310 data: 0.0030 max mem: 22446 +eval (validation): [8] Total time: 0:00:24 (0.3888 s / it) +cv: [8] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.034 acc: 0.990 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:21:51 lr: nan time: 3.2794 data: 2.9390 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:43 lr: 0.000249 loss: 0.1516 (0.2055) grad: 0.1561 (0.1735) time: 0.4527 data: 0.0024 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:09 lr: 0.000248 loss: 0.1881 (0.2153) grad: 0.2010 (0.1945) time: 0.4635 data: 0.0030 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:51 lr: 0.000247 loss: 0.1881 (0.2028) grad: 0.2012 (0.1805) time: 0.4581 data: 0.0034 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:38 lr: 0.000246 loss: 0.1616 (0.1976) grad: 0.1648 (0.1797) time: 0.4641 data: 0.0033 max mem: 22446 +train: [9] [100/400] eta: 0:02:26 lr: 0.000244 loss: 0.1545 (0.1934) grad: 0.1682 (0.1791) time: 0.4554 data: 0.0033 max mem: 22446 +train: [9] [120/400] eta: 0:02:14 lr: 0.000243 loss: 0.1496 (0.1908) grad: 0.1419 (0.1761) time: 0.4536 data: 0.0035 max mem: 22446 +train: [9] [140/400] eta: 0:02:04 lr: 0.000242 loss: 0.1610 (0.1917) grad: 0.1619 (0.1785) time: 0.4543 data: 0.0035 max mem: 22446 +train: [9] [160/400] eta: 0:01:53 lr: 0.000241 loss: 0.1610 (0.1904) grad: 0.1707 (0.1777) time: 0.4516 data: 0.0033 max mem: 22446 +train: [9] [180/400] eta: 0:01:43 lr: 0.000240 loss: 0.1539 (0.1883) grad: 0.1446 (0.1763) time: 0.4590 data: 0.0034 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.1558 (0.1875) grad: 0.1446 (0.1749) time: 0.4636 data: 0.0036 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.1650 (0.1929) grad: 0.1731 (0.1768) time: 0.4633 data: 0.0035 max mem: 22446 +train: [9] [240/400] eta: 0:01:15 lr: 0.000236 loss: 0.1690 (0.1901) grad: 0.1801 (0.1759) time: 0.4571 data: 0.0033 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.1663 (0.1924) grad: 0.1824 (0.1765) time: 0.4755 data: 0.0036 max mem: 22446 +train: [9] [280/400] eta: 0:00:56 lr: 0.000233 loss: 0.2069 (0.1955) grad: 0.1748 (0.1754) time: 0.4560 data: 0.0034 max mem: 22446 +train: [9] [300/400] eta: 0:00:48 lr: 0.000232 loss: 0.1686 (0.1959) grad: 0.1661 (0.1755) time: 0.6348 data: 0.1736 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.1383 (0.1926) grad: 0.1508 (0.1745) time: 0.4568 data: 0.0026 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.1549 (0.1912) grad: 0.1656 (0.1750) time: 0.4584 data: 0.0035 max mem: 22446 +train: [9] [360/400] eta: 0:00:19 lr: 0.000228 loss: 0.1773 (0.1900) grad: 0.1446 (0.1731) time: 0.4513 data: 0.0033 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.1410 (0.1874) grad: 0.1368 (0.1716) time: 0.4868 data: 0.0034 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.1186 (0.1840) grad: 0.1373 (0.1697) time: 0.4755 data: 0.0036 max mem: 22446 +train: [9] Total time: 0:03:10 (0.4768 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.1186 (0.1840) grad: 0.1373 (0.1697) +eval (validation): [9] [ 0/63] eta: 0:03:24 time: 3.2435 data: 2.9507 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:21 time: 0.3727 data: 0.0026 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3219 data: 0.0032 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3316 data: 0.0030 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3322 data: 0.0030 max mem: 22446 +eval (validation): [9] Total time: 0:00:24 (0.3918 s / it) +cv: [9] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 0.032 acc: 0.990 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:22:57 lr: nan time: 3.4429 data: 3.0418 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:54 lr: 0.000224 loss: 0.1754 (0.2002) grad: 0.1382 (0.1562) time: 0.4748 data: 0.0031 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:14 lr: 0.000222 loss: 0.1575 (0.1789) grad: 0.1382 (0.1432) time: 0.4616 data: 0.0032 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:53 lr: 0.000221 loss: 0.1380 (0.1704) grad: 0.1332 (0.1450) time: 0.4512 data: 0.0032 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:42 lr: 0.000220 loss: 0.1358 (0.1644) grad: 0.1479 (0.1494) time: 0.4962 data: 0.0034 max mem: 22446 +train: [10] [100/400] eta: 0:02:29 lr: 0.000218 loss: 0.1358 (0.1593) grad: 0.1271 (0.1421) time: 0.4572 data: 0.0035 max mem: 22446 +train: [10] [120/400] eta: 0:02:16 lr: 0.000217 loss: 0.1276 (0.1542) grad: 0.1259 (0.1415) time: 0.4444 data: 0.0034 max mem: 22446 +train: [10] [140/400] eta: 0:02:05 lr: 0.000215 loss: 0.1276 (0.1570) grad: 0.1378 (0.1432) time: 0.4548 data: 0.0033 max mem: 22446 +train: [10] [160/400] eta: 0:01:55 lr: 0.000214 loss: 0.1230 (0.1533) grad: 0.1378 (0.1422) time: 0.4592 data: 0.0033 max mem: 22446 +train: [10] [180/400] eta: 0:01:45 lr: 0.000213 loss: 0.1230 (0.1519) grad: 0.1250 (0.1393) time: 0.4559 data: 0.0032 max mem: 22446 +train: [10] [200/400] eta: 0:01:35 lr: 0.000211 loss: 0.1429 (0.1537) grad: 0.1292 (0.1402) time: 0.4594 data: 0.0034 max mem: 22446 +train: [10] [220/400] eta: 0:01:25 lr: 0.000210 loss: 0.1362 (0.1519) grad: 0.1314 (0.1396) time: 0.4623 data: 0.0033 max mem: 22446 +train: [10] [240/400] eta: 0:01:15 lr: 0.000208 loss: 0.1217 (0.1512) grad: 0.1314 (0.1404) time: 0.4497 data: 0.0031 max mem: 22446 +train: [10] [260/400] eta: 0:01:06 lr: 0.000207 loss: 0.1333 (0.1490) grad: 0.1425 (0.1400) time: 0.4766 data: 0.0035 max mem: 22446 +train: [10] [280/400] eta: 0:00:56 lr: 0.000205 loss: 0.1258 (0.1472) grad: 0.1243 (0.1382) time: 0.4547 data: 0.0033 max mem: 22446 +train: [10] [300/400] eta: 0:00:48 lr: 0.000204 loss: 0.1128 (0.1458) grad: 0.1140 (0.1370) time: 0.5982 data: 0.1688 max mem: 22446 +train: [10] [320/400] eta: 0:00:38 lr: 0.000202 loss: 0.1039 (0.1444) grad: 0.1060 (0.1349) time: 0.4428 data: 0.0031 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.1143 (0.1426) grad: 0.1032 (0.1336) time: 0.4551 data: 0.0035 max mem: 22446 +train: [10] [360/400] eta: 0:00:19 lr: 0.000199 loss: 0.1049 (0.1419) grad: 0.1017 (0.1311) time: 0.4542 data: 0.0033 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.1018 (0.1416) grad: 0.0884 (0.1299) time: 0.4723 data: 0.0036 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.1131 (0.1408) grad: 0.0964 (0.1283) time: 0.4497 data: 0.0034 max mem: 22446 +train: [10] Total time: 0:03:09 (0.4743 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.1131 (0.1408) grad: 0.0964 (0.1283) +eval (validation): [10] [ 0/63] eta: 0:03:27 time: 3.2978 data: 3.0095 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:21 time: 0.3600 data: 0.0036 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3216 data: 0.0026 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3322 data: 0.0033 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3327 data: 0.0032 max mem: 22446 +eval (validation): [10] Total time: 0:00:24 (0.3891 s / it) +cv: [10] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 0.062 acc: 0.990 f1: 0.988 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:18 lr: nan time: 3.3460 data: 2.9952 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:54 lr: 0.000195 loss: 0.1009 (0.1165) grad: 0.0877 (0.0917) time: 0.4805 data: 0.0025 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:13 lr: 0.000193 loss: 0.1039 (0.1134) grad: 0.0892 (0.0867) time: 0.4564 data: 0.0035 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:52 lr: 0.000192 loss: 0.0999 (0.1120) grad: 0.0880 (0.0865) time: 0.4444 data: 0.0032 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:39 lr: 0.000190 loss: 0.1039 (0.1124) grad: 0.0818 (0.0869) time: 0.4755 data: 0.0034 max mem: 22446 +train: [11] [100/400] eta: 0:02:27 lr: 0.000189 loss: 0.1039 (0.1100) grad: 0.0679 (0.0867) time: 0.4641 data: 0.0034 max mem: 22446 +train: [11] [120/400] eta: 0:02:15 lr: 0.000187 loss: 0.0985 (0.1096) grad: 0.0786 (0.0874) time: 0.4451 data: 0.0033 max mem: 22446 +train: [11] [140/400] eta: 0:02:04 lr: 0.000186 loss: 0.1040 (0.1095) grad: 0.0818 (0.0877) time: 0.4504 data: 0.0033 max mem: 22446 +train: [11] [160/400] eta: 0:01:54 lr: 0.000184 loss: 0.1000 (0.1094) grad: 0.0821 (0.0875) time: 0.4426 data: 0.0034 max mem: 22446 +train: [11] [180/400] eta: 0:01:44 lr: 0.000183 loss: 0.0981 (0.1088) grad: 0.0827 (0.0888) time: 0.4569 data: 0.0035 max mem: 22446 +train: [11] [200/400] eta: 0:01:34 lr: 0.000181 loss: 0.0892 (0.1086) grad: 0.1025 (0.0902) time: 0.4601 data: 0.0034 max mem: 22446 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 0.0892 (0.1081) grad: 0.0888 (0.0893) time: 0.4572 data: 0.0035 max mem: 22446 +train: [11] [240/400] eta: 0:01:14 lr: 0.000178 loss: 0.1103 (0.1083) grad: 0.0900 (0.0907) time: 0.4422 data: 0.0031 max mem: 22446 +train: [11] [260/400] eta: 0:01:05 lr: 0.000177 loss: 0.1092 (0.1085) grad: 0.0939 (0.0904) time: 0.4713 data: 0.0034 max mem: 22446 +train: [11] [280/400] eta: 0:00:56 lr: 0.000175 loss: 0.0983 (0.1085) grad: 0.0992 (0.0913) time: 0.4549 data: 0.0036 max mem: 22446 +train: [11] [300/400] eta: 0:00:47 lr: 0.000174 loss: 0.0994 (0.1080) grad: 0.1139 (0.0930) time: 0.6056 data: 0.1698 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.0994 (0.1075) grad: 0.0763 (0.0918) time: 0.4497 data: 0.0029 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.0979 (0.1069) grad: 0.0699 (0.0910) time: 0.4471 data: 0.0034 max mem: 22446 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 0.0995 (0.1062) grad: 0.0811 (0.0909) time: 0.4485 data: 0.0035 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.0957 (0.1057) grad: 0.0850 (0.0907) time: 0.4755 data: 0.0035 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.0957 (0.1055) grad: 0.0821 (0.0903) time: 0.4578 data: 0.0035 max mem: 22446 +train: [11] Total time: 0:03:08 (0.4718 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.0957 (0.1055) grad: 0.0821 (0.0903) +eval (validation): [11] [ 0/63] eta: 0:03:17 time: 3.1410 data: 2.8980 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:21 time: 0.3666 data: 0.0040 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3361 data: 0.0027 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3237 data: 0.0031 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3213 data: 0.0031 max mem: 22446 +eval (validation): [11] Total time: 0:00:24 (0.3895 s / it) +cv: [11] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 0.032 acc: 0.990 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:16 lr: nan time: 3.3418 data: 2.9599 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:48 lr: 0.000164 loss: 0.0907 (0.0988) grad: 0.1031 (0.0882) time: 0.4636 data: 0.0032 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:12 lr: 0.000163 loss: 0.0907 (0.0940) grad: 0.0703 (0.0761) time: 0.4671 data: 0.0034 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:53 lr: 0.000161 loss: 0.0835 (0.0911) grad: 0.0703 (0.0750) time: 0.4544 data: 0.0035 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:39 lr: 0.000160 loss: 0.0850 (0.0907) grad: 0.0749 (0.0733) time: 0.4629 data: 0.0033 max mem: 22446 +train: [12] [100/400] eta: 0:02:27 lr: 0.000158 loss: 0.0957 (0.0931) grad: 0.0726 (0.0730) time: 0.4716 data: 0.0034 max mem: 22446 +train: [12] [120/400] eta: 0:02:16 lr: 0.000156 loss: 0.0957 (0.0956) grad: 0.0709 (0.0733) time: 0.4555 data: 0.0035 max mem: 22446 +train: [12] [140/400] eta: 0:02:05 lr: 0.000155 loss: 0.0930 (0.0956) grad: 0.0674 (0.0721) time: 0.4661 data: 0.0035 max mem: 22446 +train: [12] [160/400] eta: 0:01:55 lr: 0.000153 loss: 0.0846 (0.0949) grad: 0.0674 (0.0720) time: 0.4498 data: 0.0033 max mem: 22446 +train: [12] [180/400] eta: 0:01:44 lr: 0.000152 loss: 0.0859 (0.0950) grad: 0.0684 (0.0723) time: 0.4401 data: 0.0034 max mem: 22446 +train: [12] [200/400] eta: 0:01:35 lr: 0.000150 loss: 0.0858 (0.0940) grad: 0.0514 (0.0705) time: 0.4769 data: 0.0034 max mem: 22446 +train: [12] [220/400] eta: 0:01:25 lr: 0.000149 loss: 0.0858 (0.0953) grad: 0.0552 (0.0711) time: 0.4695 data: 0.0036 max mem: 22446 +train: [12] [240/400] eta: 0:01:15 lr: 0.000147 loss: 0.0922 (0.0954) grad: 0.0726 (0.0712) time: 0.4451 data: 0.0031 max mem: 22446 +train: [12] [260/400] eta: 0:01:06 lr: 0.000145 loss: 0.0871 (0.0946) grad: 0.0526 (0.0697) time: 0.4765 data: 0.0033 max mem: 22446 +train: [12] [280/400] eta: 0:00:56 lr: 0.000144 loss: 0.0864 (0.0942) grad: 0.0523 (0.0694) time: 0.4525 data: 0.0034 max mem: 22446 +train: [12] [300/400] eta: 0:00:47 lr: 0.000142 loss: 0.0890 (0.0942) grad: 0.0664 (0.0689) time: 0.6034 data: 0.1709 max mem: 22446 +train: [12] [320/400] eta: 0:00:38 lr: 0.000141 loss: 0.0876 (0.0936) grad: 0.0591 (0.0681) time: 0.4598 data: 0.0031 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.0837 (0.0933) grad: 0.0453 (0.0673) time: 0.4545 data: 0.0035 max mem: 22446 +train: [12] [360/400] eta: 0:00:19 lr: 0.000138 loss: 0.0845 (0.0929) grad: 0.0453 (0.0667) time: 0.4461 data: 0.0034 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.0863 (0.0932) grad: 0.0733 (0.0672) time: 0.4597 data: 0.0034 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.0869 (0.0929) grad: 0.0665 (0.0666) time: 0.4556 data: 0.0033 max mem: 22446 +train: [12] Total time: 0:03:09 (0.4740 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.0869 (0.0929) grad: 0.0665 (0.0666) +eval (validation): [12] [ 0/63] eta: 0:03:18 time: 3.1501 data: 2.8636 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:21 time: 0.3622 data: 0.0033 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3457 data: 0.0031 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3296 data: 0.0031 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3227 data: 0.0031 max mem: 22446 +eval (validation): [12] Total time: 0:00:24 (0.3931 s / it) +cv: [12] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.162 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:19 lr: nan time: 3.3487 data: 2.9938 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:49 lr: 0.000133 loss: 0.0912 (0.0922) grad: 0.0465 (0.0566) time: 0.4657 data: 0.0022 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:13 lr: 0.000131 loss: 0.0817 (0.0827) grad: 0.0394 (0.0526) time: 0.4701 data: 0.0032 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:54 lr: 0.000130 loss: 0.0760 (0.0821) grad: 0.0446 (0.0536) time: 0.4583 data: 0.0033 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:39 lr: 0.000128 loss: 0.0821 (0.0840) grad: 0.0443 (0.0514) time: 0.4610 data: 0.0029 max mem: 22446 +train: [13] [100/400] eta: 0:02:28 lr: 0.000127 loss: 0.0811 (0.0835) grad: 0.0427 (0.0517) time: 0.4763 data: 0.0034 max mem: 22446 +train: [13] [120/400] eta: 0:02:16 lr: 0.000125 loss: 0.0780 (0.0832) grad: 0.0575 (0.0524) time: 0.4451 data: 0.0033 max mem: 22446 +train: [13] [140/400] eta: 0:02:05 lr: 0.000124 loss: 0.0746 (0.0835) grad: 0.0591 (0.0530) time: 0.4488 data: 0.0034 max mem: 22446 +train: [13] [160/400] eta: 0:01:54 lr: 0.000122 loss: 0.0754 (0.0829) grad: 0.0424 (0.0518) time: 0.4475 data: 0.0035 max mem: 22446 +train: [13] [180/400] eta: 0:01:44 lr: 0.000120 loss: 0.0830 (0.0834) grad: 0.0433 (0.0515) time: 0.4552 data: 0.0033 max mem: 22446 +train: [13] [200/400] eta: 0:01:34 lr: 0.000119 loss: 0.0833 (0.0840) grad: 0.0456 (0.0512) time: 0.4533 data: 0.0033 max mem: 22446 +train: [13] [220/400] eta: 0:01:24 lr: 0.000117 loss: 0.0768 (0.0838) grad: 0.0484 (0.0513) time: 0.4621 data: 0.0032 max mem: 22446 +train: [13] [240/400] eta: 0:01:15 lr: 0.000116 loss: 0.0771 (0.0837) grad: 0.0508 (0.0513) time: 0.4437 data: 0.0032 max mem: 22446 +train: [13] [260/400] eta: 0:01:05 lr: 0.000114 loss: 0.0771 (0.0835) grad: 0.0426 (0.0508) time: 0.4671 data: 0.0033 max mem: 22446 +train: [13] [280/400] eta: 0:00:56 lr: 0.000113 loss: 0.0773 (0.0828) grad: 0.0426 (0.0515) time: 0.4604 data: 0.0033 max mem: 22446 +train: [13] [300/400] eta: 0:00:47 lr: 0.000111 loss: 0.0773 (0.0828) grad: 0.0509 (0.0516) time: 0.6103 data: 0.1711 max mem: 22446 +train: [13] [320/400] eta: 0:00:38 lr: 0.000110 loss: 0.0816 (0.0826) grad: 0.0425 (0.0509) time: 0.4481 data: 0.0026 max mem: 22446 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 0.0744 (0.0822) grad: 0.0380 (0.0506) time: 0.4508 data: 0.0035 max mem: 22446 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 0.0622 (0.0816) grad: 0.0386 (0.0501) time: 0.4510 data: 0.0035 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.0707 (0.0816) grad: 0.0404 (0.0498) time: 0.4604 data: 0.0034 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.0806 (0.0815) grad: 0.0404 (0.0494) time: 0.4607 data: 0.0033 max mem: 22446 +train: [13] Total time: 0:03:08 (0.4722 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.0806 (0.0815) grad: 0.0404 (0.0494) +eval (validation): [13] [ 0/63] eta: 0:03:22 time: 3.2087 data: 2.9620 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:20 time: 0.3306 data: 0.0033 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3315 data: 0.0025 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3284 data: 0.0032 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3266 data: 0.0029 max mem: 22446 +eval (validation): [13] Total time: 0:00:23 (0.3787 s / it) +cv: [13] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.153 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [14] [ 0/400] eta: 0:22:01 lr: nan time: 3.3048 data: 2.9601 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:39 lr: 0.000102 loss: 0.0763 (0.0774) grad: 0.0391 (0.0390) time: 0.4422 data: 0.0033 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:09 lr: 0.000101 loss: 0.0763 (0.0766) grad: 0.0408 (0.0413) time: 0.4738 data: 0.0033 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:52 lr: 0.000099 loss: 0.0709 (0.0748) grad: 0.0412 (0.0410) time: 0.4627 data: 0.0034 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:37 lr: 0.000098 loss: 0.0686 (0.0747) grad: 0.0416 (0.0423) time: 0.4465 data: 0.0032 max mem: 22446 +train: [14] [100/400] eta: 0:02:26 lr: 0.000096 loss: 0.0673 (0.0747) grad: 0.0407 (0.0424) time: 0.4813 data: 0.0034 max mem: 22446 +train: [14] [120/400] eta: 0:02:15 lr: 0.000095 loss: 0.0733 (0.0762) grad: 0.0375 (0.0423) time: 0.4657 data: 0.0033 max mem: 22446 +train: [14] [140/400] eta: 0:02:05 lr: 0.000093 loss: 0.0807 (0.0777) grad: 0.0398 (0.0427) time: 0.4609 data: 0.0032 max mem: 22446 +train: [14] [160/400] eta: 0:01:54 lr: 0.000092 loss: 0.0678 (0.0770) grad: 0.0408 (0.0427) time: 0.4566 data: 0.0034 max mem: 22446 +train: [14] [180/400] eta: 0:01:44 lr: 0.000090 loss: 0.0666 (0.0763) grad: 0.0383 (0.0429) time: 0.4502 data: 0.0033 max mem: 22446 +train: [14] [200/400] eta: 0:01:34 lr: 0.000089 loss: 0.0731 (0.0766) grad: 0.0371 (0.0428) time: 0.4541 data: 0.0033 max mem: 22446 +train: [14] [220/400] eta: 0:01:25 lr: 0.000088 loss: 0.0731 (0.0763) grad: 0.0379 (0.0426) time: 0.4812 data: 0.0035 max mem: 22446 +train: [14] [240/400] eta: 0:01:15 lr: 0.000086 loss: 0.0738 (0.0761) grad: 0.0387 (0.0424) time: 0.4614 data: 0.0033 max mem: 22446 +train: [14] [260/400] eta: 0:01:06 lr: 0.000085 loss: 0.0668 (0.0755) grad: 0.0387 (0.0426) time: 0.4614 data: 0.0032 max mem: 22446 +train: [14] [280/400] eta: 0:00:56 lr: 0.000083 loss: 0.0763 (0.0763) grad: 0.0415 (0.0426) time: 0.4717 data: 0.0037 max mem: 22446 +train: [14] [300/400] eta: 0:00:48 lr: 0.000082 loss: 0.0771 (0.0762) grad: 0.0407 (0.0426) time: 0.6286 data: 0.1718 max mem: 22446 +train: [14] [320/400] eta: 0:00:38 lr: 0.000081 loss: 0.0695 (0.0760) grad: 0.0398 (0.0423) time: 0.4487 data: 0.0030 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.0798 (0.0764) grad: 0.0374 (0.0422) time: 0.4523 data: 0.0035 max mem: 22446 +train: [14] [360/400] eta: 0:00:19 lr: 0.000078 loss: 0.0811 (0.0766) grad: 0.0406 (0.0424) time: 0.4671 data: 0.0036 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.0764 (0.0766) grad: 0.0428 (0.0423) time: 0.4604 data: 0.0033 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.0695 (0.0766) grad: 0.0396 (0.0423) time: 0.4557 data: 0.0034 max mem: 22446 +train: [14] Total time: 0:03:10 (0.4766 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.0695 (0.0766) grad: 0.0396 (0.0423) +eval (validation): [14] [ 0/63] eta: 0:03:14 time: 3.0868 data: 2.8371 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:21 time: 0.3770 data: 0.0039 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3537 data: 0.0030 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3256 data: 0.0032 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3242 data: 0.0031 max mem: 22446 +eval (validation): [14] Total time: 0:00:25 (0.3978 s / it) +cv: [14] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.140 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:21:58 lr: nan time: 3.2966 data: 2.9511 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:40 lr: 0.000074 loss: 0.0698 (0.0789) grad: 0.0350 (0.0427) time: 0.4443 data: 0.0038 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:09 lr: 0.000072 loss: 0.0743 (0.0773) grad: 0.0431 (0.0430) time: 0.4689 data: 0.0033 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:51 lr: 0.000071 loss: 0.0682 (0.0733) grad: 0.0395 (0.0417) time: 0.4578 data: 0.0035 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:36 lr: 0.000070 loss: 0.0636 (0.0726) grad: 0.0358 (0.0415) time: 0.4446 data: 0.0032 max mem: 22446 +train: [15] [100/400] eta: 0:02:26 lr: 0.000068 loss: 0.0637 (0.0713) grad: 0.0360 (0.0415) time: 0.4817 data: 0.0035 max mem: 22446 +train: [15] [120/400] eta: 0:02:15 lr: 0.000067 loss: 0.0645 (0.0711) grad: 0.0360 (0.0412) time: 0.4596 data: 0.0034 max mem: 22446 +train: [15] [140/400] eta: 0:02:04 lr: 0.000066 loss: 0.0663 (0.0711) grad: 0.0388 (0.0410) time: 0.4613 data: 0.0034 max mem: 22446 +train: [15] [160/400] eta: 0:01:54 lr: 0.000064 loss: 0.0663 (0.0708) grad: 0.0396 (0.0414) time: 0.4531 data: 0.0035 max mem: 22446 +train: [15] [180/400] eta: 0:01:44 lr: 0.000063 loss: 0.0656 (0.0711) grad: 0.0386 (0.0410) time: 0.4509 data: 0.0035 max mem: 22446 +train: [15] [200/400] eta: 0:01:34 lr: 0.000062 loss: 0.0757 (0.0719) grad: 0.0375 (0.0407) time: 0.4469 data: 0.0035 max mem: 22446 +train: [15] [220/400] eta: 0:01:24 lr: 0.000061 loss: 0.0695 (0.0713) grad: 0.0396 (0.0407) time: 0.4655 data: 0.0034 max mem: 22446 +train: [15] [240/400] eta: 0:01:15 lr: 0.000059 loss: 0.0684 (0.0711) grad: 0.0395 (0.0408) time: 0.4635 data: 0.0033 max mem: 22446 +train: [15] [260/400] eta: 0:01:05 lr: 0.000058 loss: 0.0671 (0.0713) grad: 0.0371 (0.0406) time: 0.4627 data: 0.0034 max mem: 22446 +train: [15] [280/400] eta: 0:00:56 lr: 0.000057 loss: 0.0642 (0.0712) grad: 0.0363 (0.0405) time: 0.4732 data: 0.0035 max mem: 22446 +train: [15] [300/400] eta: 0:00:47 lr: 0.000056 loss: 0.0656 (0.0712) grad: 0.0363 (0.0403) time: 0.6068 data: 0.1706 max mem: 22446 +train: [15] [320/400] eta: 0:00:38 lr: 0.000054 loss: 0.0673 (0.0709) grad: 0.0378 (0.0404) time: 0.4545 data: 0.0034 max mem: 22446 +train: [15] [340/400] eta: 0:00:28 lr: 0.000053 loss: 0.0642 (0.0713) grad: 0.0412 (0.0406) time: 0.4639 data: 0.0032 max mem: 22446 +train: [15] [360/400] eta: 0:00:19 lr: 0.000052 loss: 0.0642 (0.0714) grad: 0.0419 (0.0407) time: 0.4555 data: 0.0032 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.0674 (0.0711) grad: 0.0397 (0.0405) time: 0.4723 data: 0.0034 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.0690 (0.0714) grad: 0.0366 (0.0403) time: 0.4783 data: 0.0034 max mem: 22446 +train: [15] Total time: 0:03:10 (0.4755 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.0690 (0.0714) grad: 0.0366 (0.0403) +eval (validation): [15] [ 0/63] eta: 0:03:18 time: 3.1555 data: 2.8716 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:21 time: 0.3748 data: 0.0030 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:09 time: 0.3274 data: 0.0035 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3244 data: 0.0032 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3230 data: 0.0031 max mem: 22446 +eval (validation): [15] Total time: 0:00:24 (0.3902 s / it) +cv: [15] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.137 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:02 lr: nan time: 3.3074 data: 2.9249 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:43 lr: 0.000048 loss: 0.0662 (0.0734) grad: 0.0352 (0.0380) time: 0.4524 data: 0.0036 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:11 lr: 0.000047 loss: 0.0662 (0.0731) grad: 0.0353 (0.0390) time: 0.4749 data: 0.0029 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:54 lr: 0.000046 loss: 0.0664 (0.0715) grad: 0.0337 (0.0377) time: 0.4706 data: 0.0034 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:38 lr: 0.000045 loss: 0.0696 (0.0715) grad: 0.0334 (0.0374) time: 0.4486 data: 0.0033 max mem: 22446 +train: [16] [100/400] eta: 0:02:27 lr: 0.000044 loss: 0.0705 (0.0719) grad: 0.0362 (0.0377) time: 0.4703 data: 0.0034 max mem: 22446 +train: [16] [120/400] eta: 0:02:16 lr: 0.000043 loss: 0.0667 (0.0716) grad: 0.0389 (0.0381) time: 0.4628 data: 0.0034 max mem: 22446 +train: [16] [140/400] eta: 0:02:05 lr: 0.000042 loss: 0.0665 (0.0717) grad: 0.0389 (0.0384) time: 0.4540 data: 0.0035 max mem: 22446 +train: [16] [160/400] eta: 0:01:54 lr: 0.000041 loss: 0.0664 (0.0713) grad: 0.0380 (0.0385) time: 0.4579 data: 0.0033 max mem: 22446 +train: [16] [180/400] eta: 0:01:45 lr: 0.000040 loss: 0.0698 (0.0713) grad: 0.0365 (0.0382) time: 0.4688 data: 0.0034 max mem: 22446 +train: [16] [200/400] eta: 0:01:35 lr: 0.000039 loss: 0.0722 (0.0722) grad: 0.0378 (0.0387) time: 0.4572 data: 0.0034 max mem: 22446 +train: [16] [220/400] eta: 0:01:25 lr: 0.000038 loss: 0.0682 (0.0718) grad: 0.0378 (0.0385) time: 0.4731 data: 0.0034 max mem: 22446 +train: [16] [240/400] eta: 0:01:15 lr: 0.000036 loss: 0.0674 (0.0714) grad: 0.0348 (0.0381) time: 0.4638 data: 0.0033 max mem: 22446 +train: [16] [260/400] eta: 0:01:06 lr: 0.000035 loss: 0.0716 (0.0718) grad: 0.0358 (0.0383) time: 0.4689 data: 0.0033 max mem: 22446 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 0.0692 (0.0715) grad: 0.0382 (0.0386) time: 0.4655 data: 0.0033 max mem: 22446 +train: [16] [300/400] eta: 0:00:48 lr: 0.000033 loss: 0.0643 (0.0715) grad: 0.0396 (0.0388) time: 0.6090 data: 0.1705 max mem: 22446 +train: [16] [320/400] eta: 0:00:38 lr: 0.000032 loss: 0.0700 (0.0715) grad: 0.0382 (0.0388) time: 0.4368 data: 0.0034 max mem: 22446 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 0.0735 (0.0721) grad: 0.0378 (0.0390) time: 0.4574 data: 0.0031 max mem: 22446 +train: [16] [360/400] eta: 0:00:19 lr: 0.000031 loss: 0.0726 (0.0719) grad: 0.0364 (0.0389) time: 0.4589 data: 0.0034 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.0651 (0.0715) grad: 0.0370 (0.0389) time: 0.4577 data: 0.0033 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.0598 (0.0713) grad: 0.0419 (0.0391) time: 0.4785 data: 0.0033 max mem: 22446 +train: [16] Total time: 0:03:10 (0.4767 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.0598 (0.0713) grad: 0.0419 (0.0391) +eval (validation): [16] [ 0/63] eta: 0:03:17 time: 3.1393 data: 2.8962 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:21 time: 0.3588 data: 0.0036 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:10 time: 0.3822 data: 0.0026 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3408 data: 0.0032 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3427 data: 0.0032 max mem: 22446 +eval (validation): [16] Total time: 0:00:25 (0.4085 s / it) +cv: [16] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.135 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:21:47 lr: nan time: 3.2690 data: 2.9212 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:40 lr: 0.000028 loss: 0.0560 (0.0648) grad: 0.0380 (0.0374) time: 0.4449 data: 0.0038 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:09 lr: 0.000027 loss: 0.0664 (0.0698) grad: 0.0386 (0.0394) time: 0.4702 data: 0.0033 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:52 lr: 0.000026 loss: 0.0712 (0.0726) grad: 0.0388 (0.0391) time: 0.4658 data: 0.0035 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:38 lr: 0.000025 loss: 0.0764 (0.0735) grad: 0.0373 (0.0390) time: 0.4617 data: 0.0036 max mem: 22446 +train: [17] [100/400] eta: 0:02:26 lr: 0.000024 loss: 0.0722 (0.0729) grad: 0.0372 (0.0389) time: 0.4619 data: 0.0033 max mem: 22446 +train: [17] [120/400] eta: 0:02:15 lr: 0.000023 loss: 0.0669 (0.0723) grad: 0.0376 (0.0385) time: 0.4575 data: 0.0034 max mem: 22446 +train: [17] [140/400] eta: 0:02:04 lr: 0.000023 loss: 0.0663 (0.0718) grad: 0.0363 (0.0385) time: 0.4553 data: 0.0034 max mem: 22446 +train: [17] [160/400] eta: 0:01:54 lr: 0.000022 loss: 0.0727 (0.0716) grad: 0.0364 (0.0387) time: 0.4628 data: 0.0033 max mem: 22446 +train: [17] [180/400] eta: 0:01:44 lr: 0.000021 loss: 0.0692 (0.0710) grad: 0.0364 (0.0386) time: 0.4560 data: 0.0033 max mem: 22446 +train: [17] [200/400] eta: 0:01:34 lr: 0.000020 loss: 0.0629 (0.0707) grad: 0.0347 (0.0386) time: 0.4670 data: 0.0033 max mem: 22446 +train: [17] [220/400] eta: 0:01:25 lr: 0.000019 loss: 0.0629 (0.0708) grad: 0.0362 (0.0385) time: 0.4715 data: 0.0035 max mem: 22446 +train: [17] [240/400] eta: 0:01:15 lr: 0.000019 loss: 0.0624 (0.0707) grad: 0.0352 (0.0385) time: 0.4792 data: 0.0034 max mem: 22446 +train: [17] [260/400] eta: 0:01:06 lr: 0.000018 loss: 0.0618 (0.0708) grad: 0.0365 (0.0385) time: 0.4578 data: 0.0033 max mem: 22446 +train: [17] [280/400] eta: 0:00:56 lr: 0.000017 loss: 0.0668 (0.0707) grad: 0.0364 (0.0382) time: 0.4709 data: 0.0034 max mem: 22446 +train: [17] [300/400] eta: 0:00:48 lr: 0.000016 loss: 0.0645 (0.0705) grad: 0.0359 (0.0384) time: 0.6216 data: 0.1712 max mem: 22446 +train: [17] [320/400] eta: 0:00:38 lr: 0.000016 loss: 0.0681 (0.0706) grad: 0.0402 (0.0386) time: 0.4425 data: 0.0026 max mem: 22446 +train: [17] [340/400] eta: 0:00:28 lr: 0.000015 loss: 0.0679 (0.0705) grad: 0.0367 (0.0384) time: 0.4568 data: 0.0033 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.0669 (0.0702) grad: 0.0336 (0.0382) time: 0.4566 data: 0.0033 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.0665 (0.0705) grad: 0.0358 (0.0382) time: 0.4656 data: 0.0033 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.0646 (0.0703) grad: 0.0371 (0.0381) time: 0.4725 data: 0.0033 max mem: 22446 +train: [17] Total time: 0:03:10 (0.4772 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.0646 (0.0703) grad: 0.0371 (0.0381) +eval (validation): [17] [ 0/63] eta: 0:03:12 time: 3.0589 data: 2.7829 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:20 time: 0.3439 data: 0.0036 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3456 data: 0.0029 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3269 data: 0.0032 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3259 data: 0.0032 max mem: 22446 +eval (validation): [17] Total time: 0:00:24 (0.3852 s / it) +cv: [17] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.134 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:28 lr: nan time: 3.3725 data: 3.0229 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:47 lr: 0.000012 loss: 0.0609 (0.0673) grad: 0.0355 (0.0368) time: 0.4587 data: 0.0027 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:13 lr: 0.000012 loss: 0.0637 (0.0708) grad: 0.0373 (0.0378) time: 0.4773 data: 0.0029 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:54 lr: 0.000011 loss: 0.0664 (0.0703) grad: 0.0375 (0.0375) time: 0.4610 data: 0.0035 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:39 lr: 0.000011 loss: 0.0706 (0.0717) grad: 0.0375 (0.0377) time: 0.4501 data: 0.0033 max mem: 22446 +train: [18] [100/400] eta: 0:02:27 lr: 0.000010 loss: 0.0679 (0.0717) grad: 0.0375 (0.0379) time: 0.4674 data: 0.0033 max mem: 22446 +train: [18] [120/400] eta: 0:02:16 lr: 0.000009 loss: 0.0625 (0.0696) grad: 0.0363 (0.0375) time: 0.4578 data: 0.0034 max mem: 22446 +train: [18] [140/400] eta: 0:02:05 lr: 0.000009 loss: 0.0680 (0.0701) grad: 0.0383 (0.0379) time: 0.4505 data: 0.0035 max mem: 22446 +train: [18] [160/400] eta: 0:01:54 lr: 0.000008 loss: 0.0689 (0.0697) grad: 0.0377 (0.0377) time: 0.4537 data: 0.0034 max mem: 22446 +train: [18] [180/400] eta: 0:01:44 lr: 0.000008 loss: 0.0631 (0.0690) grad: 0.0377 (0.0380) time: 0.4593 data: 0.0034 max mem: 22446 +train: [18] [200/400] eta: 0:01:34 lr: 0.000007 loss: 0.0670 (0.0691) grad: 0.0389 (0.0382) time: 0.4570 data: 0.0033 max mem: 22446 +train: [18] [220/400] eta: 0:01:25 lr: 0.000007 loss: 0.0651 (0.0686) grad: 0.0367 (0.0380) time: 0.4577 data: 0.0033 max mem: 22446 +train: [18] [240/400] eta: 0:01:15 lr: 0.000006 loss: 0.0623 (0.0684) grad: 0.0379 (0.0381) time: 0.4680 data: 0.0033 max mem: 22446 +train: [18] [260/400] eta: 0:01:06 lr: 0.000006 loss: 0.0621 (0.0688) grad: 0.0359 (0.0378) time: 0.4690 data: 0.0035 max mem: 22446 +train: [18] [280/400] eta: 0:00:56 lr: 0.000006 loss: 0.0645 (0.0688) grad: 0.0383 (0.0380) time: 0.4609 data: 0.0028 max mem: 22446 +train: [18] [300/400] eta: 0:00:48 lr: 0.000005 loss: 0.0626 (0.0685) grad: 0.0387 (0.0381) time: 0.6480 data: 0.1766 max mem: 22446 +train: [18] [320/400] eta: 0:00:38 lr: 0.000005 loss: 0.0647 (0.0685) grad: 0.0371 (0.0380) time: 0.4492 data: 0.0028 max mem: 22446 +train: [18] [340/400] eta: 0:00:28 lr: 0.000004 loss: 0.0686 (0.0688) grad: 0.0372 (0.0381) time: 0.4608 data: 0.0034 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.0665 (0.0686) grad: 0.0362 (0.0382) time: 0.4555 data: 0.0034 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.0627 (0.0681) grad: 0.0338 (0.0380) time: 0.4658 data: 0.0033 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.0599 (0.0678) grad: 0.0347 (0.0379) time: 0.4792 data: 0.0034 max mem: 22446 +train: [18] Total time: 0:03:11 (0.4779 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.0599 (0.0678) grad: 0.0347 (0.0379) +eval (validation): [18] [ 0/63] eta: 0:03:18 time: 3.1517 data: 2.9150 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:20 time: 0.3454 data: 0.0042 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3417 data: 0.0031 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3305 data: 0.0033 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3299 data: 0.0032 max mem: 22446 +eval (validation): [18] Total time: 0:00:24 (0.3876 s / it) +cv: [18] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.134 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:39 lr: nan time: 3.3994 data: 3.0571 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:45 lr: 0.000003 loss: 0.0690 (0.0650) grad: 0.0336 (0.0345) time: 0.4526 data: 0.0026 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:12 lr: 0.000003 loss: 0.0690 (0.0655) grad: 0.0349 (0.0366) time: 0.4748 data: 0.0030 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:54 lr: 0.000002 loss: 0.0633 (0.0659) grad: 0.0363 (0.0362) time: 0.4701 data: 0.0035 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:39 lr: 0.000002 loss: 0.0672 (0.0679) grad: 0.0380 (0.0369) time: 0.4457 data: 0.0034 max mem: 22446 +train: [19] [100/400] eta: 0:02:26 lr: 0.000002 loss: 0.0690 (0.0697) grad: 0.0379 (0.0369) time: 0.4578 data: 0.0034 max mem: 22446 +train: [19] [120/400] eta: 0:02:16 lr: 0.000002 loss: 0.0643 (0.0689) grad: 0.0360 (0.0366) time: 0.4683 data: 0.0033 max mem: 22446 +train: [19] [140/400] eta: 0:02:05 lr: 0.000001 loss: 0.0618 (0.0679) grad: 0.0348 (0.0366) time: 0.4566 data: 0.0035 max mem: 22446 +train: [19] [160/400] eta: 0:01:55 lr: 0.000001 loss: 0.0646 (0.0685) grad: 0.0339 (0.0365) time: 0.4707 data: 0.0033 max mem: 22446 +train: [19] [180/400] eta: 0:01:45 lr: 0.000001 loss: 0.0659 (0.0686) grad: 0.0376 (0.0368) time: 0.4606 data: 0.0034 max mem: 22446 +train: [19] [200/400] eta: 0:01:35 lr: 0.000001 loss: 0.0638 (0.0686) grad: 0.0376 (0.0369) time: 0.4496 data: 0.0035 max mem: 22446 +train: [19] [220/400] eta: 0:01:25 lr: 0.000001 loss: 0.0691 (0.0692) grad: 0.0361 (0.0371) time: 0.4464 data: 0.0033 max mem: 22446 +train: [19] [240/400] eta: 0:01:15 lr: 0.000001 loss: 0.0691 (0.0688) grad: 0.0372 (0.0370) time: 0.4641 data: 0.0034 max mem: 22446 +train: [19] [260/400] eta: 0:01:06 lr: 0.000000 loss: 0.0616 (0.0687) grad: 0.0346 (0.0369) time: 0.4701 data: 0.0035 max mem: 22446 +train: [19] [280/400] eta: 0:00:56 lr: 0.000000 loss: 0.0683 (0.0690) grad: 0.0349 (0.0371) time: 0.4547 data: 0.0034 max mem: 22446 +train: [19] [300/400] eta: 0:00:48 lr: 0.000000 loss: 0.0694 (0.0690) grad: 0.0391 (0.0372) time: 0.6219 data: 0.1682 max mem: 22446 +train: [19] [320/400] eta: 0:00:38 lr: 0.000000 loss: 0.0683 (0.0688) grad: 0.0411 (0.0374) time: 0.4557 data: 0.0029 max mem: 22446 +train: [19] [340/400] eta: 0:00:28 lr: 0.000000 loss: 0.0601 (0.0685) grad: 0.0397 (0.0374) time: 0.4629 data: 0.0033 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.0635 (0.0686) grad: 0.0362 (0.0375) time: 0.4571 data: 0.0032 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.0639 (0.0685) grad: 0.0379 (0.0375) time: 0.4694 data: 0.0033 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.0624 (0.0683) grad: 0.0367 (0.0376) time: 0.4774 data: 0.0033 max mem: 22446 +train: [19] Total time: 0:03:10 (0.4770 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.0624 (0.0683) grad: 0.0367 (0.0376) +eval (validation): [19] [ 0/63] eta: 0:03:09 time: 3.0158 data: 2.7830 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:20 time: 0.3571 data: 0.0036 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:09 time: 0.3452 data: 0.0030 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3372 data: 0.0032 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3353 data: 0.0032 max mem: 22446 +eval (validation): [19] Total time: 0:00:24 (0.3921 s / it) +cv: [19] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.134 acc: 0.992 f1: 0.991 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9915674603174603, "hparam": [12, 1.0], "hparam_id": 39, "epoch": 19, "is_best": false, "best_score": 0.9915674603174603} +eval (train): [20] [ 0/297] eta: 0:14:20 time: 2.8962 data: 2.6137 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:17 time: 0.3771 data: 0.0038 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:48 time: 0.3455 data: 0.0031 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:35 time: 0.3573 data: 0.0037 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:23 time: 0.3413 data: 0.0035 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:14 time: 0.3462 data: 0.0033 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:06 time: 0.3446 data: 0.0034 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:58 time: 0.3615 data: 0.0035 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:50 time: 0.3504 data: 0.0033 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3642 data: 0.0035 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:35 time: 0.3445 data: 0.0032 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3548 data: 0.0034 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3497 data: 0.0033 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3390 data: 0.0033 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3510 data: 0.0034 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3445 data: 0.0033 max mem: 22446 +eval (train): [20] Total time: 0:01:47 (0.3614 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:15 time: 3.0963 data: 2.8163 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3810 data: 0.0035 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3339 data: 0.0029 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3420 data: 0.0033 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3430 data: 0.0032 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.3989 s / it) +eval (test): [20] [ 0/79] eta: 0:03:55 time: 2.9789 data: 2.7437 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:30 time: 0.3883 data: 0.0247 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3375 data: 0.0033 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3381 data: 0.0031 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3248 data: 0.0028 max mem: 22446 +eval (test): [20] Total time: 0:00:30 (0.3842 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9915674603174603, "hparam": [12, 1.0], "hparam_id": 39, "epoch": 13, "is_best": true, "best_score": 0.9915674603174603} +eval (train): [20] [ 0/297] eta: 0:14:53 time: 3.0095 data: 2.7238 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:09 time: 0.3399 data: 0.0031 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:47 time: 0.3649 data: 0.0029 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:33 time: 0.3494 data: 0.0031 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:23 time: 0.3448 data: 0.0033 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:14 time: 0.3587 data: 0.0034 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:05 time: 0.3387 data: 0.0034 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:57 time: 0.3406 data: 0.0033 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:49 time: 0.3397 data: 0.0032 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:42 time: 0.3396 data: 0.0033 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:34 time: 0.3474 data: 0.0034 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3540 data: 0.0031 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3558 data: 0.0034 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3490 data: 0.0034 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3469 data: 0.0030 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3483 data: 0.0030 max mem: 22446 +eval (train): [20] Total time: 0:01:46 (0.3581 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:08 time: 2.9876 data: 2.7130 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3605 data: 0.0036 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3447 data: 0.0034 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3269 data: 0.0031 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3250 data: 0.0031 max mem: 22446 +eval (validation): [20] Total time: 0:00:24 (0.3890 s / it) +eval (test): [20] [ 0/79] eta: 0:03:59 time: 3.0375 data: 2.7598 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:28 time: 0.3575 data: 0.0030 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3359 data: 0.0030 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3530 data: 0.0034 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3423 data: 0.0031 max mem: 22446 +eval (test): [20] Total time: 0:00:30 (0.3837 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|-----------:|--------:|-----------:|--------:|-----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 13 | 0.0036 | 0.05 | 39 | [12, 1.0] | train | 0.00026492 | 0.99989 | 7.1827e-05 | 0.99989 | 8.0734e-05 | +| flat_mae | patch | attn | hcpya_task21 | best | 13 | 0.0036 | 0.05 | 39 | [12, 1.0] | validation | 0.15325 | 0.99157 | 0.0014458 | 0.99057 | 0.0017972 | +| flat_mae | patch | attn | hcpya_task21 | best | 13 | 0.0036 | 0.05 | 39 | [12, 1.0] | test | 0.26222 | 0.9881 | 0.0014792 | 0.98585 | 0.0019358 | + + +done! total time: 1:19:39 diff --git a/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..ee2fb3ac46dc27d353e236cf6812af3871a5e3d0 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.1159111148118974, "train/grad": 0.2124594309180975, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.061976318359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.059254150390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.054808349609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.050220947265625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.04576171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.039423828125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.032344970703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.024400634765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.013863525390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.00262451171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.9915087890625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.974825439453125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.958271484375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9339459228515623, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.91048828125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.887745361328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8582891845703124, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.8231689453125, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.783106384277344, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7450375366210937, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6969288635253905, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.6457620239257813, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5866009521484377, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.5207149505615236, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.449155578613281, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.359940528869629, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.277021427154541, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.199780979156494, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0937092018127443, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.9689945316314696, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.860623528957367, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7665835678577424, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.64745837777853, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5493750826269388, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4424557577073573, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3452585097774863, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2497125754505396, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1665961067192256, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0844549138005823, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9926426878478378, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9280400047916919, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.8756201690621674, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8134123893547803, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.7647973509598524, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.7137209578696638, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.6644315401744098, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6265113090723753, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5846070621535182, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5536693838145584, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03961445198394358, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.039565025577321646, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03948224386200309, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03940043142065406, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.039317073877900836, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03920205771923065, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.039070871556177736, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03892624657601118, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03873498611152172, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.038527435762807725, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03832067997194827, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03800302210263908, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03768967906013131, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03722422405146062, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.036761685460805896, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03630568338558078, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03571726103313267, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035022583734244106, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03426433018408716, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03360070775263011, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.032838682299479845, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03212227211333811, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.031385720055550334, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.030654932698234917, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.029933070009574295, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.029103373335674407, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.028371784770861268, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.027712409542873503, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.026823382759466767, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.025792607027105986, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02490306327585131, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02412369433324784, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023100679558701813, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.022244790960103275, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.021329350406304, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.020461195344105364, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.019565472682006656, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.018803340272279458, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01806446091970429, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.017329823311883957, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.016660917463013902, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.016056711386190727, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.015349847174366004, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014913573994126636, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.014583017590339295, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.014424910194647964, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.014299393363762647, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.014205414310563356, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.014369555386947468, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0262129306793213, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.017911672592163, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0040934085845947, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.990391492843628, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9769234657287598, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.958115816116333, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.93691086769104, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.9135570526123047, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.882641315460205, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.849935293197632, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.817903995513916, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7700819969177246, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7237985134124756, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.657519578933716, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5947000980377197, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.53532338142395, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.460902452468872, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3746273517608643, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.278684377670288, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.188937187194824, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.0759358406066895, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.9559377431869507, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.817610740661621, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.664597988128662, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.500950574874878, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.3036808967590332, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.1291199922561646, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.9768967032432556, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.7867422699928284, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.5948204398155212, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.4571903645992279, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.3589267432689667, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.26370900869369507, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.20678319036960602, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.15629203617572784, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.11060453206300735, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07857882231473923, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06767316162586212, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.062245212495326996, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06148063763976097, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.06398511677980423, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0629202201962471, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.0572500005364418, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.05465266481041908, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.05202297121286392, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.053823668509721756, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.06480350345373154, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.09441298246383667, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.12050195783376694, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.08134920634920635, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.08531746031746032, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.09300595238095238, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.10367063492063493, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.12202380952380952, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.1701388888888889, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.24206349206349206, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2951388888888889, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.31746031746031744, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.3244047619047619, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.31374007936507936, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.28869047619047616, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2586805555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23462301587301587, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22544642857142858, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.22271825396825398, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2390873015873016, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2896825396825397, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.3871527777777778, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.4677579365079365, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.5572916666666666, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.6009424603174603, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.6245039682539683, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.6510416666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.6810515873015873, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.730406746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.7896825396825397, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8452380952380952, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.886656746031746, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8983134920634921, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9025297619047619, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9097222222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9268353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9476686507936508, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.972718253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9791666666666666, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9652777777777778, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0318014575200691, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.03292170577360697, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.03612008415163826, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0403609241690653, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.04938139508807995, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.06838109448829267, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.09099031143052876, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.10587401253137232, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.1207117142252466, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.12873181745512988, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.1262061994171957, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11636384559505593, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09409625917456299, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07676021943148992, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.07253630873369778, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.07236144071597111, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.09252393643431998, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1403144650817432, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2121628496033556, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2626244616503871, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.31763903994754517, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.344879041466688, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.36927220093258756, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.406165079938359, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.4589134345350268, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.5537222854118343, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.6889236055500788, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.7952830325714889, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8611100610389102, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8762211232780591, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.8804197015555941, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.8909896597178995, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9147585905058092, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9392776230398625, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9536847221705171, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9655699009571166, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9708253634795674, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9733236632096601, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9742896932934999, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9748892942573064, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9716993352339036, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9715481324921591, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9748445088476364, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9775150369345097, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9802249583183511, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.978266261214428, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.971268411236429, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9604456027010452, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9519796514002152, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 0.7137209578696638, "validation/loss_best": 0.05202297121286392, "validation/acc_best": 0.9833829365079365, "validation/f1_best": 0.9802249583183511} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.1719359731674195, "train/grad": 0.15049986448138952, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.9560205078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.938197021484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.908848876953125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8802447509765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.852093505859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.8137701416015624, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.771307373046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.7253692626953123, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.666705322265625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.6063876342773438, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.5489373779296876, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.46560791015625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.3872720336914064, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.27729248046875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.174477844238281, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.0772562408447266, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.9552061462402344, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.8130789184570313, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.6560151481628418, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.5129782485961913, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.3426431512832642, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.176484808921814, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.0058935105800628, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.8431939357519149, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.6967694997787476, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.5509542827308178, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.4432104656100273, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.36209378654137253, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.2760041386913508, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.20314577679149806, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.15800163025967776, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.12914823370985687, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.102634089384228, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08658332768827677, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0737828089017421, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06665845587849617, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0628435964602977, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06037128569558263, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05828026574105025, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05610394371673465, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.055280518317595125, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.054312194716185334, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.054911012928932905, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.057511301292106506, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.05941304873675108, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.07008000457659364, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.08008844271302223, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.11317770692519843, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.13822194952517747, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03728589428588748, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03696168160997331, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.036423832336440684, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03589843887835741, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.035374279478564856, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03465622237883508, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03385560343042016, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.032986031379550695, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03189293473958969, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.030813036523759364, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.029856695998460053, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028606931203976272, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027585461176931857, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02637123445980251, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.025424198899418115, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.024653894491493702, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023807260263711213, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02291756889782846, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021975675094872714, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021106601702049376, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02002165007404983, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01889665177091956, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.017644230048172175, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01633341809269041, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015078198965638875, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.013759350453037769, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012614055194426328, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.01158762957667932, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010487199211493135, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.009444823270896449, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00876101495930925, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.008303629660513252, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.007890499634086154, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.007636949073057622, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.007463441386062186, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007359469444782008, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007328089469810947, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.007210881218197756, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007072560922824778, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.006946646379656158, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.006884763783600647, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006751752258569468, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007009377491194755, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.007300841580727138, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007620240470278077, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.008927154384437017, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.009582361297216267, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.013158514100068715, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.015255238269455731, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8680713176727295, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.8381943702697754, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.7898876667022705, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.7432775497436523, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.6978113651275635, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6372873783111572, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.5716233253479004, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.50181245803833, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.4152488708496094, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.3280084133148193, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.246087074279785, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.1277916431427, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.016477346420288, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.8593758344650269, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.7121121883392334, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.5737358331680298, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.4024134874343872, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.2092071771621704, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.0079535245895386, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.8389386534690857, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.6585111021995544, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.5062631964683533, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.377123087644577, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.2823704779148102, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.21600750088691711, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.15567491948604584, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.10906913876533508, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.08469589054584503, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.07052411139011383, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.061794672161340714, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0568578876554966, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.05328406020998955, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04909229651093483, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.045879025012254715, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.0424325130879879, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.039408545941114426, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0374860018491745, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.037745919078588486, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.037430934607982635, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.03771286457777023, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.03821408003568649, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.039807114750146866, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.0466446653008461, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.04418862611055374, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.07530634105205536, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.12092077732086182, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07609279453754425, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.2881752550601959, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.23473775386810303, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.32589285714285715, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3271329365079365, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.31374007936507936, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.28596230158730157, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.25818452380952384, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.2425595238095238, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.23735119047619047, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2490079365079365, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2978670634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.373015873015873, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.4521329365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5463789682539683, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.589781746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6272321428571429, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6527777777777778, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6785714285714286, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.716765873015873, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7782738095238095, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8358134920634921, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8735119047619048, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8978174603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9201388888888888, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9303075396825397, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9573412698412699, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.972718253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9801587301587301, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.982390873015873, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.982390873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9831349206349206, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9568452380952381, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9712301587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.12678620591780967, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.1321381931744024, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.131636099266291, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1187648944857536, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.09736886044080359, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.08711372903469303, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.08524862069942366, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.09811200246126353, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.14313892569292921, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.20187010064023467, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.25742861450870885, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.3094336088387998, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.3353253757721833, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.3801106595042926, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.41430598391815565, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.4597293562122526, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.5392306391853215, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.6575959459217634, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.7768528452403467, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.844067339040977, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.8799683786472176, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.908857065030501, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9213781771829143, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9334363739662795, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.949726698584733, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.966816374369096, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9711440233764014, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9749871116572073, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.975670653559327, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9773022231178697, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9773073461415303, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9779279210759484, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.97972553245977, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9808623201452533, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9815452338432747, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9840917512576306, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9819386163039933, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9817760912279337, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9801334558994202, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9810496167327004, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9791364645222762, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9798119661522843, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9763366056721985, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9807533409446203, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.969738714586266, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9646312105923678, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9754715407640899, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9551619356781981, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9636495101089941, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.06665845587849617, "validation/loss_best": 0.039408545941114426, "validation/acc_best": 0.9873511904761905, "validation/f1_best": 0.9840917512576306} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.8744008299708367, "train/grad": 0.19418307457119227, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.757369384765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.7148663330078127, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.647186279296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.5832034301757814, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.522569274902344, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.442521667480469, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.3569293212890625, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.2666329956054687, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.1537094116210938, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.0389579010009764, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.9303302764892578, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.7735131072998047, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.627715835571289, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.428063793182373, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.250637559890747, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.0943798017501831, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.916622748374939, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7377129662036895, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.5756099557876587, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.45777170836925507, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.3478382247686386, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.2645332848280668, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.19780114663764833, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.14936790391802787, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1157301186118275, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.08970987108536065, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.07646577249281108, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.069963025059551, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.06459131374955178, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.06049366682767868, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.05778778845444322, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.055817897412925956, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.054051570985466244, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.05282146682031453, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.051813114648684856, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.05147710270248353, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05168963717296719, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05154467807151377, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05170497343875468, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05252016307786107, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.056054925844073296, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06565646812319756, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08276633724570275, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.12455916269682347, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.17090424057096243, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.31786416419781743, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.4312464135698974, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1202991070318966, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.2322644596919416, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03415038509294391, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03332922361791134, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.032016719160601496, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.030788920689374208, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.029678965406492352, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.028341143447905778, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.027109614219516517, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02603944796137512, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02498393479734659, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024139309097081424, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023465674892067908, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02258444727398455, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02177391822449863, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020599292097613214, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01946301172953099, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01838498454540968, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.017077829772606492, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.015651782378554344, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01423190367873758, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.013093988031614571, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.011912748126778751, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01085778113687411, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.009764194053132088, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00875524198752828, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007962826741859316, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007229045600397512, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.006794458593940362, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006626254090806469, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006544387120520696, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006536446383106523, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006585490521974862, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006642739792587235, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006716816325788386, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00673151662980672, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006727966781472788, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0067184642853681, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006741138354118448, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006835599725600332, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007038113291782793, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.007183781916392035, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.007837589536502492, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.009234378369292244, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.010583885157320765, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014108082426828332, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.019706332069672497, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.029511167269956787, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.03505415076564532, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07121005071597188, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10476016004671919, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.635387897491455, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.5795090198516846, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.492238759994507, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.411409378051758, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.3358094692230225, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.236534595489502, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.1302571296691895, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.017401695251465, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.8751599788665771, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.7301572561264038, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.593855857849121, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.3995959758758545, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.2244874238967896, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.9968649744987488, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.8100008964538574, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.6597681045532227, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.5066391825675964, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.3747910261154175, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.27793991565704346, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.21791039407253265, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.16072878241539001, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11113596707582474, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.08219779282808304, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.06956437975168228, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.06232965365052223, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.057092130184173584, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0538424551486969, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.051330793648958206, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.047657813876867294, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04248139634728432, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.040997590869665146, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04098443686962128, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.039998702704906464, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03869218751788139, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03824442997574806, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.03987562283873558, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04511639103293419, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04736285284161568, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04872377961874008, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.0471634641289711, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.049000393599271774, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.115958072245121, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1807224601507187, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.16516119241714478, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5000885128974915, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9597887396812439, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9734229445457458, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.171147584915161, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.6729073524475098, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.23735119047619047, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.23115079365079366, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.23958333333333334, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.28075396825396826, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.34871031746031744, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4476686507936508, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5391865079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5912698412698413, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6292162698412699, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6569940476190477, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6803075396825397, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7247023809523809, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.767609126984127, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8323412698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8705357142857143, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8911210317460317, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9114583333333334, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9298115079365079, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9598214285714286, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9724702380952381, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.988343253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.957093253968254, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9563492063492064, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9709821428571429, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0800719701914348, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.07501566237438585, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.08637914649957541, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.12515642540057445, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.18388956395403247, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.2544546186870843, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.31527182251846764, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.34358659242120826, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3817037794370143, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.4171400588998686, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.45084227669951343, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.5397957878344842, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.6189960121676646, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7641478714175608, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8334934295560233, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8671335806130074, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8965446804815062, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9202648348260655, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9388679687858059, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.953167084168951, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9660425686985159, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9707737062965847, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9726669935784958, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9742429773985264, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9767786547507717, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9777550308359737, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9783524158037435, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9785069820489125, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9795808128251389, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9806682241953097, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.981736297375926, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9823081203292399, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9844183449554345, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9839334765576763, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9868154880659924, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9875757550928548, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9836837163961889, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9825876651195289, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9821297158575784, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9852726782302588, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.982856369956323, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9736164193069421, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9627963389076536, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9702985557633154, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9464046927745313, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9452390119778622, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9467063405200775, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9644370331320792, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9640096397337091, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.05147710270248353, "validation/loss_best": 0.03987562283873558, "validation/acc_best": 0.988343253968254, "validation/f1_best": 0.9875757550928548} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.7698652730882167, "train/grad": 0.27818755596876144, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.515599670410156, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.448984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.345081481933594, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.2482940673828127, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.156647033691406, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.0352586364746093, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.9044396209716796, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.7657076263427733, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.5934236526489258, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.4222163200378417, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.2666417503356933, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.057250714302063, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.8818149995803833, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.6749758207798005, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.5238015586137772, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.41540009796619415, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.31688204035162926, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.23735539309680462, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.17479900799691678, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.13400842158123852, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.10080885716713965, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.08147243468090892, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.07090121722780168, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0645634269155562, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.06012872309423983, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.056193893421441314, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.053180077970027925, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.050691034262999896, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.04754149698652327, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.044272350622341035, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.042023145351558926, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.040667673209682106, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03932643407955766, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.038449667524546385, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0376354065630585, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.038412062246352435, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03916416654363274, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04062699982896447, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04903885540552437, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.053767519257962706, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06872666401788592, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.10038422667421401, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1716984952893108, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2834148702584207, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6286865465063601, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.8351833421178162, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.9570095139555632, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9527704458124937, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.5580772522557527, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.029063313361257316, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.027955663995817302, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02650161461904645, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.025449247658252717, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024673487702384592, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023859084108844398, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023125036638230085, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022394887143746017, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021475115297362207, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020491027077659964, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019516201862134038, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018080285494215788, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.016766489585861563, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.015069683589972556, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013663076031953096, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012518305147532374, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011326045088935643, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010131289220880716, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008904086593538522, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007985867061652243, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007174262354383245, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006693082811543718, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00648321948829107, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006390797492349521, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006337199926492758, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00628895124187693, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0062170958094066005, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006148628774099052, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006038171608233825, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005878233183466364, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.005764037757180631, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005719370561419055, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.005678649482142646, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005657697824499337, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005620914129613084, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00572856496219174, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0058338886400451885, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006040217516274424, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007485689734021434, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008420490380231058, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010352285808476153, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.013772210694442037, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0192946591164673, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.029647284288967058, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04594091625756389, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05894920310958758, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.10193687835140913, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.13216212507133573, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.15462132809981086, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.3719053268432617, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.293503761291504, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.1712732315063477, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.0569350719451904, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.9484739303588867, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.8047809600830078, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.6506657600402832, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.4891935586929321, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.2932510375976562, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.1056692600250244, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.9428868293762207, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.7379025220870972, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.580520749092102, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.41472315788269043, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.31174060702323914, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.24764113128185272, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.18959404528141022, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.133281871676445, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.09352105110883713, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07806573808193207, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06802409142255783, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0613362193107605, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.05608882009983063, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.05177344009280205, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04766007140278816, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04390527680516243, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04175848513841629, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04101625084877014, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.043034132570028305, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.047453101724386215, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04999460652470589, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04980384185910225, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.048472773283720016, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.050713807344436646, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05660645291209221, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05937471240758896, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06664945930242538, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.07837826013565063, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12958824634552002, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.12118381261825562, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13534323871135712, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1482847034931183, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.19475765526294708, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.525709867477417, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5256069898605347, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.029631495475769, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.1698317527771, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.190983772277832, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.876973628997803, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.33308531746031744, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.40674603174603174, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5161210317460317, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5788690476190477, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6106150793650794, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6381448412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6631944444444444, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6994047619047619, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7485119047619048, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8050595238095238, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8519345238095238, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8841765873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9037698412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9191468253968254, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9308035714285714, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9449404761904762, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.982390873015873, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.986359126984127, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9838789682539683, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9816468253968254, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.982390873015873, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9821428571428571, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9821428571428571, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9813988095238095, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9766865079365079, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9650297619047619, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9645337301587301, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9642857142857143, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.17265490976592635, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.22496887191588916, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.29703338143042085, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.33141465814105237, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3581284025832575, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3937169177509907, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.4324913136760587, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.5028528750433704, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.5940367067784671, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7164489095407559, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8065151184332613, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8593566760365889, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8884947078056319, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9089443499888772, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9213006037874372, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9371574992423296, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.95712904104153, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9680169755434594, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.970832476870561, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.973043455173278, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9752646750073469, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9778878997535336, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9804679962999188, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9817158003035981, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9826044280488391, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9833764316780249, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.983352775366991, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9848448623436526, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9825532557326715, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9813082773711871, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.979428200648577, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.981063408265392, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.98048675607368, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9805497836284938, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9803516656244746, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9806355029145897, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.978440611517229, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9761671227957223, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9675849257420349, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9760672627621587, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9707459841395141, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.975423136433285, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.974680310145604, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9687396213190638, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9689394568617906, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9655837921808557, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9466166464558573, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9615386018559682, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9519892421888253, "id_best": 27, "lr_best": 0.00047999999999999996, "wd_best": 0.05, "train/loss_best": 0.050691034262999896, "validation/loss_best": 0.04101625084877014, "validation/acc_best": 0.9873511904761905, "validation/f1_best": 0.9848448623436526} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 0.8189869196712971, "train/grad": 0.4032485236227512, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.2450364685058593, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.1541978454589845, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.011816177368164, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.8782009887695312, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.7520205688476562, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5866235733032226, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.412880344390869, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.2365322494506836, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.0320776748657225, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8480267858505249, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6986491203308105, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.5252113401889801, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4053023910522461, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.291969725638628, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.22235801361501217, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.17319202728569508, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.1281003474444151, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09627551044337451, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.07933608752675354, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.07117530992254614, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.06469495705328882, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.05998932851478458, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.055959179112687704, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.05244638409465551, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04939884966239333, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.046151537811383606, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.04364158319309354, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04186381042003631, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.04026091994717717, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.03888031579554081, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.037536755930632354, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.03680449574254453, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03670305802486837, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.03716539453715086, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.03679752795957029, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.04109449318610132, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04546862579882145, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04410480562597513, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07123951022513211, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1185857327003032, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.2389326655678451, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.30182070375420156, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.7197290368471294, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0170322315394877, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.3889402678236364, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.107976367780939, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.657881831973791, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.481964500574395, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.36831246948801, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02545385277830064, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.024693390410393475, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02377500580623746, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023060532500967384, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022421336565166712, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021563888639211656, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020592591250315307, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019503611754626036, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018104871194809677, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016724194451235234, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015512332669459283, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013929796740412712, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012630914808250964, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.011151586938649417, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.010036731027066708, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008992301437538118, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007804776262491942, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006863013812107965, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006443611056311056, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006280082089360804, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006168958612252026, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006108261639019475, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006061259084381163, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.006015979134244845, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005976912344340235, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00593206012446899, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005912203181942459, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0059504694608040155, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006088950139528606, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006228457232064102, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006231549871445168, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006210062430764083, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006219507223140681, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006309903158253292, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006349489442654886, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006744172131730011, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006963684226939222, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00729022946456098, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010597413542745926, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.015172240312622307, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.025447263021914156, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.030749852791007014, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05021690186645856, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06315270731563381, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0796891432638749, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.10823210414286581, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.15867075476011472, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.16876311608941474, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.22211239373725827, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.095526695251465, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.991816520690918, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8289183378219604, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.676524043083191, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.5338762998580933, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.3502103090286255, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.162709355354309, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.9797892570495605, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7796700596809387, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6113576889038086, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.4840168356895447, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.35048142075538635, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2690790593624115, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.19456271827220917, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.14014938473701477, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1019054502248764, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.07998453080654144, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.06841882318258286, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.061227597296237946, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.05688412860035896, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.05300768464803696, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.05035040155053139, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.047650158405303955, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04507016763091087, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.042517565190792084, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04034049063920975, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03927793353796005, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03995157405734062, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.042313557118177414, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.040962278842926025, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0389399416744709, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03769165650010109, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03670410439372063, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03533170372247696, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05291656404733658, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.057974349707365036, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06483012437820435, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.06235765293240547, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08678086847066879, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1269780546426773, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3658762276172638, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.6449872255325317, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.1181434392929077, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.6288361549377441, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.0978031158447266, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.929672956466675, "validation/loss_046_lr3.6e+01_wd1.0e+00": 5.102052688598633, "validation/loss_047_lr4.3e+01_wd1.0e+00": 7.270814895629883, "validation/loss_048_lr5.0e+01_wd1.0e+00": 8.273480415344238, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.564484126984127, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6001984126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6321924603174603, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6564980158730159, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6879960317460317, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7338789682539683, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7891865079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8395337301587301, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.878968253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9020337301587301, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9144345238095238, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9322916666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9677579365079365, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9813988095238095, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.982390873015873, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.984375, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.986359126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.988343253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9836309523809523, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9828869047619048, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.984375, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.96875, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9657738095238095, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9697420634920635, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3233785547831195, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3460394050082759, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.3814007206605947, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.41546539573634, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.474238683732868, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5599538322992214, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.674109628853415, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7783313018406863, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8490651500437474, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8824748433971908, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9005584019463454, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9233556708690261, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9411115407015674, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9627580527788265, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9705084326574827, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9730462382897666, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9752029687476468, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9764721114811176, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9777124117944154, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9777103294746388, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9792206033067807, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9800514423120165, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9804102216676116, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9808606094050114, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9817662295714739, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.982713836760252, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.983438601743153, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9840644179534173, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9822789669098759, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9831733085609651, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9853045689739691, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9864317712130645, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9868749807050996, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9874951802838322, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9828219998619128, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9811095302500059, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9785677782943071, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9791220823766731, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9813688607494797, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9790424943992397, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9693484924998252, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9547326881666608, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9670808688986825, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9604923735107395, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9711189548463806, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9655937088675832, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.962462152374138, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9607756121417484, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9624139877271302, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.03716539453715086, "validation/loss_best": 0.03533170372247696, "validation/acc_best": 0.9893353174603174, "validation/f1_best": 0.9874951802838322} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.8964732600003481, "train/grad": 0.4713268059492111, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9722007751464843, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.855496597290039, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.6730245971679687, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.504763641357422, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.3502287292480468, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.1566627407073975, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.9672741031646729, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7913828086853028, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6097041392326354, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.467597815990448, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.3687226229906082, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.2722074267268181, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.2110619854554534, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.14660190112888813, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.10752046093344689, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.08848494839854538, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0769272396620363, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06925067808479071, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06374736004509032, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05988854704424739, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05606885139830411, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.05270453899167478, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04944125946611166, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0462406677659601, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04318689563311637, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.03968640595674515, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.03702731844969094, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.03491051034070551, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03201623321510851, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.02919694559648633, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.027438996369019152, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.026031652046367527, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.024196782745420933, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.022161962231621146, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.02296074405312538, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.023916652519255878, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.04772407629527151, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06778071406297385, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11497757002711297, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.2321077861916274, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.3288382910750806, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.715125765344128, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1521926508750766, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.3084056353569031, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.3583513645082714, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.3078941370546815, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.241883226512, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.561894627828151, "train/loss_048_lr5.0e+01_wd1.0e+00": 8.110079347873107, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023394960910081863, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022791751967743037, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021869826978072524, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020967446835711598, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020061999466270207, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018801867119036614, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017439826191402972, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01607578310184181, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014535541161894799, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.013143524439074098, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.011968715898692608, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.010605354490689934, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.009594253425020724, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008148960112594068, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00705621408415027, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0065192612854298206, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0062526835943572225, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006113757842686027, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006029231012216769, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005966602502739988, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005886654034839012, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005804095754865557, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0057366298433043994, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005688892645121086, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005651682463940233, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005567887893412262, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005512717855162918, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005463843519391958, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005271182806172874, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005003671491285786, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.004866981217928696, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004777504761586897, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004665288547839736, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.004704370266917977, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00506135697101854, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005413171215905095, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008543011922492951, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.010472010422272433, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014667186250135274, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.024170125710279534, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03011784550144512, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05636544139902824, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07372314968414463, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.08187326477753701, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.11171546080177729, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.17620159278241748, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1721642751029392, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.20447552406347844, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.23053042051975495, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.8299281597137451, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.7015665769577026, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.5031474828720093, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.323365330696106, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.1618722677230835, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9661079049110413, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7822958827018738, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6196063756942749, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.46047765016555786, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.34602445363998413, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.27265748381614685, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.20096270740032196, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.1484542042016983, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.09599220752716064, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.07664196193218231, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.06739974766969681, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.060280755162239075, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05498429015278816, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.050948645919561386, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.048220060765743256, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0455305278301239, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.042953744530677795, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04053012281656265, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.0385616309940815, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03758929669857025, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03709445148706436, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.036713022738695145, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03617295250296593, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03519435599446297, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03597185015678406, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03752700611948967, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03825176879763603, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.037912867963314056, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03618659824132919, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.042793963104486465, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04825038090348244, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.09825237840414047, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2092464119195938, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.24972942471504211, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.27044224739074707, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4909326434135437, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.3803917169570923, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.0244946479797363, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.5310558080673218, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.8789892196655273, "validation/loss_045_lr3.1e+01_wd1.0e+00": 6.497152805328369, "validation/loss_046_lr3.6e+01_wd1.0e+00": 6.916540622711182, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.762554168701172, "validation/loss_048_lr5.0e+01_wd1.0e+00": 8.558969497680664, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6374007936507936, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6579861111111112, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7033730158730159, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7425595238095238, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7938988095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8526785714285714, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8836805555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9000496031746031, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9176587301587301, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9283234126984127, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.941468253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9742063492063492, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9789186507936508, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.980406746031746, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.986359126984127, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.988343253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9789186507936508, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.972718253968254, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9796626984126984, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9655257936507936, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9672619047619048, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3927750073248386, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.41960349102837796, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5032516107433733, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5773552201519607, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6849935201153309, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8063889483517073, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8596679110470374, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8836448239505523, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9064604464959606, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9184799456362381, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9325541530054867, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9573097405670171, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9686684166515255, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9732157695876188, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9746321390047902, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9766306102588134, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9801736330444629, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9807266182392773, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9813171340595194, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9817240522662115, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9819036759897293, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9827958476126377, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9838804921397925, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9848262505117378, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9845177030201303, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.985830890067841, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9861153047579585, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9858515098681098, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9871118366204737, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9854570309436459, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.984558154196434, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9844539320113116, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9842540234219125, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9861400246420896, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.986115302561365, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9857893023935557, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9778462595839824, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9687422339302287, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9641966797730119, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9753278744909941, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9686108129385509, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9495077924787054, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9721495137348884, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9746356306511, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9630873474132636, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9582088175987606, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9595449981962014, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9656754928659899, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9590639724135944, "id_best": 28, "lr_best": 0.00057, "wd_best": 0.05, "train/loss_best": 0.03201623321510851, "validation/loss_best": 0.03519435599446297, "validation/acc_best": 0.9888392857142857, "validation/f1_best": 0.9871118366204737} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.6231718244403601, "train/grad": 0.39453187853097915, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.7215348052978516, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.5830133438110352, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.3717107200622558, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1845256805419921, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0207925510406495, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.828470950126648, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.6552235293388367, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.5088035953044892, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3752428233623505, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2855336406826973, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.22769188784062863, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.16455009803175927, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.11951475754380227, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.08761978372931481, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.07534987546503544, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06857903500087559, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06273457101546227, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05786443864926696, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05350418763235212, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.050150377564132216, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04648902729153633, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04308795835822821, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.039741949429735544, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03635183054953814, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03301223068498075, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.029423478133976458, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.026549780648201703, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.02431182427331805, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.021520265052095054, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.018292773971334098, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.01656403818167746, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.015627038041129707, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.015034518782049417, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.014033132363110781, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.016744734924286603, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.020511402990669014, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.032265330478549, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.08189557794481515, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0783891656063497, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1956517032440752, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.2050610288232565, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.4737397846672684, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.5366002067085356, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.9320687396079301, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.9058721424639224, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.068817014042288, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.2652253249753267, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.814806242957711, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.025321260159835, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022142815375700592, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021422666339203714, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020212423857301473, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019011410363018514, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.017856058520264922, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.016390853701159357, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014968593120574952, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.013622282268479467, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012110286669339984, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.010857449686154723, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.009928089187014847, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008634904609061777, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007427684926660731, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0065555430308450015, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006277357999933884, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006149115778971464, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006029936594422906, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005925254307221621, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005801544412970543, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005686179237673059, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.005547699190792628, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00541333197557833, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00528947240265552, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00514858114591334, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004976639016531408, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004757330592838116, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004554272980603855, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004378446261107456, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.004135312141734175, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0037484081875300036, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.003528463639668189, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0033688108962087425, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0035383077644655712, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0032609237293399927, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0035822319819817493, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0046689996805162086, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0064730730048881925, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.013245409159036967, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012071218865212927, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.021995937619234907, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.021849594404407337, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03902380127771466, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.045862015669387926, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.07645405838677247, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.11364885553575294, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.13301239006429447, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.15131689472701806, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.16484161539015052, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.19081106230819386, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5975160598754883, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4515082836151123, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2326617240905762, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0439807176589966, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8831818699836731, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7005212903022766, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5417264103889465, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.41299718618392944, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3027169406414032, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.23076896369457245, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.18064764142036438, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.12052495032548904, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.08861958235502243, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07073655724525452, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06228106468915939, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05712363123893738, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05251740291714668, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0485849529504776, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.045288264751434326, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04271475598216057, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04033590853214264, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03833838924765587, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03659393638372421, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03522610291838646, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03420797362923622, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03316514566540718, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03227686136960983, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03179130330681801, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.031658291816711426, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.031649332493543625, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03226570412516594, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03259638696908951, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03387682884931564, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04392777383327484, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.045116059482097626, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06003822386264801, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07534339278936386, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.08690308034420013, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12472361326217651, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.24213846027851105, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.35377153754234314, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4327026307582855, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5097777247428894, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.952588438987732, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.7803058624267578, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.391563892364502, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.5437533855438232, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.9041669368743896, "validation/loss_048_lr5.0e+01_wd1.0e+00": 6.4539971351623535, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6763392857142857, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7152777777777778, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7728174603174603, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8291170634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8683035714285714, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8985615079365079, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9171626984126984, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9298115079365079, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9439484126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9583333333333334, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9699900793650794, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9809027777777778, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9764384920634921, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9769345238095238, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9717261904761905, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4522835878717024, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5289072279525824, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6438855727317706, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7652046072592592, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8374966753662255, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8803955282814958, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9068824767775095, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9205095356481573, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9362072931547064, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9506640213965489, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9637073060390199, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9698757222887083, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9719728119513702, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9757724532889789, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9783470378045642, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9804382166935326, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9813468041084044, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9825666375585238, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9835621904106744, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9837896098847966, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9841581501987845, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9847098363582131, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9857043103542479, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9857043103542479, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.985871235784024, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9862402989877415, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.985259298584374, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9866909220022249, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9876726965791883, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.987484520285608, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9866271792055837, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9877779575304386, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.986896788383884, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9855077080941849, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9856638252578155, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9827106596387032, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9822982634799609, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9812273876161984, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9839459919878024, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9777405060552357, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9763476463480988, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9821743102835911, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9820040359563571, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.974703294783132, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9749986411683274, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9813382426191358, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9733417564276069, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9731891697185181, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9657649714826215, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.015627038041129707, "validation/loss_best": 0.03259638696908951, "validation/acc_best": 0.9900793650793651, "validation/f1_best": 0.9877779575304386} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.412649632692337, "train/grad": 0.30461729429662227, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4996561813354492, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.348272171020508, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1256637382507324, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9382981109619141, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.783059720993042, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6112516617774963, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4674613833427429, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.35684345960617064, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.26569623276591303, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.20378667265176773, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.15685696549713612, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10838413819670677, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0876265195477754, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07436901782639324, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06717252720147371, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.062442881241440776, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0579748491384089, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.053829283937811855, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.049979170141741636, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04685306148603559, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.043396832328289746, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.040126056559383866, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.036558287516236306, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.032982688238844274, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.029470649091526867, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0257448205165565, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.022691555516794325, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.020030852956697345, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.016651781285181643, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.013527926625683903, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.011066234642639757, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.009879843248054385, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.007490333952009678, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.007392422184348106, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.009828364253044128, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.016721327966079117, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.024547082502394915, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06872721055522561, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07842461936175824, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0920945141185075, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.14667405851185322, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.13501234928146005, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.20960153421387076, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.5091156545095146, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.9896405457518995, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.449279543189332, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.1437258049286902, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.4291341274324805, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.234817607477307, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021172984205186367, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020271344240754843, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018785571753978728, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.017391417757607996, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.016151651423424482, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014672838398255408, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013252452430315316, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011911446626763791, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01058656910667196, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00954880533274263, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008505776664242149, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007137607095064596, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0066069765808060765, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0063622665032744405, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006229712927015498, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006139274880406447, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006035933701205067, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005924308862886391, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005802364131086506, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005698180108156521, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00557780185539741, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00545515485922806, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0052711130061652515, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005057174348621629, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004827891382010421, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004596955661691026, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004396542551548918, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004187620803713798, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.003875828429154353, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0034999101653374965, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0031022860913799376, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0027728191189089558, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0021672037661119247, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.002635551623579886, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.002769341031089425, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.004550704864304862, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006596655091474873, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01176768955021731, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.013119896475131985, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01702023729715521, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.020614678181739964, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.021029928294637144, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.027478337556620464, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04241849068089296, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0835165846238603, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.09804522925056953, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.12099057783761269, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.12491731452861615, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.14155942753025308, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3990466594696045, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2437549829483032, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.019600510597229, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8357710838317871, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.6869828701019287, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5262646079063416, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.39630088210105896, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.3008701503276825, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.2229653149843216, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.16560600697994232, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.12020988762378693, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.08472458273172379, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.07124699652194977, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.0612642727792263, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.055857982486486435, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05221810191869736, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04883419722318649, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04582985118031502, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04299236461520195, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04064501076936722, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0384308286011219, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.036869727075099945, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0356048159301281, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03452320396900177, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.033229466527700424, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.031821660697460175, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.0308638047426939, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03054165467619896, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.030686980113387108, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.031033936887979507, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03361615538597107, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.037056416273117065, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03393419831991196, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03557303175330162, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039861034601926804, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05490737408399582, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06868793815374374, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13234871625900269, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.19508033990859985, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.30390509963035583, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4372488856315613, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5826546549797058, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5496561527252197, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0122836828231812, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.83502995967865, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.9710471630096436, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.894479513168335, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.2409775257110596, "validation/loss_048_lr5.0e+01_wd1.0e+00": 6.261715888977051, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7289186507936508, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7690972222222222, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8360615079365079, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8782242063492064, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8973214285714286, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9188988095238095, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9320436507936508, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9444444444444444, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9605654761904762, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9732142857142857, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9816468253968254, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.984375, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.984375, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.986359126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9831349206349206, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.982390873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9841269841269841, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9722222222222222, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5510423759490594, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.634172660885236, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7762019598187487, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8529924008593982, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8799996368216353, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.909748105294055, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.923615591159768, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9369042143591182, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9542226780613271, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.967468842862549, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9710423191668516, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9740856679302058, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9763542625021169, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9788021187689667, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9803643163174343, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.980540144502238, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9805400853662302, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9804825065840562, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9822822892975079, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9834001540088677, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9845912047607377, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9846819631883628, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.986163817648759, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9867714286062139, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9864008641179868, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9868768191628592, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9863802184897114, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9876199572096919, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9883476862240628, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9886180586779599, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9874088300407703, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9867433176871083, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9890375079148644, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9893204580302976, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.987218277628245, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9855031807127019, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9845167041240863, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9826294124631796, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.979379006210633, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9839396143471115, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9775883124878697, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9793373627890787, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9830197082752401, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9797249798409992, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9795818659412623, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9696700606972098, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9695874143087363, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.975166523157596, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9643129618128311, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.007392422184348106, "validation/loss_best": 0.03557303175330162, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9893204580302976} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.26954573146998884, "train/grad": 0.2471068384312093, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3049531936645509, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1475208568572999, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9250283527374268, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7471334958076477, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6065294241905212, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.4595006000995636, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.3457827007770538, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.26504173457622526, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.19707973711192608, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.14303563233464955, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.10601516570895911, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.08185597511939705, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07144483493641018, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06280218568630516, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.057409741347655655, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05352057627402246, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.049466045824810866, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.045486452737823126, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04157248842529952, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03829865594394505, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0346215103380382, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.031196355000138284, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.02773052637465298, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.024406193243339657, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.02122303783893585, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.017699450803920627, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.014722119709476829, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.012186831878498196, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.009165528705343604, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.006288739610463381, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.004442657483741641, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0034101040940731765, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.002575612710788846, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0015371302049607038, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.002766339424997568, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.005263462802395225, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0069263993296772245, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.03806805423460901, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0708286840096116, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07990975427441299, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.061627243934199216, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06618403384461999, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07047569184564054, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.18361656122840941, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.4430709249060601, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.936856688382104, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.035808920925483, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1822600688505918, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.063394659049809, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01982059730216861, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.018747189291752876, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.017053159992210565, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015573053043335677, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014323048209771514, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012853093156591057, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011468223482370376, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0102973707113415, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009168120506219566, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.007973872984293848, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006921264815609902, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.006252151767257601, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006001076024258509, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005795951579930261, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005666344325290993, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00556833800743334, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0054445066579501145, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005309304764086847, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005154020017507719, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00501368004566757, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00483634822696331, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004660239190925495, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004454626203878434, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004236923471835325, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003984903253804077, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.003659679871161643, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0033394646949818706, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0030096628833598516, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.002530459425152003, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.002058384870115333, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0017369993763213642, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.001353947527645687, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0009701621805572814, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0008340015401950041, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.001106627303486789, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.001968792780539559, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.002286019060692652, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009109468108746013, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012190905831767957, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.013938236918838196, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01335985048609678, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.014158455241372594, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01538577526266878, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.026525654771155035, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05071815190189702, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08084535643449825, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.08546033137700393, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.08534278115496077, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1245837572402933, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2349579334259033, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.076828956604004, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8570285439491272, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6847310066223145, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5504699945449829, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.41237688064575195, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.30878719687461853, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.23618683218955994, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.17158286273479462, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11863023787736893, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.08950874954462051, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.07187997549772263, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06341346353292465, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05632919818162918, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05195484682917595, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04888464882969856, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.045866481959819794, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04302334040403366, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04080889746546745, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03908313438296318, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0372597835958004, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03557321056723595, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.034020740538835526, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.032511331140995026, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03134212642908096, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03054845705628395, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03015083447098732, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03008701652288437, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.029924532398581505, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03134608641266823, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.033392153680324554, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03409843146800995, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03444940224289894, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03760280832648277, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03995859995484352, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.050967026501894, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.07252187281847, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17921149730682373, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.21156872808933258, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.23439925909042358, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.24937349557876587, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.32740411162376404, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5378174185752869, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.2652517557144165, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.7504183053970337, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.0355751514434814, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.8995394706726074, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.462538242340088, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.766898155212402, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7686011904761905, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8189484126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8715277777777778, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.894593253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9079861111111112, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9221230158730159, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9384920634920635, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9546130952380952, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.970734126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9754464285714286, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.984375, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.984375, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.986359126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.988343253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.982390873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.988343253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9744543650793651, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6320077410968532, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7396819500083187, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8392388629829678, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8750829183435519, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8948039326068279, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9116375701785626, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.930469916792482, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9482003874949938, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9649365067075765, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9695036281838186, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9721886836310111, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9750442536531099, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9776310782404986, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9789806234376675, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9802438963584489, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9802245328241985, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9802824844844145, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9810006294510271, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9821302700406781, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9830760633486351, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9836896172160342, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9843626235659341, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9841829104387265, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9853099305487275, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9852667501021163, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9858310716655259, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9861493120303949, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9863066752059463, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9868023387474445, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9861163500092802, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9870084245286017, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9865964097773462, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9885624449736474, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9873645932975343, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9874019938995288, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9876638088931977, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9832373343736424, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9814703918075984, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.980969725452661, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9834667571027451, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9859738325393937, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9863834248664276, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9829576563374289, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9764333385790005, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9747314290420678, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9801760009043551, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9670229806123364, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9750081875355556, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9654561870204263, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.002575612710788846, "validation/loss_best": 0.03444940224289894, "validation/acc_best": 0.9898313492063492, "validation/f1_best": 0.9885624449736474} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.1840345035493374, "train/grad": 0.16967120949178935, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.1709819412231446, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0125487899780274, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7960191917419434, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6292845964431762, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5020306825637817, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.37489271521568296, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.28252180308103564, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.21717349961400031, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.15500671822577716, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.10798478860408067, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.0862354737892747, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07221790046431124, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06490402483381331, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05820897470228374, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.053704244615510106, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05019727826118469, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.046403207490220665, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.042508407067507505, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03857345843687653, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03523747374303639, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0315791551861912, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.028050777204334736, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.024555091885849832, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.021209905641153454, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.017982260268181564, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.014379545859992505, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.011244696341454983, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.008898501135408878, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.006264822352677584, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0036686237435787916, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0023390221130102874, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0016411873884499073, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.001145545244216919, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0007113504782319069, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0012575505580753087, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0005871331226080656, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0028839735500514507, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.010636956142261624, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.02066259359009564, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.046694461768493054, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06408404212445021, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.03573668722063303, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.03244861636310816, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.13724664163775743, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.17940273490734399, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.39696739917621016, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.39763525075279177, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.5271452702395618, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.1939958450198174, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018948710951954127, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017791795395314693, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01607408912386745, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014646366043016314, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013435638276860118, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011976651535369455, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010659650380257518, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009572731133084744, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.008254765185993164, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00697754502762109, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0064288438018411396, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.006153631996130571, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006029683874221519, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005940797365619801, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005873566982336343, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0058089959574863315, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005708985786186531, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005563874117215164, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005370426707959269, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0051871645112987605, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00494713119813241, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004691196823259815, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0044135279531474225, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004111287903506309, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0037715144976391457, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00330809859209694, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0028126218906982105, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0024147393559542252, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0019076211723222513, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0013169628032301262, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0009927283813158283, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0007890507921183599, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0004317250391068228, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0004994440571363157, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00043664369930695556, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.000575517581355598, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0015166568698331063, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0050604530091511325, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006682025256873405, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008945534696273951, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010801183589217264, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.009220837177597245, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.007793789706863005, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.022631061684847263, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03675383915176607, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03827606815449825, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05071627295028334, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.05481295084033066, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.08161092394741136, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.102582573890686, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9460533261299133, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7352675795555115, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5755937099456787, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.4553297758102417, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3379303812980652, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2542061507701874, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.19328132271766663, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.1324695646762848, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.0926806777715683, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.0763542503118515, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.06457310169935226, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05821092426776886, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05237877741456032, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04858667775988579, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04584241658449173, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04287945479154587, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04010648652911186, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03760514780879021, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03582514449954033, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.034071121364831924, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.032724779099226, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0318429060280323, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.031526509672403336, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031884580850601196, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032549697905778885, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.032145190984010696, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03202392905950546, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03103046678006649, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03285324200987816, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03486991673707962, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03497545048594475, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03484891727566719, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03556385263800621, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04258355870842934, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06325512379407883, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06106790900230408, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15416832268238068, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.18967080116271973, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2509952187538147, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.5282895565032959, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.28112584352493286, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.529145359992981, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.9369126558303833, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.1249361038208008, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6571600437164307, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.4941997528076172, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.969738245010376, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.657472610473633, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8110119047619048, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8561507936507936, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8913690476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9104662698412699, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9223710317460317, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9357638888888888, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9508928571428571, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9660218253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9744543650793651, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9786706349206349, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.982390873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.988343253968254, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9751984126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.984375, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9784226190476191, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9828869047619048, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7210770628642338, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.813759622659614, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8720448918345606, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8986479228660527, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9125502910760879, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9276655827260479, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9437418528668338, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9595908160856271, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9684105321351588, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9726524422156994, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9748228084529365, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.977043346782537, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9792491451461979, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9810431040930523, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9813845018856336, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9829021880196458, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9838649370184915, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9856721972903569, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9863919153527493, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9869364861111734, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9874347693313636, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9874340906877401, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9878066995651326, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9884955878462671, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9895269201880025, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.988045133077059, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.98694678678039, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9879798976199426, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9880223838662467, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9872865859208918, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9870696896289727, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859828470564018, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9886531970310702, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9875841141704063, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9864036020605318, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9819821488286001, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9854435075024923, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9806524907258815, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9841562961405266, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9846653653318569, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9748769748335762, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9850145949138278, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9835333218417924, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9809294288547495, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9823484207681465, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9751604943999966, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9820993592747894, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.981933419263611, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9800009246104696, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 0.017982260268181564, "validation/loss_best": 0.031884580850601196, "validation/acc_best": 0.9900793650793651, "validation/f1_best": 0.9895269201880025} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.14078039512038232, "train/grad": 0.12826168202795088, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0538024425506591, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8989984226226807, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.692988920211792, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5391187572479248, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4253401780128479, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3169188994169235, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.24028651118278505, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.18233095474541186, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.12439861241728067, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.09224284857511521, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07898518656380475, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0686123060528189, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06255971444770694, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.056719955932348964, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.052603539787232874, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04932097242213786, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.045715513871982694, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.041891525723040106, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0379823630489409, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.03456176050938666, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03059649333357811, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.026822908567264676, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.022922258488833905, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.01922970592044294, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.015757515616714953, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.012046706024557352, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.009007814452052116, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.00644215707667172, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0036281486693769695, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0018270543869584798, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.001136744376271963, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007706875447183847, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00036908520385622976, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002576426137238741, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0002651864290237427, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0034825906157493593, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0007403812650591135, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.008533418262377381, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0187642355915159, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.01778735587373376, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.03435774714685977, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.018576858723536135, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.0509476875141263, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.10629702958278359, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.08936287811025977, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.12359672943130136, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.2725226011686027, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.36379214719869196, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5430183223076165, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01808171544224024, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.016910562962293624, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015256561846472323, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.013919194373302161, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0127848940808326, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.011458713866304607, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010320702407043428, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009234975359868258, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0076383571443147955, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006616466370178387, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0062676532805198805, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.006027841663453728, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0058933291712310164, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005751865975908004, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005643879595445469, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005530158439651132, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0053880517656216395, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005216330948169343, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005005303364159772, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004797826360008912, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0045401682872034145, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004274110707920045, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003950755962287076, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003581860295016668, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0031847469745844136, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0027136221673572437, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.002290297092149558, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0018722262010123813, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0012658473846386188, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0007528221651045896, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0005287703967223933, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0003523818948406188, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014330289537156204, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00010611214909658884, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00011690850117702212, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0011684856356234264, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0003989978419370743, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0016716280066803194, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00555290466074276, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004720335977026951, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00787434207082372, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.005451996208465627, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.009607967169055917, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016718138195224955, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.017941844323346452, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.022128033452358132, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0362103943741186, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.041634905017387196, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.05318430095028868, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9984824061393738, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8456905484199524, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6451135873794556, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4971804916858673, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.389390766620636, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.28867730498313904, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.2174970805644989, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.16109751164913177, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.10646206140518188, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08060663938522339, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06952092796564102, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.060526907444000244, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.05533488839864731, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.050450827926397324, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04706680402159691, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.044604163616895676, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.042058065533638, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.039707642048597336, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03752852976322174, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0360230877995491, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03442879393696785, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0331738106906414, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03224029392004013, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03153638914227486, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.030967997387051582, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03073924407362938, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.031088147312402725, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.031202925369143486, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.031408265233039856, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.032286472618579865, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03356482833623886, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03358537703752518, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.034514155238866806, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03517962992191315, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03940428048372269, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05012182146310806, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06233067810535431, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1386851817369461, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2175520658493042, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2744457423686981, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22626107931137085, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3659614622592926, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.4224812984466553, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.689382016658783, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.0770686864852905, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.5635349750518799, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.3070353269577026, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.00791335105896, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.23958683013916, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.839781746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8722718253968254, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8973214285714286, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9164186507936508, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9298115079365079, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9441964285714286, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9608134920634921, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9717261904761905, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9761904761904762, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9789186507936508, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9806547619047619, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9828869047619048, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.988343253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.984375, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9813988095238095, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7827555851320349, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8430181330633969, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8803519252645138, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9063280419599322, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.920689715175862, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9368825051921067, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9542934877444629, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9651417514327844, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9699656202894561, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9731567898769878, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9755962082343618, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9779070678723581, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9796160646127463, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9807488950103505, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.980946981562307, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9820233939932018, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9826563695570796, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.983386929226317, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9846503572871195, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9852925287037082, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9849959057636772, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9849959057636772, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9861795408133739, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.987140680875, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9876193292094029, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9878967605100745, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9875351119013817, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9878953781843711, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9881008793714443, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9880008123707288, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9879862228396586, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9872578951810372, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.988193091942931, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9872336904356783, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9875161755496207, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9868186939949879, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9876396876776397, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9859896891389012, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9844152541592993, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9829427545191727, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9838293914652526, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9835332310015814, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9842898527986224, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9833871520091625, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9815239095357866, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9805576052578457, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9794096062984711, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9750581072246692, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9772167112718668, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 0.0007403812650591135, "validation/loss_best": 0.06233067810535431, "validation/acc_best": 0.9900793650793651, "validation/f1_best": 0.9876396876776397} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.10552971962839365, "train/grad": 0.09029850659891964, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9548716354370117, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8050918388366699, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6104686737060547, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.46929927468299865, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.3683176642656326, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.27547392845153806, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.20891868188977242, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.15364400763064623, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.10359407788142562, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.08194825337268412, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.07220303675159812, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06368783903308213, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.058466203678399324, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05305024404078722, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.048945164643228053, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04560033567249775, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.041818873630836605, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.037791605861857536, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0337343495991081, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030303943026810883, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02638694962486625, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.022668430879712105, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01889749750494957, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.015219963397830725, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.011740315407514572, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.007961525218561291, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.005129469009116292, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0032401403412222864, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0017562173772603274, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009516537003219128, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0006198683753609657, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00045648444443941117, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003037153463810682, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00020457440987229348, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00013515889644622804, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00037836032919585705, "train/loss_036_lr7.1e+00_wd1.0e+00": 7.647262886166573e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.00028201550245285037, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.006842284323647618, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.01811261580325663, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.006786057054996491, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.010745290145277977, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.005673621622845531, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.03167643578723073, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.04510555715300143, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.026353506511077283, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.05855188655667007, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.11052503340877592, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.21694559190422297, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017238427097909154, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01600652258377522, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.014284780267626047, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01287209561560303, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011643924349918962, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.010290467396844178, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00919720474164933, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.00810971210245043, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006832973677664995, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006311372414929792, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006115674211177975, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005948677011765539, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005840766397304833, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0057129684940446165, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005600397203816101, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005500669393222779, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.00536147674836684, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005195533522055484, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004999173314135987, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004800794975599274, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004531661971413996, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004225278460944537, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003857007968472317, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003439865953696426, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00297845655324636, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.002354049883579137, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0017383590339886722, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0012124340416266933, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0006994313363065885, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00038810431690762927, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002526408052381157, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00018665004802187468, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00012040471594900736, "train/grad_033_lr4.3e+00_wd1.0e+00": 9.025663067404821e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 6.141690081221895e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0002546949907642215, "train/grad_036_lr7.1e+00_wd1.0e+00": 6.251922464414861e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0004011849678137595, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.002974362411806351, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004319582221336145, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.003200051894918449, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0021037390816835417, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0022435870170604937, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00570372971400976, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.010182196082348166, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.007878134138422086, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.019060718764515535, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.024508660093576443, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03273966226888973, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.918184757232666, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.7697675824165344, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.5785411596298218, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4408184289932251, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.3434646427631378, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.25497353076934814, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.19075828790664673, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.13632796704769135, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.09160732477903366, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.07363250851631165, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06492409110069275, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.057466261088848114, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.052953604608774185, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04851629212498665, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.045355819165706635, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.042948167771101, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.040528055280447006, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03819753974676132, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03620966151356697, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03469855338335037, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03330770879983902, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03242183476686478, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03191763535141945, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03157985955476761, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03149037063121796, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03188689798116684, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03228697180747986, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03254241868853569, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03287799656391144, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03367047756910324, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03448928892612457, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.034587230533361435, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03490206599235535, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03495265543460846, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03943237289786339, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04964258894324303, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.055958665907382965, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10646423697471619, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.16261760890483856, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.21932615339756012, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.19769522547721863, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.306384801864624, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.36709994077682495, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6308673620223999, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.8366265296936035, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.5426653623580933, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2945237159729004, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.8432111740112305, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.8485924005508423, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8635912698412699, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8839285714285714, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9060019841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9213789682539683, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.935515873015873, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9503968253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9652777777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9747023809523809, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9786706349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9801587301587301, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9831349206349206, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8251955719863627, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8618240188638583, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8928489414745048, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9111032797478946, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.927473145946536, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9432068058274746, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9589400427687939, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9682446725583441, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9722923828977321, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9754990830913348, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9772178432384374, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9799243233311785, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9801540147351864, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.981583560450022, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9823162689249, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9824840802352888, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9823460673501007, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9842533180702775, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9842450056551205, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9854486236474774, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9864028332381681, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.987321237847136, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9870298073162213, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9881037944622109, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9881415816999665, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.989080867238329, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9885182394946613, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9879327297781796, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9881114597159436, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9877504177644257, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9875720353136401, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9871270449474452, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9881061177764036, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9879444181775023, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9869737990668943, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9884731803977351, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9868615874661354, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.987146543670686, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9824738058785096, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9857775373574262, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9855372353019967, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9847910649195284, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9852519274819962, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9846084455325564, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9847423495831213, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9808726613277247, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9788566645744721, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9773642543912212, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.979987568115433, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 0.007961525218561291, "validation/loss_best": 0.03188689798116684, "validation/acc_best": 0.9903273809523809, "validation/f1_best": 0.989080867238329} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.09285749480128289, "train/grad": 0.06657808562740683, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.8940662574768067, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.7476869583129883, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.5605467128753662, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.42761050760746, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.33503339648246766, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.25125656828284265, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.18868587851524354, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.13435441546142102, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.09378303876146674, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07750072778202594, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06916283649392425, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06155901118181646, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05660939280875027, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.051352584343403576, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04729045414365828, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04392525428906083, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.040163199519738554, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03613869735039771, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.032053021667525175, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.028591628344729544, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.024641744857653976, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02093065496534109, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01710826172493398, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.013331422135233879, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009759318502619862, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.006183289606124163, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003809115309268236, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0024008119758218527, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0013371005840599538, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0007806566078215838, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005268358625471592, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00038851517252624034, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00027374240569770335, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00018540862947702408, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00012312994338572024, "train/loss_035_lr6.0e+00_wd1.0e+00": 6.847522221505643e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.8588082641363145e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 7.559070363640785e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.004143856512382626, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0034775867499411105, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0036443463899195195, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0019233217276632786, "train/loss_042_lr1.9e+01_wd1.0e+00": 6.34700059890747e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.015992033192887903, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.014120698720216752, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.021592286797240377, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.018568888092413544, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06642414716072381, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.12081150985322893, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016777130700647833, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015589742078445852, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013950837245211005, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012587508242577315, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.011424230632837862, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.010204118138644844, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.009141024127602577, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.00782818057341501, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0066460704058408735, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006247545151272789, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006067277911351993, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005901039182208479, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005783557246322743, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005636067382874898, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005496126084472053, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005357447838759981, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005191522939421702, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0049733426346210766, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004723141090653371, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00449554651757353, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004211808840918821, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0038925286637095267, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003495244922087295, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0030350880381593017, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002515432825748576, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.001872604858945124, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00131500495794171, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0009027281486487481, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005281672686578531, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.000308499239217781, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002029136777309759, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00014817479152043234, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00010281999880589864, "train/grad_033_lr4.3e+00_wd1.0e+00": 7.560071659554524e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.418265156322377e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 4.317608456517519e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.4759325054652948e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.4327627861040645e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0015365198409091252, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.001028606170526481, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0016649145311223004, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0008816637980419948, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00020838818858070522, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.004634683222883549, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0045248266011861694, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.004681389704268581, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.00806450381192079, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01373818224578354, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.017304097309739176, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.85714191198349, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.7128068208694458, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.5296975374221802, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.4004512429237366, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.3114510178565979, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.23163394629955292, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.17091774940490723, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.1188252791762352, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.08332083374261856, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06921377032995224, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.06182295083999634, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05537961795926094, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.051312241703271866, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04716436192393303, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04429928585886955, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04208939149975777, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03974318876862526, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03761850297451019, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03585099056363106, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03456851467490196, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0333026722073555, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03240475803613663, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03164776414632797, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03102492354810238, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03085610643029213, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03148514777421951, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.032352179288864136, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.033030781894922256, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03319774195551872, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03388373926281929, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03464609384536743, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03460553288459778, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03475840762257576, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.035137418657541275, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039451614022254944, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.048508621752262115, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.054240632802248, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09963251650333405, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15595635771751404, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1623258739709854, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1554826945066452, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.28129369020462036, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3483418822288513, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6425281763076782, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.71268630027771, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.337804913520813, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9497893452644348, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.1288102865219116, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.4741407632827759, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8712797619047619, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8916170634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9136904761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9283234126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9417162698412699, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9556051587301587, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9697420634920635, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9754464285714286, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9796626984126984, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.984375, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.984375, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9898313492063492, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9838789682539683, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8407730116592622, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8737666094641228, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9033015455168758, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9191262775370939, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9340193459692282, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9491126591245401, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9632249808231473, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9690644027279222, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9744184446913962, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9763102975658252, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9779358518540797, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9803332741502598, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9799657938993788, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.980684971639865, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9821371878334828, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9829043607138006, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9832537705783937, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9839096461952404, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9853458918904007, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9853497025726576, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9853621501636647, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9853613590288485, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9861202525572398, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9877449528768022, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9877424678152555, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9873206556656197, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9874870082327102, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9870360866466766, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9873943756315865, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9864886400561574, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9873145591535546, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9873905991028309, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.987297063410424, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9876773022438947, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9867947932322616, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9888484099250873, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9868716670698134, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9876775102358395, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9834759455734964, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9891854578278008, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9887803410364836, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9859143004107447, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9845214633458894, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9837474115246675, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9841912882321389, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9821405274740048, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9827058517852313, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9820884532234508, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9802007082806238, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.0034775867499411105, "validation/loss_best": 0.1623258739709854, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891854578278008} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.08148923337459564, "train/grad": 0.04941790090873838, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.832004828453064, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6886793494224548, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.5088443970680236, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.3841933447122574, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.29984427958726884, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.22450764566659928, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.16597455255687238, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.11620802093297243, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.08500954982824624, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07222860527224839, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06528777745552361, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05848030535504222, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.053957153148949145, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04887419395148754, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04486435103230178, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04148101699538529, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03765514510683715, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03359069465659559, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0294783631619066, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02597797002643347, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.022000217465683817, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01836771620437503, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.014764507655054332, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011313316887244582, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.008045792765915393, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004912375593557954, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002955528357997537, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0018402170110493898, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0010534641519188881, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006333217397332192, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00044004774652421473, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00033755874261260033, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00024215253069996833, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00017467660829424857, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00011904026381671428, "train/loss_035_lr6.0e+00_wd1.0e+00": 5.4992660880088805e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.391536884009838e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 5.221292376518249e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0011125759780406953, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.002062661498785019, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.000661709439009428, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0010676465090364217, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00013137666508555412, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.003664741674438119, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0010354260448366404, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.002275662859901786, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0033618164248764514, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.03991626388393343, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.033247022805735466, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016206095241941512, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015014542257413268, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013386003910563887, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.012039103358983994, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0109374885330908, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009825835563242435, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00873068114509806, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.007359713374171406, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00643859420903027, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006140834592515603, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005983595261350274, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005838597526308149, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0057376732875127345, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0055943473818479104, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005456474386737682, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005318915995303541, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005133434301242233, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004900679130805656, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004645497576275375, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004400563279050402, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004082140179234557, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003746703125798376, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0033483114164846485, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0028469836595468225, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0022328041000582743, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0015216088318629772, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.001000620149425231, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006699021786880622, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004006729650063789, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00024159196254913695, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00016659041906677885, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001265279688141163, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.24273802775133e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.817697050792048e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.046135411362229e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.543372193533401e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.3666105582762497e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 8.110539238951223e-06, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0002677575575833385, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0008728890824475178, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0003064332675663999, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00046423104087592125, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0003080252761950988, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0025163145285154787, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00044666775780641743, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.001343352654380053, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.002491983283772793, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.007638008692753047, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00478520895652196, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.8119998574256897, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6713272333145142, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.4947633147239685, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.37227439880371094, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.28938475251197815, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.21468110382556915, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.15575839579105377, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.10695163905620575, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.0781833603978157, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06614048033952713, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.059577979147434235, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05343373864889145, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04966737702488899, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04564720392227173, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04276810958981514, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04051360860466957, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.038191527128219604, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.036168575286865234, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.034407902508974075, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03329341486096382, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.032180964946746826, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03153997287154198, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.031163351610302925, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.031039834022521973, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031125549226999283, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.031422507017850876, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.031969718635082245, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03238753601908684, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.033029887825250626, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03385818377137184, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0348568931221962, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03451228514313698, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.034759845584630966, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03504456207156181, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03915688768029213, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04825156554579735, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05305478721857071, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09653613716363907, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14164245128631592, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.15324951708316803, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.14657434821128845, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2787252366542816, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.33289989829063416, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5841671228408813, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6699686050415039, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.0128662586212158, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.7005149126052856, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.1078277826309204, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.1079230308532715, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8779761904761905, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8970734126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9184027777777778, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9300595238095238, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9459325396825397, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9603174603174603, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9729662698412699, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9799107142857143, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9821428571428571, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.986359126984127, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.988343253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.988343253968254, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9915674603174603, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.986359126984127, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9853670634920635, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8523011813093501, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8796056080282975, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9080475156103054, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.920898116483825, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9386163357840894, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.953367815257286, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9668822973588492, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9705095604764137, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9749518955465685, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9773935778307415, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9790757292194824, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.980802020116573, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9811972269117155, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9813769214310326, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9824946432106259, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9830866148951827, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9832665299229892, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.98544434697233, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9863447793295886, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9871299503966148, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9873132776390199, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9864928438735321, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9873668425224686, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9873665093496897, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9879301027691592, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9884432357078795, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9886664489914397, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9884722068109139, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9872190532774875, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.987136176702769, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9870338580955546, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9864866405358006, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.987110361802261, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9870491213473674, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9873373657534746, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9890270511451165, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.98727391195248, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9876775102358395, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9850116832676544, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9905728077034007, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9893495462427893, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9848128657402933, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9844186090574423, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9842273732599881, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9843986130204001, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9842707410286909, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9857532470558503, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9823434987747355, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9826923436590269, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.002062661498785019, "validation/loss_best": 0.15324951708316803, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9905728077034007} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.07664811495691538, "train/grad": 0.042266241908073425, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7977550458908081, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6584872841835022, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.48513230323791506, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.36649256587028506, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.2869863733649254, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.2153779814392328, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1573757764697075, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.11024646865203977, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.08331688408739865, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.07144954533316195, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06460455522872507, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.05783653711900115, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05316692815162241, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04804099103435874, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04396519660949707, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04053834710270166, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03669457664713263, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.032544854655861856, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.028370658233761788, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.024820892475545406, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.020822058180347085, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.017146419882774353, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.013485683826729655, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.010072880545631052, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.00696605620905757, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0040792890451848505, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0023861624393612148, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0015472039673477412, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0009588444046676159, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006006706133484841, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0004293647687882185, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00033068404532968996, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00024233747273683548, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001703068148344755, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00011881319805979728, "train/loss_035_lr6.0e+00_wd1.0e+00": 5.7031679898500445e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.106820397078991e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 8.515696972608566e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 4.074610769748688e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.780087620019913e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.411678433418274e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.199521452188492e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 4.348130896687508e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00012717514298856257, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0005472067836672068, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.8868688493967056e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.004819641122594476, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.005944990236312151, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0016408446244895458, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01614049176685512, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01494271244853735, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.013309637666679919, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01196766714565456, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010887877526693046, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009773524317424745, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00854465236188844, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0071145741012878716, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006389029722195119, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0061644841451197864, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.006043394453590736, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005904997593606822, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005789923617849127, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0056313729251269255, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0054742383788106965, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0053159766388125715, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005122938623535447, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004881150494911708, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0045948268444044515, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00431072264444083, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003948411026794929, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0035546501021599397, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0030754267977317795, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0025244385070982388, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0019339913278236054, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0012905590180162109, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008250428524479502, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005552493415234495, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00034929140123495016, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00021885149233639824, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00015793836782449945, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00012145637178491597, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.798474944796907e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.522361939460098e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.023547904215775e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.159249143919851e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.1984186818038153e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 8.18180838199553e-06, "train/grad_038_lr9.8e+00_wd1.0e+00": 7.791472624469797e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00013812402773498195, "train/grad_040_lr1.4e+01_wd1.0e+00": 3.3906902820854446e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00016028675991808675, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0001409019790002271, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.000343696331768922, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0004768844406074024, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00014665273888428082, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0007677459231820598, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.002353653666826616, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0005906881130823308, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7800522446632385, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6421128511428833, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.4703293740749359, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.3526497185230255, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2741341292858124, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.2030174285173416, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.14520488679409027, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09996110200881958, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07515081018209457, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06441162526607513, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05827385187149048, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05258661136031151, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.048888642340898514, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.045062143355607986, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04232344403862953, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04023831710219383, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03818150982260704, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03631698340177536, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.034835491329431534, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03380100429058075, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.032890435308218, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03217135742306709, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03168907389044762, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03139191120862961, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03162185102701187, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032313790172338486, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03271966800093651, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.033068448305130005, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.033443376421928406, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03409560024738312, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.034969720989465714, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.034812718629837036, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03486516699194908, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.035064734518527985, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03900548443198204, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04797488823533058, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.051977258175611496, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.0943218544125557, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1380452662706375, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13980315625667572, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.14127475023269653, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.23867358267307281, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2985822260379791, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5346539616584778, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6170465350151062, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8954581618309021, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6585923433303833, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8034564256668091, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.0170676708221436, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8844246031746031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9002976190476191, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9201388888888888, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9350198412698413, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9474206349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9640376984126984, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9732142857142857, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9801587301587301, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.982390873015873, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.988343253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9910714285714286, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9903273809523809, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9848710317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8622767975105801, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.884160631943053, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9099911991454102, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9260364695871817, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.939919262949355, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9575987932795601, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.966799613414888, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9707785517631484, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9751418515171136, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9773972162673692, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9788969058441617, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9814338315821873, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9803484003159049, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9814202878717864, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9834316619507154, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9838030283309526, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9850790040958659, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9858451909082491, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9860190615623494, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9866564207549257, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9869819816081398, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9871666766036388, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9873410182679875, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9869961960698942, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9878247560612778, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9869588971877638, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9870896780575089, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9871653585238592, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9873381037100173, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9875720353136401, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9865889591430155, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9864866405358006, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.987110361802261, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9874993714974403, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9869737990668943, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9890270511451165, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9874543209917619, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9882220809942637, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9851892569529603, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9900828873718711, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.989170141915159, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9859220538106093, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9861123744495293, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9826047887837052, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9846143665218553, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9848123860949928, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9852578386927041, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9845532163952322, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9820605575021245, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 4.780087620019913e-06, "validation/loss_best": 0.13980315625667572, "validation/acc_best": 0.9910714285714286, "validation/f1_best": 0.9900828873718711} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.07136431768536568, "train/grad": 0.04031481392681599, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7660561633110047, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6282341992855072, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.45822900712490083, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.34338342159986496, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.26754032373428344, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.1987075900286436, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.14177432630211115, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09821572463959455, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07511129623278975, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06477199953049421, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05865035045892, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.052640655096620324, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.04840003507211804, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04358567305840552, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.039697978235781195, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.036352572543546555, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03261692630127072, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.028627203572541476, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.024564918074756862, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02118627483956516, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.01741866304539144, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.014021239429712295, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010762271033599972, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00775528515689075, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005151208406314254, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0029267813730984925, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0017668261751532554, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0011999461706727742, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.000768633522093296, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004933802131563425, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003554262686520815, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002748455200344324, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002079394180327654, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001442362554371357, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010542138479650021, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.5269671827554705e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.724164165556431e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.961006343364716e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 4.597287625074387e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.4028791338205337e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.868782564997673e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.502435818314552e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.816625103354454e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00013125799596309663, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0015447072498500347, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.7008354663848875e-10, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0020571148302406072, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0013281311746686697, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015762432618066667, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014601722294464707, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01300380680244416, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011672405197750778, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010606110724620522, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009469735668972135, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00820875549223274, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0069574545847717675, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006355166474822909, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0061264037049841135, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005980379628017545, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005831981278606691, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005706315109564457, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005551772784674541, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005396131452871486, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005250617913698079, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005058442948065931, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004829967937257607, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004558635430294089, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004292657846453949, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003931471254254575, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0035236931678082326, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00302742087347724, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0024410812551650452, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001809798894901178, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0011429643727024087, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007242904960003216, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0004940023697599827, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00031438668591363236, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00020071910477781785, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00014507017835057922, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00011328333904884857, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.315882063925529e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.0903467178832217e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.600473379696268e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.8572759472638154e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.1420321354970183e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 6.7614163222793395e-06, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.870734203870573e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 3.0896367314935817e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.4226878157685574e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.931504845766726e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.0745532917718553e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0002561922608077385, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.0309665858409626e-08, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0008874760556615458, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.6031984198139777e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0004900846123269505, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0004960052885732423, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7583434581756592, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6224111318588257, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.4541035294532776, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.33973902463912964, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2640570402145386, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.19500909745693207, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.13775591552257538, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09531822055578232, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07303173094987869, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06301485002040863, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.057245876640081406, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05182160809636116, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04824620857834816, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04454458877444267, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04179193824529648, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03972720354795456, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03753264248371124, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03562324121594429, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.033797599375247955, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03267224505543709, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03154013305902481, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0308846328407526, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.030381429940462112, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030299441888928413, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.030663281679153442, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.031709037721157074, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.032692354172468185, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03314658999443054, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.033774226903915405, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034653086215257645, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03521217033267021, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.035052020102739334, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03498021885752678, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03519239276647568, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03916877135634422, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.047801367938518524, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05130424723029137, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09281934052705765, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1355978101491928, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1370992362499237, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13753870129585266, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.22945477068424225, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2884795069694519, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.518869161605835, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5922242403030396, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9018840193748474, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6306599974632263, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8423197865486145, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.9368499517440796, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8859126984126984, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9005456349206349, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9216269841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9352678571428571, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9484126984126984, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9645337301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9747023809523809, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9784226190476191, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9806547619047619, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9828869047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.986359126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.988343253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.988343253968254, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9846230158730159, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8652620859838245, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.884659832838543, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9114721173900927, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9266821920752024, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9414288676493121, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9582213601095211, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.968417000176684, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9720220746985616, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9758597162743282, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9780674280322376, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9800633983509787, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9814274950325442, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9812351289000224, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9830232347777423, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.983250159649713, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9850656633688532, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.985073172393017, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9852653542454622, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9858007083372249, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9863568995900913, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9866624725543035, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9864733332142976, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9862873704128434, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9867047824631573, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9872216230387151, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.987224761209516, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9863227851790609, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9868569904385118, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9860698262304565, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9860408216869574, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9865916963399479, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859421601729795, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.987110361802261, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9873204505847025, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.987515080658044, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9888484099250873, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9874543209917619, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9876775102358395, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9846449850285239, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9896272239562192, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9893495462427893, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9858672033508847, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9857459708652074, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9843822215136426, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9845485309506687, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9833792893426194, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9855286120996228, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9823697054690219, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9818806052681243, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 1.4028791338205337e-06, "validation/loss_best": 0.1370992362499237, "validation/acc_best": 0.9908234126984127, "validation/f1_best": 0.9896272239562192} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.07126804698258639, "train/grad": 0.03909217623062432, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7541525506973267, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6187474918365479, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.45200774431228635, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.33971725046634677, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.2656888675689697, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.19801580153405665, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1411718549579382, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09962985713034868, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07815422987565399, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06811569408513606, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06202215555123985, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.055807793131098155, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.051332354303449394, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04622675659134984, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04219186098314822, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03878009282052517, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.034803222231566906, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.030710817854851483, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.026445248713716864, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.022837962377816438, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.018744959011673928, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.014987687459215521, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.011219043983146548, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.007770521529018879, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0049540730193257335, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.00273219040594995, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.001665504276752472, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0011403792351484298, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007271494157612324, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00046658946201205253, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00033840887248516084, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002691082470119, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0002020713407546282, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00014651227742433547, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010378764010965825, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.88586351275444e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.215732984244823e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 5.453918129205704e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 4.114676266908646e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 9.027961641550064e-07, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0843202471733093e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4921074509620668e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7383508384227752e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.3224780559539794e-09, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.880130290985108e-09, "train/loss_045_lr3.1e+01_wd1.0e+00": 9.932992979884147e-06, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.5634806156158446e-10, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.959052264690399e-08, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.1119991540908813e-08, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01574558581225574, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014580686083063484, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01296917078550905, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01161857198458165, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010569356614723802, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009494046233594417, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00824866094160825, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.007006439655087888, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0064183186716400085, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0061630383878946305, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005999669302836992, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005819280944415368, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.005674839581479319, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00548172470589634, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005304907146492042, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005137918948894367, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004914074334374163, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004658488991844933, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004359656048764009, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004075060496543301, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0036979446324403396, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0032842574267124293, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002787127750561922, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0022248214192950398, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001651635358321073, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0010291158965264912, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006461625237716362, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0004453954716518638, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002888126870220731, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00018649481401553203, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00013433087358862393, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010436068795797837, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.803187105764664e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.703116533254615e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.3939219942501495e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.922601591105689e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.1594333811139067e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 6.979856566252571e-06, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.874915134653483e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.7574848097649374e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.009303815331539e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.937546213880993e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 6.673895585598501e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.976563681832937e-07, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.10730130171061e-08, "train/grad_045_lr3.1e+01_wd1.0e+00": 3.3990533471223026e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.509065079446931e-09, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.3959386742746436e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 1.4568776905793068e-07, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7449595332145691, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6102956533432007, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.44405826926231384, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.3318503201007843, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.25776275992393494, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.19001051783561707, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.13334953784942627, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09263036400079727, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07181665301322937, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06233784183859825, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05674108862876892, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05145678669214249, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.047961678355932236, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.044207461178302765, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04156169295310974, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03950442373752594, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03742304444313049, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.035569120198488235, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03395189717411995, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03285118564963341, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03195574879646301, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.031315840780735016, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.031128404662013054, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.031067028641700745, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03150829300284386, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03245441988110542, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03320586308836937, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03367266431450844, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03388993814587593, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03458172082901001, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03541437163949013, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03493325039744377, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03503333404660225, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03515392914414406, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039251115173101425, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04767769202589989, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0510793998837471, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09180804342031479, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13420313596725464, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13489285111427307, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.1353793442249298, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.22431279718875885, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2826094329357147, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5064032673835754, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5744048953056335, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8720659613609314, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6079447865486145, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8096215128898621, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8966748714447021, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8874007936507936, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9017857142857143, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9221230158730159, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9372519841269841, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9499007936507936, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9662698412698413, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9749503968253969, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9784226190476191, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9806547619047619, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9828869047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.986359126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.988343253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9915674603174603, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9848710317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8669160009595575, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8868843936845907, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9120160382526818, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9291846277271538, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9433727549854941, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9602727058101288, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.968785452137828, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9723736164036716, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9758597162743282, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9781157455124546, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9806985841600735, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9810200375135951, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9823119550543135, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9823051366743001, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9825314407013861, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9838221578144235, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9841805405138777, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9849077885819484, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9852704160660194, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9858189280188674, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9857154064890491, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9854933245390585, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9853514530154225, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9870356531623992, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9873709901426244, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9875380646479202, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9868518784108447, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9866694881158389, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9871587866878767, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9859385944933986, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.987136176702769, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859421601729795, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.987110361802261, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9873204505847025, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9869737990668943, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9884910824849598, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9874550831282133, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.988446019553385, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9850067638082429, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9906587083498731, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9893540800595659, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9858672033508847, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9862898225183917, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9843822215136426, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9845485309506687, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9833792893426194, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9849876787528341, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9818214672611228, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9824073305817478, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 9.027961641550064e-07, "validation/loss_best": 0.13489285111427307, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9906587083498731} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.07026279784739017, "train/grad": 0.038129831766709685, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.745407018661499, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.6108220481872558, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.4453053832054138, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.33431911855936053, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.2613529771566391, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.1942901186645031, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.13785255890339612, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09765195170417428, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07709529284387827, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06718360340222716, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.06116784150712192, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.054987355144694446, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.05062634576112032, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.045715701207518576, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04179009459912777, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03847067006863654, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0346391611546278, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.030600637327879668, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.02635738728567958, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.022699622213840486, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.018632004810497163, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.014826777875423431, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.011081619150936604, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.007641498548910022, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004805302377790212, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0026134352385997774, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.001578680220991373, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.001073209410533309, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006974465120583773, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00045613990165293216, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00033538460731506346, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002609070483595133, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00019293327815830707, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00014038956724107265, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010633387602865696, "train/loss_035_lr6.0e+00_wd1.0e+00": 5.1855333149433134e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.1080860644578933e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 7.107499986886978e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.4088378995656965e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.217098906636238e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.652650535106659e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.052467316389084e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.476181834936142e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.6763806343078614e-09, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.539025783538818e-09, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.0381063222885132e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.1175870895385742e-10, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.1641532182693481e-09, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.5232712030410767e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015281184008345008, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014142416785471141, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012604033574461937, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011345263989642262, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010371841276064515, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009284998739603907, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00795643858029507, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006733484290307388, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006199618623359129, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005985454485053197, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005857279891497456, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0057130275579402225, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0055921504492289385, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005425635743013118, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005265792340796906, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005108461855561472, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004897948527359404, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004653278286859859, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004356093400565442, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004060751730721676, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003699440107884584, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003273973738978384, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0027581569962057983, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0021550670677243033, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0015511329588480294, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0009447181543146144, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005973508567694807, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0004148335289210081, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002706006882908696, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00017639853562286589, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001286490569873422, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.966354255539045e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.488388138881418e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.615256171040528e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.193591914827266e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.8208995050817976e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.0784895121312985e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 7.760861737484935e-06, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.11402426436932e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.536161768400499e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 3.738885893859212e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 3.7589056413241732e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 9.532046942554075e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 6.643637529761432e-07, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.1006080806010088e-08, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.354050595967596e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.3713566044106933e-09, "train/grad_047_lr4.3e+01_wd1.0e+00": 6.865056212806831e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 4.7151092739933256e-07, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7381512522697449, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6040447354316711, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.4388725757598877, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.32789185643196106, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.25465506315231323, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1875276267528534, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.13101626932621002, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09143659472465515, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07118703424930573, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06184699013829231, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.056437719613313675, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.051176100969314575, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.047683507204055786, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04410167783498764, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.041422564536333084, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.039351608604192734, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.037216510623693466, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.035277437418699265, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03368251398205757, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.032609082758426666, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.031674306839704514, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.031006906181573868, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.030600061640143394, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030698850750923157, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031353309750556946, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03227901831269264, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.033092983067035675, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.033583298325538635, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0340815931558609, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034747470170259476, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03551514074206352, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.035184912383556366, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.035103823989629745, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03522687405347824, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03933103382587433, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.047724831849336624, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05095430836081505, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09092157334089279, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13323912024497986, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1342025250196457, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13436616957187653, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.22125038504600525, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2794954776763916, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4983040690422058, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5664074420928955, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8602637648582458, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5972558856010437, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.793061375617981, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8759521245956421, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.888640873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9045138888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9223710317460317, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9357638888888888, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9489087301587301, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9655257936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9747023809523809, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9791666666666666, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9813988095238095, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.988343253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.988343253968254, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9913194444444444, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9846230158730159, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.869571148795746, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8904969372625761, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9120669294411557, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9274674204323616, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9419639278587104, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9596366871544555, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9686038115051642, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9732792797641556, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9767548385492895, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9779358518540797, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9798773798313467, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9817831828274854, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9819562444552872, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9826642927148986, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9830754426221223, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9843554864752384, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9856314169472628, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9856241122883783, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.985265721976717, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9863571530059752, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9866242555264607, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.986302816020707, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9864404815717055, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9873740124095692, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9880904623585243, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9874030582771993, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9868518784108447, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9868478586831879, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9871587866878767, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9865789074913871, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9865916963399479, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859421601729795, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.987110361802261, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9873204505847025, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9869737990668943, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9886697549579236, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9876334580312409, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.988446019553385, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9851842564062954, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9903549918106422, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9890262388743736, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9859974370049953, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9862898225183917, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9843822215136426, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9845485309506687, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9833792893426194, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.985166331885611, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9823697054690219, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9818806052681243, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 1.217098906636238e-06, "validation/loss_best": 0.1342025250196457, "validation/acc_best": 0.9913194444444444, "validation/f1_best": 0.9903549918106422} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.06781073173508048, "train/grad": 0.037898586094379426, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7316620588302613, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5987739133834838, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.43579758286476133, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.3264578330516815, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.25469167947769167, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.18810710892081262, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1319831806793809, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09239947635680437, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07206112570129335, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.062189597086980936, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.05630421665497124, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.050412223190069196, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.046288799084723, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.04167448023334146, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.038019197378307584, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03488495572470129, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03141569011844694, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.027651947056874633, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.02378545840270817, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.020476783625781536, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0167020695656538, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.013233666932210326, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.009849852062761783, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006820974219590425, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004285363368690014, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0023316352535039187, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0014189345948398113, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0009710483439266681, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006339867785573006, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004167577251791954, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00030370106920599935, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00023552823811769484, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00017667966894805432, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00012624219059944152, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.14203654974699e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.596843384206295e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.7244649827480317e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 6.328942254185676e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.4939514696598054e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.277150586247444e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.6435963809490204e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.0822975784540176e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.0489469170570373e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.1124410927295684e-07, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.5832483768463136e-10, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.9645318388938904e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 9.778887033462525e-10, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.21540641784668e-10, "train/loss_048_lr5.0e+01_wd1.0e+00": 7.531605660915375e-08, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01542674368713051, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014269272764213383, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01271061119157821, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011437387571204454, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.010452694515697658, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009355424952227622, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.007967958685476333, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006659081742400303, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006087809723103419, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.005850400446215645, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005701869236654602, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005540768777136691, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00541238707897719, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005235392356407829, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005066633762908168, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.004906597149092704, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0047009912162320686, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004450407527328935, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004155905878724298, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003868467858846998, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0034816314194176814, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0030405758861161303, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0025258822469913867, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.001959866045290255, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0013955410462949658, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0008512683444496361, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005480855218775105, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003840705204038386, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002542864276210821, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0001676201042437242, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00012255995196937874, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.626222740735101e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.225336922260795e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.4232975749073376e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.188486573582395e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.6523628611982986e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.025243253758191e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 6.822844353244068e-06, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.197842827377108e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.80761180413299e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 3.4616391714438016e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 3.64636196109142e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 7.98977080347388e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 6.671044417963124e-07, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.2911665225945826e-08, "train/grad_045_lr3.1e+01_wd1.0e+00": 6.841211984299636e-07, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.9530843695773874e-09, "train/grad_047_lr4.3e+01_wd1.0e+00": 7.623873504148402e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 2.3578176825820134e-07, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7355611324310303, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6016641855239868, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.4370421767234802, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.32617801427841187, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.25352975726127625, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.1864938735961914, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.13024306297302246, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09084766358137131, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07098741829395294, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.061663929373025894, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.056316111236810684, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05109955370426178, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04757799580693245, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04390987753868103, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04130415990948677, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03925903141498566, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.037239935249090195, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03526676446199417, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.033670105040073395, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03255535662174225, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03166574984788895, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.031005041673779488, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.030602937564253807, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030705194920301437, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031293924897909164, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032196030020713806, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03309488296508789, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.033592309802770615, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.034051574766635895, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034596819430589676, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03545662760734558, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03521159291267395, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03509986773133278, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03533893823623657, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039276108145713806, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04769346863031387, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05086217075586319, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09071352332830429, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13332873582839966, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13369977474212646, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13405214250087738, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.22073613107204437, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.27855828404426575, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4969542324542999, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5628394484519958, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8537272214889526, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5942211747169495, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.788567066192627, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8662570118904114, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8903769841269841, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9060019841269841, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9228670634920635, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.939484126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9494047619047619, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9660218253968254, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9749503968253969, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9794146825396826, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9811507936507936, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.984375, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.988343253968254, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9913194444444444, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9908234126984127, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9848710317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.871158771112364, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8920150132551599, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9128145862961391, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9311868583044238, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9425582355046733, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9600869137410057, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9687856041249916, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9738266962229022, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9765769466738425, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9779358518540797, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.980424087206942, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9817831828274854, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9819562444552872, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9828436895377342, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9830699935648204, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9843477007104365, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9856314169472628, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.985446013569056, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9852678826019838, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9861790441365408, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9866242555264607, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9864779416459164, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9866629545691623, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9873740124095692, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9880904623585243, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9874030582771993, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9868518784108447, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9866694881158389, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9871587866878767, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9865789074913871, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.987136176702769, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859421601729795, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9872931861231901, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9873204505847025, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9869737990668943, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9886697549579236, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9876334580312409, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.988446019553385, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9851842564062954, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9902962407598912, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9895771659366174, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9859974370049953, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9862898225183917, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9843822215136426, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9849056065777155, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9833792893426194, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9849876787528341, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9818214672611228, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9820605575021245, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 1.277150586247444e-06, "validation/loss_best": 0.13369977474212646, "validation/acc_best": 0.9913194444444444, "validation/f1_best": 0.9902962407598912} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.06831835189834237, "train/grad": 0.03757934408262372, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.7331027579307556, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.5996210408210755, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.43631107807159425, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.32709366977214815, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.25535144343972205, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.1890795973688364, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1332545405626297, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.09411789543926716, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.07414762630127371, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.06458433064632117, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.058789647882804275, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.052773134903982284, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.048464379804208875, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.043480263827368616, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.039491268238052726, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.03615664780139923, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03222608488053083, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.028115850985050202, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.023946837559342384, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.02046858753077686, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.01654935005120933, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.013043759781867266, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.009634833820164203, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006614655116572976, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.004210248533636332, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002358941547572613, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.001461086617782712, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0010095008835196496, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006561319436877966, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004282188229262829, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00031177792698144913, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00024579975754022597, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00018290875479578972, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00013205804862082005, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.552115574479103e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 4.624657332897186e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.584680914878845e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.501109942793846e-06, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8176715821027756e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5845056623220443e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.191653475165367e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6483198851346969e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.924548462033272e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.0533258318901062e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.6135315895080565e-09, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.5918707251548766e-08, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.656612873077393e-10, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.827735781669617e-09, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.7154961824417116e-08, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015159478960558772, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.014009572793729603, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.012450364935211837, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.011175322437193245, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01017405243590474, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.009088960799854249, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00783133557299152, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.006725406168261543, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.006253864195896313, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.006054323234129697, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.005924988367478363, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.005761768811498769, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0056155734043568375, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.005397914136992767, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.005196483903564513, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005011819305364043, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.004763697102462174, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004479586370871402, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004156723384658108, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003851324689021567, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00345571178215323, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0030345163953461453, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0025344872082496295, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0019790498683505573, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0014204411315586185, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.000863954300966725, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005524740826876951, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003888934149563283, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002560294118302409, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00016771126378444022, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00012249332315604987, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.528524441748231e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.200110593430509e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.4082246092548305e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.2005227107893006e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.773317820683019e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.9831577005842148e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 6.67041964160891e-06, "train/grad_038_lr9.8e+00_wd1.0e+00": 5.150919518427018e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 3.0062985990080545e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.968329959615346e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.020660674374066e-06, "train/grad_042_lr1.9e+01_wd1.0e+00": 9.61110174654994e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 1.6939964438137241e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.6967178679071207e-08, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.3580664714200786e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.819291102848399e-09, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.0350373267548668e-07, "train/grad_048_lr5.0e+01_wd1.0e+00": 2.614845665835822e-07, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.7351709604263306, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.6012982726097107, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.43679875135421753, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.3259629011154175, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.2533935606479645, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.18632598221302032, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.13010098040103912, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.09078765660524368, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.07091182470321655, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.06169653683900833, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.05632421001791954, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.05108874291181564, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.04756345599889755, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.04390518739819527, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04130076244473457, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.03929094225168228, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.03725794330239296, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.03525712341070175, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03359299525618553, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.032542917877435684, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03164287656545639, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0310386773198843, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03061361238360405, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.030673513188958168, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.031249068677425385, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03223707154393196, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03303469717502594, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03361572325229645, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0340813547372818, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034683603793382645, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03546219319105148, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0352364256978035, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03511033579707146, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03535465896129608, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03918572887778282, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04770398139953613, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05079571530222893, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09078773856163025, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1332191526889801, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1336444765329361, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.13414189219474792, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.22076630592346191, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2785183787345886, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.49667051434516907, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5629968643188477, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8533860445022583, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5949915647506714, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.7874357104301453, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8647449016571045, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8893849206349206, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.9055059523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.9228670634920635, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.9392361111111112, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9501488095238095, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9655257936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9749503968253969, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9794146825396826, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9813988095238095, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9826388888888888, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.986359126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.988343253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9915674603174603, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9905753968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.988343253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9851190476190477, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9848710317460317, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.8701057000134608, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.8914617473500696, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.9128145862961391, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.9310104547098554, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.9432815144807573, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.959279515838014, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9687856041249916, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9738266962229022, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9767560933063447, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9779358518540797, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9802443027213404, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9817831828274854, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9819562444552872, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9828436895377342, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9825314407013861, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9848829872790448, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9856314169472628, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9852678826019838, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9852678826019838, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9861790441365408, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9866242555264607, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9866560635031968, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9871983797447396, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9875972256931295, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9880904623585243, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9874030582771993, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9868518784108447, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9868478586831879, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9866079120348862, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9865789074913871, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.987136176702769, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9859421601729795, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9872931861231901, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9873204505847025, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9869737990668943, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9886697549579236, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9876334580312409, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.988446019553385, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9851842564062954, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9906587083498731, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9890262388743736, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9859974370049953, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9862898225183917, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9843822215136426, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9847272719221216, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9833792893426194, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9849876787528341, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9818214672611228, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.982434388701976, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 1.5845056623220443e-06, "validation/loss_best": 0.1336444765329361, "validation/acc_best": 0.9915674603174603, "validation/f1_best": 0.9906587083498731} diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8a4c043f74e0348f7c38be5ddeb861bfbf39f329 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..9f847fec022e9006260b2ef8701c4194bb950811 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 14, "eval/id_best": 17, "eval/lr_best": 9.599999999999999e-05, "eval/wd_best": 0.05, "eval/train/loss": 2.074416160583496, "eval/train/acc": 0.3773932819078644, "eval/train/acc_std": 0.0023685172896544043, "eval/train/f1": 0.31818418508788654, "eval/train/f1_std": 0.0024370692553956582, "eval/validation/loss": 2.39967942237854, "eval/validation/acc": 0.2825765965300849, "eval/validation/acc_std": 0.005395734164998907, "eval/validation/f1": 0.218613949908294, "eval/validation/f1_std": 0.004883268154647929, "eval/test/loss": 2.2970030307769775, "eval/test/acc": 0.3070500927643785, "eval/test/acc_std": 0.005250920818990066, "eval/test/f1": 0.23502535936565971, "eval/test/f1_std": 0.005413437034446197, "eval/testid/loss": 2.255535125732422, "eval/testid/acc": 0.3026797763639869, "eval/testid/acc_std": 0.005920858246657257, "eval/testid/f1": 0.2458918451077734, "eval/testid/f1_std": 0.0058978125424438725} diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..85d808af734b7b07cc9edda5b3db4d22d89309ac --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 14, "eval/best/id_best": 17, "eval/best/lr_best": 9.599999999999999e-05, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.074416160583496, "eval/best/train/acc": 0.3773932819078644, "eval/best/train/acc_std": 0.0023685172896544043, "eval/best/train/f1": 0.31818418508788654, "eval/best/train/f1_std": 0.0024370692553956582, "eval/best/validation/loss": 2.39967942237854, "eval/best/validation/acc": 0.2825765965300849, "eval/best/validation/acc_std": 0.005395734164998907, "eval/best/validation/f1": 0.218613949908294, "eval/best/validation/f1_std": 0.004883268154647929, "eval/best/test/loss": 2.2970030307769775, "eval/best/test/acc": 0.3070500927643785, "eval/best/test/acc_std": 0.005250920818990066, "eval/best/test/f1": 0.23502535936565971, "eval/best/test/f1_std": 0.005413437034446197, "eval/best/testid/loss": 2.255535125732422, "eval/best/testid/acc": 0.3026797763639869, "eval/best/testid/acc_std": 0.005920858246657257, "eval/best/testid/f1": 0.2458918451077734, "eval/best/testid/f1_std": 0.0058978125424438725} diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..6296585d3c3b592d27170832a55d926aca897bdd --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 18, "eval/last/lr_best": 0.00011399999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 1.9630322456359863, "eval/last/train/acc": 0.4120901072559083, "eval/last/train/acc_std": 0.0024781255102494094, "eval/last/train/f1": 0.3584289526292089, "eval/last/train/f1_std": 0.002642871062807365, "eval/last/validation/loss": 2.401456117630005, "eval/last/validation/acc": 0.28017718715393136, "eval/last/validation/acc_std": 0.0055398086022188705, "eval/last/validation/f1": 0.22203810274058847, "eval/last/validation/f1_std": 0.004952148315896624, "eval/last/test/loss": 2.289011001586914, "eval/last/test/acc": 0.31410018552875696, "eval/last/test/acc_std": 0.0052283465798944, "eval/last/test/f1": 0.24939518513483958, "eval/last/test/f1_std": 0.005311589302613517, "eval/last/testid/loss": 2.20564341545105, "eval/last/testid/acc": 0.3175245806824754, "eval/last/testid/acc_std": 0.005906220538350812, "eval/last/testid/f1": 0.267585870199021, "eval/last/testid/f1_std": 0.005883405334057796} diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..011da8820a7d7cdfcdf7357d66a100bb0da9d92f --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",train,2.074416160583496,0.3773932819078644,0.0023685172896544043,0.31818418508788654,0.0024370692553956582 +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",validation,2.39967942237854,0.2825765965300849,0.005395734164998907,0.218613949908294,0.004883268154647929 +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",test,2.2970030307769775,0.3070500927643785,0.005250920818990066,0.23502535936565971,0.005413437034446197 +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",testid,2.255535125732422,0.3026797763639869,0.005920858246657257,0.2458918451077734,0.0058978125424438725 diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..011da8820a7d7cdfcdf7357d66a100bb0da9d92f --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",train,2.074416160583496,0.3773932819078644,0.0023685172896544043,0.31818418508788654,0.0024370692553956582 +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",validation,2.39967942237854,0.2825765965300849,0.005395734164998907,0.218613949908294,0.004883268154647929 +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",test,2.2970030307769775,0.3070500927643785,0.005250920818990066,0.23502535936565971,0.005413437034446197 +flat_mae,patch,attn,nsd_cococlip,best,14,9.599999999999999e-05,0.05,17,"[0.32, 1.0]",testid,2.255535125732422,0.3026797763639869,0.005920858246657257,0.2458918451077734,0.0058978125424438725 diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..6b81812930ed19b7cd58bcb1063a62ee86ea7d49 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,1.9630322456359863,0.4120901072559083,0.0024781255102494094,0.3584289526292089,0.002642871062807365 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.401456117630005,0.28017718715393136,0.0055398086022188705,0.22203810274058847,0.004952148315896624 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.289011001586914,0.31410018552875696,0.0052283465798944,0.24939518513483958,0.005311589302613517 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.20564341545105,0.3175245806824754,0.005906220538350812,0.267585870199021,0.005883405334057796 diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..2537b8b9d9beac3873b0954ee1e509305597c859 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,962 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:46:54 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n800_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:22:25 lr: nan time: 3.3631 data: 2.8958 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:42 lr: 0.000003 loss: 3.1802 (3.1876) grad: 0.1598 (0.1630) time: 0.4462 data: 0.0040 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:09 lr: 0.000006 loss: 3.1724 (3.1729) grad: 0.1598 (0.1628) time: 0.4621 data: 0.0046 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:52 lr: 0.000009 loss: 3.1532 (3.1708) grad: 0.1587 (0.1624) time: 0.4699 data: 0.0048 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:38 lr: 0.000012 loss: 3.1532 (3.1673) grad: 0.1591 (0.1614) time: 0.4617 data: 0.0048 max mem: 22448 +train: [0] [100/400] eta: 0:02:26 lr: 0.000015 loss: 3.1585 (3.1668) grad: 0.1588 (0.1602) time: 0.4521 data: 0.0049 max mem: 22448 +train: [0] [120/400] eta: 0:02:14 lr: 0.000018 loss: 3.1619 (3.1650) grad: 0.1471 (0.1578) time: 0.4492 data: 0.0048 max mem: 22448 +train: [0] [140/400] eta: 0:02:03 lr: 0.000021 loss: 3.1427 (3.1626) grad: 0.1471 (0.1575) time: 0.4477 data: 0.0046 max mem: 22448 +train: [0] [160/400] eta: 0:01:53 lr: 0.000024 loss: 3.1436 (3.1593) grad: 0.1601 (0.1587) time: 0.4546 data: 0.0048 max mem: 22448 +train: [0] [180/400] eta: 0:01:43 lr: 0.000027 loss: 3.1247 (3.1571) grad: 0.1593 (0.1585) time: 0.4485 data: 0.0048 max mem: 22448 +train: [0] [200/400] eta: 0:01:33 lr: 0.000030 loss: 3.1373 (3.1558) grad: 0.1495 (0.1571) time: 0.4496 data: 0.0049 max mem: 22448 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 3.1439 (3.1544) grad: 0.1489 (0.1568) time: 0.4481 data: 0.0045 max mem: 22448 +train: [0] [240/400] eta: 0:01:14 lr: 0.000036 loss: 3.1224 (3.1519) grad: 0.1519 (0.1563) time: 0.4496 data: 0.0048 max mem: 22448 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 3.1164 (3.1492) grad: 0.1494 (0.1556) time: 0.4552 data: 0.0044 max mem: 22448 +train: [0] [280/400] eta: 0:00:55 lr: 0.000042 loss: 3.0960 (3.1452) grad: 0.1494 (0.1553) time: 0.4780 data: 0.0050 max mem: 22448 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 3.0722 (3.1403) grad: 0.1515 (0.1554) time: 0.4521 data: 0.0046 max mem: 22448 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 3.0708 (3.1367) grad: 0.1596 (0.1561) time: 0.4429 data: 0.0047 max mem: 22448 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 3.0799 (3.1333) grad: 0.1607 (0.1562) time: 0.4500 data: 0.0047 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.0610 (3.1290) grad: 0.1608 (0.1570) time: 0.4553 data: 0.0050 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0511 (3.1251) grad: 0.1707 (0.1578) time: 0.4643 data: 0.0049 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0503 (3.1221) grad: 0.1760 (0.1586) time: 0.4447 data: 0.0048 max mem: 22448 +train: [0] Total time: 0:03:04 (0.4620 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0503 (3.1221) grad: 0.1760 (0.1586) +eval (validation): [0] [ 0/85] eta: 0:04:37 time: 3.2693 data: 2.9808 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:33 time: 0.3724 data: 0.0035 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:19 time: 0.3608 data: 0.0045 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:10 time: 0.3524 data: 0.0040 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3360 data: 0.0042 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3283 data: 0.0041 max mem: 22448 +eval (validation): [0] Total time: 0:00:33 (0.3922 s / it) +cv: [0] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 2.590 acc: 0.239 f1: 0.161 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:20:35 lr: nan time: 3.0876 data: 2.7375 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:43 lr: 0.000063 loss: 3.0175 (3.0089) grad: 0.1669 (0.1683) time: 0.4635 data: 0.0036 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:05 lr: 0.000066 loss: 3.0169 (3.0095) grad: 0.1639 (0.1637) time: 0.4387 data: 0.0043 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:48 lr: 0.000069 loss: 2.9976 (2.9961) grad: 0.1606 (0.1644) time: 0.4590 data: 0.0048 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:34 lr: 0.000072 loss: 2.9832 (2.9974) grad: 0.1715 (0.1677) time: 0.4440 data: 0.0047 max mem: 22448 +train: [1] [100/400] eta: 0:02:23 lr: 0.000075 loss: 2.9843 (2.9923) grad: 0.1725 (0.1699) time: 0.4486 data: 0.0047 max mem: 22448 +train: [1] [120/400] eta: 0:02:11 lr: 0.000078 loss: 2.9796 (2.9889) grad: 0.1751 (0.1708) time: 0.4349 data: 0.0046 max mem: 22448 +train: [1] [140/400] eta: 0:02:01 lr: 0.000081 loss: 2.9861 (2.9871) grad: 0.1751 (0.1722) time: 0.4433 data: 0.0045 max mem: 22448 +train: [1] [160/400] eta: 0:01:51 lr: 0.000084 loss: 2.9808 (2.9867) grad: 0.1772 (0.1724) time: 0.4462 data: 0.0044 max mem: 22448 +train: [1] [180/400] eta: 0:01:41 lr: 0.000087 loss: 2.9808 (2.9868) grad: 0.1726 (0.1733) time: 0.4479 data: 0.0046 max mem: 22448 +train: [1] [200/400] eta: 0:01:32 lr: 0.000090 loss: 2.9747 (2.9842) grad: 0.1737 (0.1744) time: 0.4457 data: 0.0046 max mem: 22448 +train: [1] [220/400] eta: 0:01:22 lr: 0.000093 loss: 2.9276 (2.9774) grad: 0.1965 (0.1770) time: 0.4430 data: 0.0045 max mem: 22448 +train: [1] [240/400] eta: 0:01:13 lr: 0.000096 loss: 2.9289 (2.9740) grad: 0.1973 (0.1774) time: 0.4538 data: 0.0048 max mem: 22448 +train: [1] [260/400] eta: 0:01:04 lr: 0.000099 loss: 2.9511 (2.9729) grad: 0.1835 (0.1782) time: 0.4480 data: 0.0047 max mem: 22448 +train: [1] [280/400] eta: 0:00:54 lr: 0.000102 loss: 2.9483 (2.9694) grad: 0.1835 (0.1788) time: 0.4413 data: 0.0047 max mem: 22448 +train: [1] [300/400] eta: 0:00:45 lr: 0.000105 loss: 2.9278 (2.9676) grad: 0.1830 (0.1794) time: 0.4379 data: 0.0048 max mem: 22448 +train: [1] [320/400] eta: 0:00:36 lr: 0.000108 loss: 2.9031 (2.9635) grad: 0.1886 (0.1803) time: 0.4408 data: 0.0044 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.8863 (2.9584) grad: 0.1919 (0.1809) time: 0.4507 data: 0.0047 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.8957 (2.9569) grad: 0.1879 (0.1814) time: 0.4489 data: 0.0048 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.8996 (2.9536) grad: 0.1879 (0.1822) time: 0.4521 data: 0.0049 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.8930 (2.9513) grad: 0.1971 (0.1833) time: 0.4453 data: 0.0048 max mem: 22448 +train: [1] Total time: 0:03:01 (0.4539 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.8930 (2.9513) grad: 0.1971 (0.1833) +eval (validation): [1] [ 0/85] eta: 0:04:31 time: 3.1911 data: 2.9593 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:32 time: 0.3720 data: 0.0056 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:19 time: 0.3349 data: 0.0035 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:09 time: 0.3413 data: 0.0040 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3268 data: 0.0041 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3180 data: 0.0038 max mem: 22448 +eval (validation): [1] Total time: 0:00:32 (0.3787 s / it) +cv: [1] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 2.442 acc: 0.263 f1: 0.203 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:21:21 lr: nan time: 3.2029 data: 2.8613 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:37 lr: 0.000123 loss: 2.9027 (2.8878) grad: 0.2227 (0.2194) time: 0.4397 data: 0.0036 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:05 lr: 0.000126 loss: 2.8862 (2.8846) grad: 0.2132 (0.2154) time: 0.4573 data: 0.0048 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:47 lr: 0.000129 loss: 2.8795 (2.8815) grad: 0.2055 (0.2114) time: 0.4430 data: 0.0050 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:33 lr: 0.000132 loss: 2.8706 (2.8793) grad: 0.2055 (0.2104) time: 0.4398 data: 0.0047 max mem: 22448 +train: [2] [100/400] eta: 0:02:21 lr: 0.000135 loss: 2.8329 (2.8714) grad: 0.2118 (0.2110) time: 0.4401 data: 0.0048 max mem: 22448 +train: [2] [120/400] eta: 0:02:10 lr: 0.000138 loss: 2.8390 (2.8676) grad: 0.2124 (0.2124) time: 0.4432 data: 0.0049 max mem: 22448 +train: [2] [140/400] eta: 0:02:01 lr: 0.000141 loss: 2.8683 (2.8697) grad: 0.2166 (0.2137) time: 0.4626 data: 0.0050 max mem: 22448 +train: [2] [160/400] eta: 0:01:51 lr: 0.000144 loss: 2.8358 (2.8645) grad: 0.2206 (0.2154) time: 0.4607 data: 0.0049 max mem: 22448 +train: [2] [180/400] eta: 0:01:42 lr: 0.000147 loss: 2.8092 (2.8590) grad: 0.2249 (0.2169) time: 0.4555 data: 0.0050 max mem: 22448 +train: [2] [200/400] eta: 0:01:32 lr: 0.000150 loss: 2.8015 (2.8544) grad: 0.2221 (0.2177) time: 0.4440 data: 0.0045 max mem: 22448 +train: [2] [220/400] eta: 0:01:23 lr: 0.000153 loss: 2.8121 (2.8547) grad: 0.2295 (0.2194) time: 0.4515 data: 0.0048 max mem: 22448 +train: [2] [240/400] eta: 0:01:13 lr: 0.000156 loss: 2.8660 (2.8566) grad: 0.2354 (0.2212) time: 0.4570 data: 0.0048 max mem: 22448 +train: [2] [260/400] eta: 0:01:04 lr: 0.000159 loss: 2.8486 (2.8535) grad: 0.2330 (0.2217) time: 0.4441 data: 0.0047 max mem: 22448 +train: [2] [280/400] eta: 0:00:55 lr: 0.000162 loss: 2.8191 (2.8511) grad: 0.2446 (0.2248) time: 0.4439 data: 0.0048 max mem: 22448 +train: [2] [300/400] eta: 0:00:45 lr: 0.000165 loss: 2.8313 (2.8580) grad: 0.2938 (0.2493) time: 0.4442 data: 0.0049 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=80.85 > 63.56) at step 559. Freezing. +train: [2] [320/400] eta: 0:00:36 lr: 0.000168 loss: 3.1773 (2.9072) grad: 0.9874 (0.3157) time: 0.4432 data: 0.0047 max mem: 22448 +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 2.9616 (2.9048) grad: 0.3125 (0.3142) time: 0.4426 data: 0.0045 max mem: 22448 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 2.9304 (2.9241) grad: 0.3382 (0.3520) time: 0.4379 data: 0.0047 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=75.14 > 63.56) at step 584. Freezing. +WARNING: classifier 46 (36, 1.0) diverged (loss=70.04 > 63.56) at step 589. Freezing. +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 3.5811 (2.9773) grad: 1.5016 (0.4164) time: 0.4456 data: 0.0047 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.8732 (2.9682) grad: 0.2083 (0.4056) time: 0.4527 data: 0.0046 max mem: 22448 +train: [2] Total time: 0:03:01 (0.4549 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.8732 (2.9682) grad: 0.2083 (0.4056) +eval (validation): [2] [ 0/85] eta: 0:04:29 time: 3.1752 data: 2.8829 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:32 time: 0.3624 data: 0.0048 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:18 time: 0.3420 data: 0.0036 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3423 data: 0.0040 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3412 data: 0.0041 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3307 data: 0.0040 max mem: 22448 +eval (validation): [2] Total time: 0:00:32 (0.3820 s / it) +cv: [2] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.432 acc: 0.270 f1: 0.202 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:20:26 lr: nan time: 3.0657 data: 2.6973 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:36 lr: 0.000183 loss: 2.7531 (2.7812) grad: 0.1970 (0.2003) time: 0.4450 data: 0.0035 max mem: 22448 +train: [3] [ 40/400] eta: 0:03:03 lr: 0.000186 loss: 2.7808 (2.8027) grad: 0.2072 (0.2088) time: 0.4452 data: 0.0047 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:45 lr: 0.000189 loss: 2.7813 (2.7910) grad: 0.2105 (0.2083) time: 0.4401 data: 0.0050 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:31 lr: 0.000192 loss: 2.7740 (2.7875) grad: 0.2051 (0.2080) time: 0.4359 data: 0.0049 max mem: 22448 +train: [3] [100/400] eta: 0:02:20 lr: 0.000195 loss: 2.7558 (2.7840) grad: 0.2051 (0.2089) time: 0.4373 data: 0.0048 max mem: 22448 +train: [3] [120/400] eta: 0:02:09 lr: 0.000198 loss: 2.7544 (2.7804) grad: 0.2130 (0.2101) time: 0.4361 data: 0.0049 max mem: 22448 +train: [3] [140/400] eta: 0:01:59 lr: 0.000201 loss: 2.7544 (2.7803) grad: 0.2246 (0.2133) time: 0.4370 data: 0.0049 max mem: 22448 +train: [3] [160/400] eta: 0:01:49 lr: 0.000204 loss: 2.7922 (2.7837) grad: 0.2298 (0.2152) time: 0.4515 data: 0.0050 max mem: 22448 +train: [3] [180/400] eta: 0:01:40 lr: 0.000207 loss: 2.7901 (2.7807) grad: 0.2228 (0.2162) time: 0.4441 data: 0.0047 max mem: 22448 +train: [3] [200/400] eta: 0:01:30 lr: 0.000210 loss: 2.7823 (2.7809) grad: 0.2256 (0.2171) time: 0.4409 data: 0.0048 max mem: 22448 +train: [3] [220/400] eta: 0:01:21 lr: 0.000213 loss: 2.7875 (2.7807) grad: 0.2286 (0.2187) time: 0.4320 data: 0.0046 max mem: 22448 +train: [3] [240/400] eta: 0:01:12 lr: 0.000216 loss: 2.7728 (2.7797) grad: 0.2374 (0.2211) time: 0.4579 data: 0.0046 max mem: 22448 +train: [3] [260/400] eta: 0:01:03 lr: 0.000219 loss: 2.7762 (2.7781) grad: 0.2427 (0.2228) time: 0.4463 data: 0.0049 max mem: 22448 +train: [3] [280/400] eta: 0:00:54 lr: 0.000222 loss: 2.7691 (2.7772) grad: 0.2407 (0.2247) time: 0.4483 data: 0.0049 max mem: 22448 +train: [3] [300/400] eta: 0:00:45 lr: 0.000225 loss: 2.7819 (2.7791) grad: 0.2591 (0.2299) time: 0.4409 data: 0.0048 max mem: 22448 +train: [3] [320/400] eta: 0:00:36 lr: 0.000228 loss: 2.8653 (2.7970) grad: 0.3640 (0.2674) time: 0.4372 data: 0.0046 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=66.98 > 63.56) at step 765. Freezing. +train: [3] [340/400] eta: 0:00:26 lr: 0.000231 loss: 3.0563 (2.8245) grad: 0.6724 (0.3159) time: 0.4383 data: 0.0047 max mem: 22448 +train: [3] [360/400] eta: 0:00:17 lr: 0.000234 loss: 2.8296 (2.8241) grad: 0.2396 (0.3113) time: 0.4396 data: 0.0048 max mem: 22448 +train: [3] [380/400] eta: 0:00:08 lr: 0.000237 loss: 2.7630 (2.8199) grad: 0.2319 (0.3073) time: 0.4537 data: 0.0049 max mem: 22448 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 2.7284 (2.8154) grad: 0.2254 (0.3029) time: 0.4595 data: 0.0050 max mem: 22448 +train: [3] Total time: 0:03:00 (0.4504 s / it) +train: [3] Summary: lr: 0.000240 loss: 2.7284 (2.8154) grad: 0.2254 (0.3029) +eval (validation): [3] [ 0/85] eta: 0:04:33 time: 3.2129 data: 2.9220 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:31 time: 0.3529 data: 0.0042 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3432 data: 0.0040 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3597 data: 0.0042 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3400 data: 0.0042 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3285 data: 0.0042 max mem: 22448 +eval (validation): [3] Total time: 0:00:32 (0.3835 s / it) +cv: [3] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.441 acc: 0.274 f1: 0.198 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:20:40 lr: nan time: 3.1011 data: 2.7752 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:38 lr: 0.000243 loss: 2.6583 (2.6855) grad: 0.2186 (0.2243) time: 0.4501 data: 0.0034 max mem: 22448 +train: [4] [ 40/400] eta: 0:03:03 lr: 0.000246 loss: 2.6978 (2.6998) grad: 0.2262 (0.2251) time: 0.4407 data: 0.0047 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:44 lr: 0.000249 loss: 2.7210 (2.7020) grad: 0.2285 (0.2274) time: 0.4332 data: 0.0048 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:31 lr: 0.000252 loss: 2.7100 (2.7058) grad: 0.2221 (0.2259) time: 0.4406 data: 0.0046 max mem: 22448 +train: [4] [100/400] eta: 0:02:19 lr: 0.000255 loss: 2.7244 (2.7169) grad: 0.2263 (0.2272) time: 0.4344 data: 0.0048 max mem: 22448 +train: [4] [120/400] eta: 0:02:09 lr: 0.000258 loss: 2.7244 (2.7148) grad: 0.2346 (0.2292) time: 0.4399 data: 0.0047 max mem: 22448 +train: [4] [140/400] eta: 0:01:59 lr: 0.000261 loss: 2.6846 (2.7167) grad: 0.2412 (0.2330) time: 0.4390 data: 0.0046 max mem: 22448 +train: [4] [160/400] eta: 0:01:49 lr: 0.000264 loss: 2.7352 (2.7194) grad: 0.2579 (0.2361) time: 0.4466 data: 0.0048 max mem: 22448 +train: [4] [180/400] eta: 0:01:40 lr: 0.000267 loss: 2.7389 (2.7254) grad: 0.2605 (0.2401) time: 0.4513 data: 0.0046 max mem: 22448 +train: [4] [200/400] eta: 0:01:31 lr: 0.000270 loss: 2.7308 (2.7233) grad: 0.2713 (0.2438) time: 0.4478 data: 0.0049 max mem: 22448 +train: [4] [220/400] eta: 0:01:21 lr: 0.000273 loss: 2.7389 (2.7339) grad: 0.2789 (0.2612) time: 0.4323 data: 0.0046 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=72.07 > 63.56) at step 918. Freezing. +train: [4] [240/400] eta: 0:01:12 lr: 0.000276 loss: 2.8329 (2.7772) grad: 0.6461 (0.3237) time: 0.4362 data: 0.0046 max mem: 22448 +train: [4] [260/400] eta: 0:01:03 lr: 0.000279 loss: 2.7829 (2.7739) grad: 0.2480 (0.3169) time: 0.4443 data: 0.0048 max mem: 22448 +train: [4] [280/400] eta: 0:00:54 lr: 0.000282 loss: 2.7388 (2.7711) grad: 0.2273 (0.3113) time: 0.4356 data: 0.0046 max mem: 22448 +train: [4] [300/400] eta: 0:00:44 lr: 0.000285 loss: 2.7388 (2.7687) grad: 0.2392 (0.3063) time: 0.4348 data: 0.0047 max mem: 22448 +train: [4] [320/400] eta: 0:00:35 lr: 0.000288 loss: 2.7168 (2.7646) grad: 0.2254 (0.3008) time: 0.4380 data: 0.0048 max mem: 22448 +train: [4] [340/400] eta: 0:00:26 lr: 0.000291 loss: 2.7011 (2.7624) grad: 0.2224 (0.2963) time: 0.4416 data: 0.0050 max mem: 22448 +train: [4] [360/400] eta: 0:00:17 lr: 0.000294 loss: 2.7067 (2.7603) grad: 0.2267 (0.2926) time: 0.4486 data: 0.0049 max mem: 22448 +train: [4] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.6965 (2.7568) grad: 0.2310 (0.2893) time: 0.4568 data: 0.0049 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.6878 (2.7531) grad: 0.2270 (0.2863) time: 0.4500 data: 0.0048 max mem: 22448 +train: [4] Total time: 0:02:59 (0.4494 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.6878 (2.7531) grad: 0.2270 (0.2863) +eval (validation): [4] [ 0/85] eta: 0:04:33 time: 3.2183 data: 2.9826 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:30 time: 0.3381 data: 0.0028 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:19 time: 0.4038 data: 0.0040 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:10 time: 0.3571 data: 0.0044 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3279 data: 0.0037 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3200 data: 0.0036 max mem: 22448 +eval (validation): [4] Total time: 0:00:33 (0.3907 s / it) +cv: [4] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.431 acc: 0.266 f1: 0.198 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:20:42 lr: nan time: 3.1052 data: 2.7298 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:43 lr: 0.000300 loss: 2.6031 (2.6105) grad: 0.2329 (0.2339) time: 0.4624 data: 0.0042 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:04 lr: 0.000300 loss: 2.6532 (2.6559) grad: 0.2348 (0.2391) time: 0.4332 data: 0.0047 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:46 lr: 0.000300 loss: 2.6790 (2.6626) grad: 0.2424 (0.2436) time: 0.4400 data: 0.0050 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:32 lr: 0.000300 loss: 2.6790 (2.6612) grad: 0.2515 (0.2442) time: 0.4355 data: 0.0051 max mem: 22448 +train: [5] [100/400] eta: 0:02:20 lr: 0.000300 loss: 2.6733 (2.6641) grad: 0.2484 (0.2469) time: 0.4379 data: 0.0046 max mem: 22448 +train: [5] [120/400] eta: 0:02:09 lr: 0.000300 loss: 2.6290 (2.6546) grad: 0.2465 (0.2474) time: 0.4357 data: 0.0048 max mem: 22448 +train: [5] [140/400] eta: 0:01:59 lr: 0.000300 loss: 2.5988 (2.6465) grad: 0.2377 (0.2456) time: 0.4373 data: 0.0049 max mem: 22448 +train: [5] [160/400] eta: 0:01:49 lr: 0.000299 loss: 2.5864 (2.6437) grad: 0.2365 (0.2457) time: 0.4376 data: 0.0049 max mem: 22448 +train: [5] [180/400] eta: 0:01:40 lr: 0.000299 loss: 2.6244 (2.6456) grad: 0.2377 (0.2451) time: 0.4505 data: 0.0050 max mem: 22448 +train: [5] [200/400] eta: 0:01:30 lr: 0.000299 loss: 2.6427 (2.6449) grad: 0.2419 (0.2450) time: 0.4385 data: 0.0048 max mem: 22448 +train: [5] [220/400] eta: 0:01:21 lr: 0.000299 loss: 2.6015 (2.6432) grad: 0.2419 (0.2441) time: 0.4394 data: 0.0048 max mem: 22448 +train: [5] [240/400] eta: 0:01:12 lr: 0.000299 loss: 2.5920 (2.6415) grad: 0.2379 (0.2442) time: 0.4275 data: 0.0047 max mem: 22448 +train: [5] [260/400] eta: 0:01:03 lr: 0.000299 loss: 2.5983 (2.6381) grad: 0.2380 (0.2438) time: 0.4496 data: 0.0050 max mem: 22448 +train: [5] [280/400] eta: 0:00:53 lr: 0.000298 loss: 2.6222 (2.6393) grad: 0.2383 (0.2444) time: 0.4418 data: 0.0049 max mem: 22448 +train: [5] [300/400] eta: 0:00:44 lr: 0.000298 loss: 2.6344 (2.6356) grad: 0.2505 (0.2445) time: 0.4308 data: 0.0047 max mem: 22448 +train: [5] [320/400] eta: 0:00:35 lr: 0.000298 loss: 2.6078 (2.6351) grad: 0.2510 (0.2450) time: 0.4303 data: 0.0048 max mem: 22448 +train: [5] [340/400] eta: 0:00:26 lr: 0.000298 loss: 2.6278 (2.6346) grad: 0.2529 (0.2454) time: 0.4322 data: 0.0045 max mem: 22448 +train: [5] [360/400] eta: 0:00:17 lr: 0.000297 loss: 2.6262 (2.6339) grad: 0.2553 (0.2461) time: 0.4454 data: 0.0049 max mem: 22448 +train: [5] [380/400] eta: 0:00:08 lr: 0.000297 loss: 2.6203 (2.6339) grad: 0.2593 (0.2469) time: 0.4525 data: 0.0051 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.5571 (2.6310) grad: 0.2469 (0.2468) time: 0.4400 data: 0.0048 max mem: 22448 +train: [5] Total time: 0:02:58 (0.4472 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.5571 (2.6310) grad: 0.2469 (0.2468) +eval (validation): [5] [ 0/85] eta: 0:04:20 time: 3.0704 data: 2.8006 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:31 time: 0.3482 data: 0.0045 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:19 time: 0.3717 data: 0.0033 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3450 data: 0.0041 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3373 data: 0.0041 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3296 data: 0.0041 max mem: 22448 +eval (validation): [5] Total time: 0:00:32 (0.3838 s / it) +cv: [5] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.377 acc: 0.282 f1: 0.210 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:21:15 lr: nan time: 3.1884 data: 2.8359 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:38 lr: 0.000296 loss: 2.5697 (2.5340) grad: 0.2450 (0.2472) time: 0.4448 data: 0.0051 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:01 lr: 0.000296 loss: 2.5697 (2.5585) grad: 0.2484 (0.2491) time: 0.4303 data: 0.0043 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:43 lr: 0.000296 loss: 2.5576 (2.5564) grad: 0.2468 (0.2498) time: 0.4327 data: 0.0047 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:30 lr: 0.000295 loss: 2.5257 (2.5468) grad: 0.2518 (0.2512) time: 0.4409 data: 0.0049 max mem: 22448 +train: [6] [100/400] eta: 0:02:19 lr: 0.000295 loss: 2.5680 (2.5494) grad: 0.2549 (0.2518) time: 0.4328 data: 0.0048 max mem: 22448 +train: [6] [120/400] eta: 0:02:08 lr: 0.000295 loss: 2.5727 (2.5536) grad: 0.2533 (0.2523) time: 0.4427 data: 0.0049 max mem: 22448 +train: [6] [140/400] eta: 0:01:58 lr: 0.000294 loss: 2.5888 (2.5591) grad: 0.2547 (0.2531) time: 0.4428 data: 0.0047 max mem: 22448 +train: [6] [160/400] eta: 0:01:49 lr: 0.000294 loss: 2.5970 (2.5654) grad: 0.2592 (0.2534) time: 0.4386 data: 0.0046 max mem: 22448 +train: [6] [180/400] eta: 0:01:39 lr: 0.000293 loss: 2.5733 (2.5636) grad: 0.2611 (0.2546) time: 0.4478 data: 0.0051 max mem: 22448 +train: [6] [200/400] eta: 0:01:30 lr: 0.000293 loss: 2.5552 (2.5640) grad: 0.2606 (0.2548) time: 0.4456 data: 0.0049 max mem: 22448 +train: [6] [220/400] eta: 0:01:21 lr: 0.000292 loss: 2.5913 (2.5619) grad: 0.2562 (0.2554) time: 0.4344 data: 0.0046 max mem: 22448 +train: [6] [240/400] eta: 0:01:11 lr: 0.000292 loss: 2.5674 (2.5629) grad: 0.2548 (0.2556) time: 0.4281 data: 0.0049 max mem: 22448 +train: [6] [260/400] eta: 0:01:03 lr: 0.000291 loss: 2.5386 (2.5587) grad: 0.2540 (0.2549) time: 0.4569 data: 0.0051 max mem: 22448 +train: [6] [280/400] eta: 0:00:53 lr: 0.000291 loss: 2.5386 (2.5597) grad: 0.2412 (0.2546) time: 0.4305 data: 0.0048 max mem: 22448 +train: [6] [300/400] eta: 0:00:44 lr: 0.000290 loss: 2.5718 (2.5602) grad: 0.2408 (0.2543) time: 0.4340 data: 0.0049 max mem: 22448 +train: [6] [320/400] eta: 0:00:35 lr: 0.000290 loss: 2.5854 (2.5601) grad: 0.2517 (0.2545) time: 0.4338 data: 0.0049 max mem: 22448 +train: [6] [340/400] eta: 0:00:26 lr: 0.000289 loss: 2.5863 (2.5617) grad: 0.2518 (0.2544) time: 0.4438 data: 0.0048 max mem: 22448 +train: [6] [360/400] eta: 0:00:17 lr: 0.000288 loss: 2.5454 (2.5587) grad: 0.2497 (0.2543) time: 0.4503 data: 0.0050 max mem: 22448 +train: [6] [380/400] eta: 0:00:08 lr: 0.000288 loss: 2.5133 (2.5607) grad: 0.2544 (0.2543) time: 0.4523 data: 0.0052 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.5686 (2.5613) grad: 0.2509 (0.2540) time: 0.4363 data: 0.0049 max mem: 22448 +train: [6] Total time: 0:02:58 (0.4474 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.5686 (2.5613) grad: 0.2509 (0.2540) +eval (validation): [6] [ 0/85] eta: 0:04:18 time: 3.0413 data: 2.7883 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:32 time: 0.3797 data: 0.0038 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:19 time: 0.3433 data: 0.0042 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3413 data: 0.0040 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3233 data: 0.0039 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3124 data: 0.0037 max mem: 22448 +eval (validation): [6] Total time: 0:00:32 (0.3786 s / it) +cv: [6] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.383 acc: 0.277 f1: 0.218 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:21:22 lr: nan time: 3.2061 data: 2.8221 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:41 lr: 0.000286 loss: 2.4391 (2.4487) grad: 0.2409 (0.2482) time: 0.4508 data: 0.0043 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:03 lr: 0.000286 loss: 2.4444 (2.4648) grad: 0.2478 (0.2535) time: 0.4366 data: 0.0047 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:45 lr: 0.000285 loss: 2.4615 (2.4577) grad: 0.2562 (0.2561) time: 0.4356 data: 0.0047 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:31 lr: 0.000284 loss: 2.4924 (2.4723) grad: 0.2571 (0.2548) time: 0.4292 data: 0.0045 max mem: 22448 +train: [7] [100/400] eta: 0:02:19 lr: 0.000284 loss: 2.4602 (2.4658) grad: 0.2507 (0.2541) time: 0.4405 data: 0.0048 max mem: 22448 +train: [7] [120/400] eta: 0:02:09 lr: 0.000283 loss: 2.4520 (2.4685) grad: 0.2542 (0.2555) time: 0.4360 data: 0.0047 max mem: 22448 +train: [7] [140/400] eta: 0:01:59 lr: 0.000282 loss: 2.5021 (2.4723) grad: 0.2542 (0.2553) time: 0.4386 data: 0.0049 max mem: 22448 +train: [7] [160/400] eta: 0:01:49 lr: 0.000282 loss: 2.4951 (2.4731) grad: 0.2538 (0.2557) time: 0.4389 data: 0.0048 max mem: 22448 +train: [7] [180/400] eta: 0:01:40 lr: 0.000281 loss: 2.5011 (2.4797) grad: 0.2643 (0.2573) time: 0.4530 data: 0.0050 max mem: 22448 +train: [7] [200/400] eta: 0:01:30 lr: 0.000280 loss: 2.5011 (2.4799) grad: 0.2650 (0.2577) time: 0.4421 data: 0.0051 max mem: 22448 +train: [7] [220/400] eta: 0:01:21 lr: 0.000279 loss: 2.4654 (2.4773) grad: 0.2649 (0.2579) time: 0.4429 data: 0.0048 max mem: 22448 +train: [7] [240/400] eta: 0:01:12 lr: 0.000278 loss: 2.4745 (2.4796) grad: 0.2560 (0.2582) time: 0.4395 data: 0.0047 max mem: 22448 +train: [7] [260/400] eta: 0:01:03 lr: 0.000278 loss: 2.4829 (2.4787) grad: 0.2560 (0.2580) time: 0.4485 data: 0.0047 max mem: 22448 +train: [7] [280/400] eta: 0:00:54 lr: 0.000277 loss: 2.4464 (2.4770) grad: 0.2501 (0.2577) time: 0.4517 data: 0.0050 max mem: 22448 +train: [7] [300/400] eta: 0:00:45 lr: 0.000276 loss: 2.4350 (2.4770) grad: 0.2560 (0.2580) time: 0.4293 data: 0.0049 max mem: 22448 +train: [7] [320/400] eta: 0:00:35 lr: 0.000275 loss: 2.4516 (2.4775) grad: 0.2560 (0.2577) time: 0.4441 data: 0.0049 max mem: 22448 +train: [7] [340/400] eta: 0:00:26 lr: 0.000274 loss: 2.4585 (2.4753) grad: 0.2542 (0.2572) time: 0.4402 data: 0.0048 max mem: 22448 +train: [7] [360/400] eta: 0:00:17 lr: 0.000273 loss: 2.4585 (2.4766) grad: 0.2547 (0.2575) time: 0.4597 data: 0.0049 max mem: 22448 +train: [7] [380/400] eta: 0:00:08 lr: 0.000272 loss: 2.4939 (2.4781) grad: 0.2609 (0.2580) time: 0.4460 data: 0.0048 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5493 (2.4794) grad: 0.2615 (0.2584) time: 0.4355 data: 0.0047 max mem: 22448 +train: [7] Total time: 0:02:59 (0.4494 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5493 (2.4794) grad: 0.2615 (0.2584) +eval (validation): [7] [ 0/85] eta: 0:04:17 time: 3.0322 data: 2.7441 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:33 time: 0.3826 data: 0.0043 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:19 time: 0.3551 data: 0.0041 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:10 time: 0.3372 data: 0.0042 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3281 data: 0.0039 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3141 data: 0.0039 max mem: 22448 +eval (validation): [7] Total time: 0:00:32 (0.3827 s / it) +cv: [7] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.396 acc: 0.275 f1: 0.212 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:22:37 lr: nan time: 3.3933 data: 2.9955 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:46 lr: 0.000270 loss: 2.3107 (2.3557) grad: 0.2341 (0.2385) time: 0.4554 data: 0.0034 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:06 lr: 0.000270 loss: 2.3605 (2.3722) grad: 0.2417 (0.2443) time: 0.4377 data: 0.0049 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:47 lr: 0.000269 loss: 2.3864 (2.3836) grad: 0.2486 (0.2467) time: 0.4356 data: 0.0047 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:32 lr: 0.000268 loss: 2.4021 (2.3931) grad: 0.2525 (0.2508) time: 0.4366 data: 0.0047 max mem: 22448 +train: [8] [100/400] eta: 0:02:21 lr: 0.000267 loss: 2.4021 (2.3930) grad: 0.2614 (0.2551) time: 0.4431 data: 0.0048 max mem: 22448 +train: [8] [120/400] eta: 0:02:10 lr: 0.000266 loss: 2.3901 (2.3939) grad: 0.2777 (0.2599) time: 0.4428 data: 0.0049 max mem: 22448 +train: [8] [140/400] eta: 0:02:00 lr: 0.000265 loss: 2.3901 (2.3977) grad: 0.2777 (0.2616) time: 0.4413 data: 0.0050 max mem: 22448 +train: [8] [160/400] eta: 0:01:50 lr: 0.000264 loss: 2.4100 (2.3980) grad: 0.2760 (0.2647) time: 0.4441 data: 0.0051 max mem: 22448 +train: [8] [180/400] eta: 0:01:41 lr: 0.000263 loss: 2.3829 (2.3951) grad: 0.2716 (0.2649) time: 0.4576 data: 0.0047 max mem: 22448 +train: [8] [200/400] eta: 0:01:31 lr: 0.000262 loss: 2.3973 (2.3982) grad: 0.2701 (0.2657) time: 0.4592 data: 0.0049 max mem: 22448 +train: [8] [220/400] eta: 0:01:22 lr: 0.000260 loss: 2.4125 (2.3992) grad: 0.2659 (0.2653) time: 0.4495 data: 0.0052 max mem: 22448 +train: [8] [240/400] eta: 0:01:13 lr: 0.000259 loss: 2.4097 (2.3984) grad: 0.2597 (0.2653) time: 0.4343 data: 0.0043 max mem: 22448 +train: [8] [260/400] eta: 0:01:03 lr: 0.000258 loss: 2.4167 (2.4023) grad: 0.2597 (0.2657) time: 0.4405 data: 0.0048 max mem: 22448 +train: [8] [280/400] eta: 0:00:54 lr: 0.000257 loss: 2.4196 (2.4026) grad: 0.2605 (0.2659) time: 0.4470 data: 0.0049 max mem: 22448 +train: [8] [300/400] eta: 0:00:45 lr: 0.000256 loss: 2.4040 (2.4033) grad: 0.2605 (0.2660) time: 0.4359 data: 0.0049 max mem: 22448 +train: [8] [320/400] eta: 0:00:36 lr: 0.000255 loss: 2.3879 (2.4024) grad: 0.2594 (0.2654) time: 0.4404 data: 0.0048 max mem: 22448 +train: [8] [340/400] eta: 0:00:27 lr: 0.000254 loss: 2.3879 (2.4021) grad: 0.2608 (0.2658) time: 0.4455 data: 0.0047 max mem: 22448 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 2.4223 (2.4033) grad: 0.2621 (0.2655) time: 0.4545 data: 0.0050 max mem: 22448 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 2.4428 (2.4056) grad: 0.2570 (0.2652) time: 0.4427 data: 0.0048 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.4299 (2.4071) grad: 0.2599 (0.2658) time: 0.4401 data: 0.0049 max mem: 22448 +train: [8] Total time: 0:03:00 (0.4521 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.4299 (2.4071) grad: 0.2599 (0.2658) +eval (validation): [8] [ 0/85] eta: 0:04:22 time: 3.0907 data: 2.8504 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:31 time: 0.3610 data: 0.0052 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:18 time: 0.3477 data: 0.0037 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:10 time: 0.3705 data: 0.0042 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3283 data: 0.0041 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3163 data: 0.0039 max mem: 22448 +eval (validation): [8] Total time: 0:00:32 (0.3845 s / it) +cv: [8] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.431 acc: 0.271 f1: 0.213 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:29 lr: nan time: 3.3748 data: 3.0294 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:40 lr: 0.000249 loss: 2.3543 (2.3470) grad: 0.2615 (0.2689) time: 0.4410 data: 0.0041 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:04 lr: 0.000248 loss: 2.3603 (2.3538) grad: 0.2585 (0.2629) time: 0.4382 data: 0.0040 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:44 lr: 0.000247 loss: 2.3480 (2.3470) grad: 0.2560 (0.2600) time: 0.4272 data: 0.0049 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:31 lr: 0.000246 loss: 2.3433 (2.3536) grad: 0.2570 (0.2616) time: 0.4359 data: 0.0048 max mem: 22448 +train: [9] [100/400] eta: 0:02:19 lr: 0.000244 loss: 2.3312 (2.3522) grad: 0.2671 (0.2630) time: 0.4395 data: 0.0049 max mem: 22448 +train: [9] [120/400] eta: 0:02:09 lr: 0.000243 loss: 2.3312 (2.3500) grad: 0.2635 (0.2628) time: 0.4420 data: 0.0049 max mem: 22448 +train: [9] [140/400] eta: 0:01:59 lr: 0.000242 loss: 2.3230 (2.3499) grad: 0.2633 (0.2637) time: 0.4361 data: 0.0051 max mem: 22448 +train: [9] [160/400] eta: 0:01:49 lr: 0.000241 loss: 2.3374 (2.3469) grad: 0.2670 (0.2648) time: 0.4397 data: 0.0048 max mem: 22448 +train: [9] [180/400] eta: 0:01:40 lr: 0.000240 loss: 2.3606 (2.3513) grad: 0.2728 (0.2659) time: 0.4519 data: 0.0049 max mem: 22448 +train: [9] [200/400] eta: 0:01:31 lr: 0.000238 loss: 2.3285 (2.3491) grad: 0.2759 (0.2670) time: 0.4560 data: 0.0050 max mem: 22448 +train: [9] [220/400] eta: 0:01:21 lr: 0.000237 loss: 2.3333 (2.3478) grad: 0.2730 (0.2681) time: 0.4487 data: 0.0050 max mem: 22448 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 2.3523 (2.3521) grad: 0.2715 (0.2680) time: 0.4447 data: 0.0051 max mem: 22448 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 2.3811 (2.3516) grad: 0.2562 (0.2672) time: 0.4448 data: 0.0049 max mem: 22448 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 2.3554 (2.3514) grad: 0.2581 (0.2673) time: 0.4562 data: 0.0054 max mem: 22448 +train: [9] [300/400] eta: 0:00:45 lr: 0.000232 loss: 2.3454 (2.3515) grad: 0.2727 (0.2680) time: 0.4451 data: 0.0050 max mem: 22448 +train: [9] [320/400] eta: 0:00:36 lr: 0.000230 loss: 2.3454 (2.3528) grad: 0.2708 (0.2681) time: 0.4469 data: 0.0048 max mem: 22448 +train: [9] [340/400] eta: 0:00:27 lr: 0.000229 loss: 2.3265 (2.3509) grad: 0.2705 (0.2686) time: 0.4536 data: 0.0048 max mem: 22448 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 2.3396 (2.3516) grad: 0.2789 (0.2694) time: 0.4464 data: 0.0049 max mem: 22448 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 2.3319 (2.3508) grad: 0.2706 (0.2693) time: 0.4420 data: 0.0048 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.3319 (2.3521) grad: 0.2696 (0.2698) time: 0.4409 data: 0.0048 max mem: 22448 +train: [9] Total time: 0:03:00 (0.4517 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.3319 (2.3521) grad: 0.2696 (0.2698) +eval (validation): [9] [ 0/85] eta: 0:04:50 time: 3.4133 data: 3.1013 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:32 time: 0.3530 data: 0.0116 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:19 time: 0.3450 data: 0.0037 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3352 data: 0.0039 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3133 data: 0.0037 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3100 data: 0.0036 max mem: 22448 +eval (validation): [9] Total time: 0:00:31 (0.3741 s / it) +cv: [9] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.381 acc: 0.281 f1: 0.215 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:22:43 lr: nan time: 3.4094 data: 3.0067 max mem: 22448 +train: [10] [ 20/400] eta: 0:04:02 lr: 0.000224 loss: 2.2634 (2.2727) grad: 0.2691 (0.2690) time: 0.4995 data: 0.0055 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:15 lr: 0.000222 loss: 2.2634 (2.2601) grad: 0.2629 (0.2656) time: 0.4406 data: 0.0044 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:51 lr: 0.000221 loss: 2.2666 (2.2802) grad: 0.2518 (0.2606) time: 0.4315 data: 0.0047 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:37 lr: 0.000220 loss: 2.2678 (2.2701) grad: 0.2496 (0.2589) time: 0.4487 data: 0.0048 max mem: 22448 +train: [10] [100/400] eta: 0:02:24 lr: 0.000218 loss: 2.2678 (2.2675) grad: 0.2555 (0.2592) time: 0.4391 data: 0.0048 max mem: 22448 +train: [10] [120/400] eta: 0:02:13 lr: 0.000217 loss: 2.2719 (2.2690) grad: 0.2619 (0.2608) time: 0.4476 data: 0.0048 max mem: 22448 +train: [10] [140/400] eta: 0:02:01 lr: 0.000215 loss: 2.2734 (2.2730) grad: 0.2669 (0.2614) time: 0.4210 data: 0.0045 max mem: 22448 +train: [10] [160/400] eta: 0:01:51 lr: 0.000214 loss: 2.2698 (2.2745) grad: 0.2626 (0.2619) time: 0.4498 data: 0.0045 max mem: 22448 +train: [10] [180/400] eta: 0:01:42 lr: 0.000213 loss: 2.2654 (2.2758) grad: 0.2632 (0.2625) time: 0.4562 data: 0.0049 max mem: 22448 +train: [10] [200/400] eta: 0:01:32 lr: 0.000211 loss: 2.2910 (2.2761) grad: 0.2646 (0.2633) time: 0.4452 data: 0.0048 max mem: 22448 +train: [10] [220/400] eta: 0:01:22 lr: 0.000210 loss: 2.2910 (2.2772) grad: 0.2646 (0.2639) time: 0.4327 data: 0.0047 max mem: 22448 +train: [10] [240/400] eta: 0:01:13 lr: 0.000208 loss: 2.2925 (2.2787) grad: 0.2670 (0.2640) time: 0.4332 data: 0.0047 max mem: 22448 +train: [10] [260/400] eta: 0:01:03 lr: 0.000207 loss: 2.3208 (2.2798) grad: 0.2625 (0.2640) time: 0.4356 data: 0.0046 max mem: 22448 +train: [10] [280/400] eta: 0:00:54 lr: 0.000205 loss: 2.3360 (2.2806) grad: 0.2649 (0.2642) time: 0.4485 data: 0.0048 max mem: 22448 +train: [10] [300/400] eta: 0:00:45 lr: 0.000204 loss: 2.2241 (2.2780) grad: 0.2649 (0.2641) time: 0.4408 data: 0.0045 max mem: 22448 +train: [10] [320/400] eta: 0:00:36 lr: 0.000202 loss: 2.2238 (2.2767) grad: 0.2648 (0.2645) time: 0.4446 data: 0.0047 max mem: 22448 +train: [10] [340/400] eta: 0:00:27 lr: 0.000201 loss: 2.2676 (2.2770) grad: 0.2723 (0.2647) time: 0.4356 data: 0.0045 max mem: 22448 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 2.2690 (2.2767) grad: 0.2628 (0.2648) time: 0.4302 data: 0.0047 max mem: 22448 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 2.2646 (2.2768) grad: 0.2666 (0.2652) time: 0.4322 data: 0.0049 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.2708 (2.2778) grad: 0.2727 (0.2657) time: 0.4361 data: 0.0046 max mem: 22448 +train: [10] Total time: 0:03:00 (0.4503 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.2708 (2.2778) grad: 0.2727 (0.2657) +eval (validation): [10] [ 0/85] eta: 0:04:15 time: 3.0019 data: 2.7145 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:31 time: 0.3543 data: 0.0038 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:18 time: 0.3240 data: 0.0040 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:09 time: 0.3270 data: 0.0039 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3171 data: 0.0038 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3121 data: 0.0037 max mem: 22448 +eval (validation): [10] Total time: 0:00:30 (0.3638 s / it) +cv: [10] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.394 acc: 0.278 f1: 0.217 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:21:53 lr: nan time: 3.2841 data: 2.8903 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:47 lr: 0.000195 loss: 2.1619 (2.1919) grad: 0.2574 (0.2607) time: 0.4638 data: 0.0050 max mem: 22448 +train: [11] [ 40/400] eta: 0:03:05 lr: 0.000193 loss: 2.2168 (2.2149) grad: 0.2596 (0.2630) time: 0.4295 data: 0.0051 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:45 lr: 0.000192 loss: 2.1972 (2.1998) grad: 0.2672 (0.2653) time: 0.4274 data: 0.0050 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:31 lr: 0.000190 loss: 2.1958 (2.2118) grad: 0.2682 (0.2678) time: 0.4288 data: 0.0050 max mem: 22448 +train: [11] [100/400] eta: 0:02:19 lr: 0.000189 loss: 2.1860 (2.2062) grad: 0.2741 (0.2689) time: 0.4325 data: 0.0048 max mem: 22448 +train: [11] [120/400] eta: 0:02:08 lr: 0.000187 loss: 2.1619 (2.1959) grad: 0.2651 (0.2694) time: 0.4395 data: 0.0048 max mem: 22448 +train: [11] [140/400] eta: 0:01:58 lr: 0.000186 loss: 2.1694 (2.1974) grad: 0.2688 (0.2695) time: 0.4314 data: 0.0048 max mem: 22448 +train: [11] [160/400] eta: 0:01:48 lr: 0.000184 loss: 2.2318 (2.2065) grad: 0.2699 (0.2697) time: 0.4334 data: 0.0048 max mem: 22448 +train: [11] [180/400] eta: 0:01:39 lr: 0.000183 loss: 2.2365 (2.2082) grad: 0.2706 (0.2709) time: 0.4281 data: 0.0048 max mem: 22448 +train: [11] [200/400] eta: 0:01:30 lr: 0.000181 loss: 2.2456 (2.2120) grad: 0.2786 (0.2713) time: 0.4485 data: 0.0050 max mem: 22448 +train: [11] [220/400] eta: 0:01:20 lr: 0.000180 loss: 2.2824 (2.2201) grad: 0.2760 (0.2712) time: 0.4420 data: 0.0049 max mem: 22448 +train: [11] [240/400] eta: 0:01:11 lr: 0.000178 loss: 2.2546 (2.2200) grad: 0.2643 (0.2713) time: 0.4399 data: 0.0048 max mem: 22448 +train: [11] [260/400] eta: 0:01:02 lr: 0.000177 loss: 2.2352 (2.2211) grad: 0.2689 (0.2717) time: 0.4232 data: 0.0047 max mem: 22448 +train: [11] [280/400] eta: 0:00:53 lr: 0.000175 loss: 2.2368 (2.2228) grad: 0.2772 (0.2723) time: 0.4493 data: 0.0049 max mem: 22448 +train: [11] [300/400] eta: 0:00:44 lr: 0.000174 loss: 2.2488 (2.2272) grad: 0.2805 (0.2727) time: 0.4309 data: 0.0050 max mem: 22448 +train: [11] [320/400] eta: 0:00:35 lr: 0.000172 loss: 2.2540 (2.2289) grad: 0.2805 (0.2737) time: 0.4350 data: 0.0046 max mem: 22448 +train: [11] [340/400] eta: 0:00:26 lr: 0.000170 loss: 2.2520 (2.2297) grad: 0.2848 (0.2746) time: 0.4319 data: 0.0050 max mem: 22448 +train: [11] [360/400] eta: 0:00:17 lr: 0.000169 loss: 2.2153 (2.2289) grad: 0.2821 (0.2747) time: 0.4295 data: 0.0050 max mem: 22448 +train: [11] [380/400] eta: 0:00:08 lr: 0.000167 loss: 2.2004 (2.2263) grad: 0.2695 (0.2744) time: 0.4338 data: 0.0047 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.2136 (2.2301) grad: 0.2680 (0.2740) time: 0.4312 data: 0.0047 max mem: 22448 +train: [11] Total time: 0:02:57 (0.4432 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.2136 (2.2301) grad: 0.2680 (0.2740) +eval (validation): [11] [ 0/85] eta: 0:04:29 time: 3.1725 data: 2.9238 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:33 time: 0.3850 data: 0.0053 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:19 time: 0.3605 data: 0.0040 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:10 time: 0.3438 data: 0.0042 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3249 data: 0.0041 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3167 data: 0.0041 max mem: 22448 +eval (validation): [11] Total time: 0:00:32 (0.3881 s / it) +cv: [11] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.399 acc: 0.282 f1: 0.221 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:21:26 lr: nan time: 3.2175 data: 2.8313 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:37 lr: 0.000164 loss: 2.1284 (2.1344) grad: 0.2549 (0.2583) time: 0.4409 data: 0.0038 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:02 lr: 0.000163 loss: 2.1284 (2.1371) grad: 0.2589 (0.2605) time: 0.4384 data: 0.0051 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:43 lr: 0.000161 loss: 2.1357 (2.1414) grad: 0.2583 (0.2599) time: 0.4229 data: 0.0051 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:29 lr: 0.000160 loss: 2.1430 (2.1446) grad: 0.2583 (0.2594) time: 0.4261 data: 0.0049 max mem: 22448 +train: [12] [100/400] eta: 0:02:17 lr: 0.000158 loss: 2.1749 (2.1530) grad: 0.2636 (0.2611) time: 0.4291 data: 0.0049 max mem: 22448 +train: [12] [120/400] eta: 0:02:06 lr: 0.000156 loss: 2.1735 (2.1570) grad: 0.2595 (0.2599) time: 0.4250 data: 0.0050 max mem: 22448 +train: [12] [140/400] eta: 0:01:56 lr: 0.000155 loss: 2.1623 (2.1559) grad: 0.2595 (0.2617) time: 0.4267 data: 0.0049 max mem: 22448 +train: [12] [160/400] eta: 0:01:47 lr: 0.000153 loss: 2.1588 (2.1564) grad: 0.2730 (0.2635) time: 0.4248 data: 0.0049 max mem: 22448 +train: [12] [180/400] eta: 0:01:37 lr: 0.000152 loss: 2.1726 (2.1577) grad: 0.2718 (0.2642) time: 0.4233 data: 0.0048 max mem: 22448 +train: [12] [200/400] eta: 0:01:28 lr: 0.000150 loss: 2.1821 (2.1619) grad: 0.2681 (0.2652) time: 0.4433 data: 0.0048 max mem: 22448 +train: [12] [220/400] eta: 0:01:19 lr: 0.000149 loss: 2.1821 (2.1655) grad: 0.2694 (0.2655) time: 0.4337 data: 0.0049 max mem: 22448 +train: [12] [240/400] eta: 0:01:10 lr: 0.000147 loss: 2.1619 (2.1631) grad: 0.2755 (0.2672) time: 0.4391 data: 0.0051 max mem: 22448 +train: [12] [260/400] eta: 0:01:01 lr: 0.000145 loss: 2.1280 (2.1628) grad: 0.2782 (0.2677) time: 0.4271 data: 0.0048 max mem: 22448 +train: [12] [280/400] eta: 0:00:52 lr: 0.000144 loss: 2.1280 (2.1596) grad: 0.2721 (0.2679) time: 0.4437 data: 0.0050 max mem: 22448 +train: [12] [300/400] eta: 0:00:44 lr: 0.000142 loss: 2.1215 (2.1621) grad: 0.2767 (0.2687) time: 0.4314 data: 0.0045 max mem: 22448 +train: [12] [320/400] eta: 0:00:35 lr: 0.000141 loss: 2.1427 (2.1640) grad: 0.2740 (0.2688) time: 0.4316 data: 0.0047 max mem: 22448 +train: [12] [340/400] eta: 0:00:26 lr: 0.000139 loss: 2.1343 (2.1640) grad: 0.2696 (0.2689) time: 0.4253 data: 0.0049 max mem: 22448 +train: [12] [360/400] eta: 0:00:17 lr: 0.000138 loss: 2.1474 (2.1637) grad: 0.2682 (0.2688) time: 0.4279 data: 0.0046 max mem: 22448 +train: [12] [380/400] eta: 0:00:08 lr: 0.000136 loss: 2.1572 (2.1634) grad: 0.2682 (0.2689) time: 0.4218 data: 0.0048 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.1522 (2.1629) grad: 0.2728 (0.2693) time: 0.4257 data: 0.0050 max mem: 22448 +train: [12] Total time: 0:02:55 (0.4380 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.1522 (2.1629) grad: 0.2728 (0.2693) +eval (validation): [12] [ 0/85] eta: 0:04:24 time: 3.1160 data: 2.8727 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:31 time: 0.3539 data: 0.0057 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:18 time: 0.3245 data: 0.0036 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3548 data: 0.0043 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3262 data: 0.0041 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3149 data: 0.0041 max mem: 22448 +eval (validation): [12] Total time: 0:00:31 (0.3737 s / it) +cv: [12] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.428 acc: 0.275 f1: 0.216 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:12 lr: nan time: 3.3304 data: 2.9279 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:39 lr: 0.000133 loss: 2.0858 (2.0981) grad: 0.2572 (0.2614) time: 0.4389 data: 0.0030 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:03 lr: 0.000131 loss: 2.1035 (2.0959) grad: 0.2600 (0.2624) time: 0.4372 data: 0.0044 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:46 lr: 0.000130 loss: 2.1240 (2.1095) grad: 0.2636 (0.2644) time: 0.4471 data: 0.0051 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:31 lr: 0.000128 loss: 2.0965 (2.1042) grad: 0.2638 (0.2648) time: 0.4279 data: 0.0048 max mem: 22448 +train: [13] [100/400] eta: 0:02:19 lr: 0.000127 loss: 2.0892 (2.1005) grad: 0.2609 (0.2643) time: 0.4224 data: 0.0047 max mem: 22448 +train: [13] [120/400] eta: 0:02:08 lr: 0.000125 loss: 2.0924 (2.0940) grad: 0.2604 (0.2646) time: 0.4292 data: 0.0048 max mem: 22448 +train: [13] [140/400] eta: 0:01:57 lr: 0.000124 loss: 2.1089 (2.1021) grad: 0.2662 (0.2660) time: 0.4257 data: 0.0048 max mem: 22448 +train: [13] [160/400] eta: 0:01:47 lr: 0.000122 loss: 2.0965 (2.0985) grad: 0.2682 (0.2664) time: 0.4245 data: 0.0049 max mem: 22448 +train: [13] [180/400] eta: 0:01:38 lr: 0.000120 loss: 2.0849 (2.1036) grad: 0.2682 (0.2669) time: 0.4290 data: 0.0047 max mem: 22448 +train: [13] [200/400] eta: 0:01:29 lr: 0.000119 loss: 2.0849 (2.0983) grad: 0.2652 (0.2669) time: 0.4493 data: 0.0048 max mem: 22448 +train: [13] [220/400] eta: 0:01:20 lr: 0.000117 loss: 2.0642 (2.1002) grad: 0.2652 (0.2675) time: 0.4326 data: 0.0048 max mem: 22448 +train: [13] [240/400] eta: 0:01:11 lr: 0.000116 loss: 2.1325 (2.1020) grad: 0.2692 (0.2679) time: 0.4341 data: 0.0050 max mem: 22448 +train: [13] [260/400] eta: 0:01:02 lr: 0.000114 loss: 2.1219 (2.1038) grad: 0.2607 (0.2673) time: 0.4234 data: 0.0046 max mem: 22448 +train: [13] [280/400] eta: 0:00:53 lr: 0.000113 loss: 2.1048 (2.1021) grad: 0.2583 (0.2668) time: 0.4458 data: 0.0049 max mem: 22448 +train: [13] [300/400] eta: 0:00:44 lr: 0.000111 loss: 2.0934 (2.1017) grad: 0.2552 (0.2659) time: 0.4298 data: 0.0049 max mem: 22448 +train: [13] [320/400] eta: 0:00:35 lr: 0.000110 loss: 2.1057 (2.1031) grad: 0.2632 (0.2664) time: 0.4323 data: 0.0047 max mem: 22448 +train: [13] [340/400] eta: 0:00:26 lr: 0.000108 loss: 2.1171 (2.1039) grad: 0.2655 (0.2660) time: 0.4306 data: 0.0048 max mem: 22448 +train: [13] [360/400] eta: 0:00:17 lr: 0.000107 loss: 2.0996 (2.1036) grad: 0.2655 (0.2664) time: 0.4314 data: 0.0047 max mem: 22448 +train: [13] [380/400] eta: 0:00:08 lr: 0.000105 loss: 2.0791 (2.1030) grad: 0.2688 (0.2667) time: 0.4341 data: 0.0046 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.0808 (2.1040) grad: 0.2665 (0.2668) time: 0.4366 data: 0.0048 max mem: 22448 +train: [13] Total time: 0:02:56 (0.4410 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.0808 (2.1040) grad: 0.2665 (0.2668) +eval (validation): [13] [ 0/85] eta: 0:04:24 time: 3.1171 data: 2.8238 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:33 time: 0.3786 data: 0.0047 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:18 time: 0.3307 data: 0.0042 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:09 time: 0.3352 data: 0.0043 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3259 data: 0.0042 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3176 data: 0.0041 max mem: 22448 +eval (validation): [13] Total time: 0:00:31 (0.3756 s / it) +cv: [13] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.421 acc: 0.277 f1: 0.223 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:03 lr: nan time: 3.3095 data: 2.9214 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:53 lr: 0.000102 loss: 2.0083 (2.0070) grad: 0.2468 (0.2490) time: 0.4788 data: 0.0050 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:07 lr: 0.000101 loss: 2.0250 (2.0288) grad: 0.2475 (0.2535) time: 0.4223 data: 0.0047 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:46 lr: 0.000099 loss: 2.0430 (2.0272) grad: 0.2566 (0.2558) time: 0.4308 data: 0.0048 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:34 lr: 0.000098 loss: 2.0507 (2.0382) grad: 0.2529 (0.2557) time: 0.4570 data: 0.0048 max mem: 22448 +train: [14] [100/400] eta: 0:02:21 lr: 0.000096 loss: 2.0507 (2.0412) grad: 0.2517 (0.2565) time: 0.4260 data: 0.0046 max mem: 22448 +train: [14] [120/400] eta: 0:02:10 lr: 0.000095 loss: 2.0276 (2.0391) grad: 0.2638 (0.2578) time: 0.4306 data: 0.0049 max mem: 22448 +train: [14] [140/400] eta: 0:01:59 lr: 0.000093 loss: 2.0159 (2.0391) grad: 0.2681 (0.2592) time: 0.4296 data: 0.0049 max mem: 22448 +train: [14] [160/400] eta: 0:01:49 lr: 0.000092 loss: 2.0247 (2.0366) grad: 0.2629 (0.2593) time: 0.4325 data: 0.0048 max mem: 22448 +train: [14] [180/400] eta: 0:01:39 lr: 0.000090 loss: 2.0103 (2.0338) grad: 0.2584 (0.2598) time: 0.4314 data: 0.0049 max mem: 22448 +train: [14] [200/400] eta: 0:01:30 lr: 0.000089 loss: 1.9961 (2.0324) grad: 0.2633 (0.2605) time: 0.4483 data: 0.0050 max mem: 22448 +train: [14] [220/400] eta: 0:01:21 lr: 0.000088 loss: 2.0134 (2.0337) grad: 0.2704 (0.2614) time: 0.4415 data: 0.0050 max mem: 22448 +train: [14] [240/400] eta: 0:01:12 lr: 0.000086 loss: 2.0720 (2.0389) grad: 0.2660 (0.2618) time: 0.4418 data: 0.0049 max mem: 22448 +train: [14] [260/400] eta: 0:01:02 lr: 0.000085 loss: 2.0720 (2.0402) grad: 0.2663 (0.2621) time: 0.4255 data: 0.0046 max mem: 22448 +train: [14] [280/400] eta: 0:00:53 lr: 0.000083 loss: 2.0555 (2.0414) grad: 0.2654 (0.2617) time: 0.4465 data: 0.0051 max mem: 22448 +train: [14] [300/400] eta: 0:00:44 lr: 0.000082 loss: 2.0794 (2.0463) grad: 0.2643 (0.2620) time: 0.4371 data: 0.0048 max mem: 22448 +train: [14] [320/400] eta: 0:00:35 lr: 0.000081 loss: 2.0794 (2.0473) grad: 0.2713 (0.2629) time: 0.4336 data: 0.0049 max mem: 22448 +train: [14] [340/400] eta: 0:00:26 lr: 0.000079 loss: 2.0723 (2.0489) grad: 0.2741 (0.2634) time: 0.4366 data: 0.0050 max mem: 22448 +train: [14] [360/400] eta: 0:00:17 lr: 0.000078 loss: 2.0662 (2.0500) grad: 0.2628 (0.2632) time: 0.4312 data: 0.0049 max mem: 22448 +train: [14] [380/400] eta: 0:00:08 lr: 0.000076 loss: 2.0486 (2.0485) grad: 0.2625 (0.2632) time: 0.4323 data: 0.0049 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.0421 (2.0492) grad: 0.2685 (0.2638) time: 0.4258 data: 0.0048 max mem: 22448 +train: [14] Total time: 0:02:57 (0.4448 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.0421 (2.0492) grad: 0.2685 (0.2638) +eval (validation): [14] [ 0/85] eta: 0:04:27 time: 3.1445 data: 2.9058 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:30 time: 0.3394 data: 0.0089 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3393 data: 0.0033 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3403 data: 0.0040 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3318 data: 0.0043 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3292 data: 0.0043 max mem: 22448 +eval (validation): [14] Total time: 0:00:31 (0.3744 s / it) +cv: [14] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.400 acc: 0.283 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [15] [ 0/400] eta: 0:22:07 lr: nan time: 3.3196 data: 2.9799 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:36 lr: 0.000074 loss: 2.0039 (2.0133) grad: 0.2513 (0.2553) time: 0.4332 data: 0.0043 max mem: 22448 +train: [15] [ 40/400] eta: 0:02:59 lr: 0.000072 loss: 2.0039 (1.9994) grad: 0.2513 (0.2525) time: 0.4237 data: 0.0048 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:41 lr: 0.000071 loss: 1.9629 (1.9915) grad: 0.2522 (0.2532) time: 0.4297 data: 0.0048 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:30 lr: 0.000070 loss: 1.9629 (1.9821) grad: 0.2501 (0.2526) time: 0.4551 data: 0.0049 max mem: 22448 +train: [15] [100/400] eta: 0:02:18 lr: 0.000068 loss: 1.9324 (1.9751) grad: 0.2486 (0.2535) time: 0.4296 data: 0.0050 max mem: 22448 +train: [15] [120/400] eta: 0:02:08 lr: 0.000067 loss: 1.9750 (1.9795) grad: 0.2573 (0.2549) time: 0.4308 data: 0.0050 max mem: 22448 +train: [15] [140/400] eta: 0:01:58 lr: 0.000066 loss: 2.0047 (1.9865) grad: 0.2648 (0.2570) time: 0.4350 data: 0.0049 max mem: 22448 +train: [15] [160/400] eta: 0:01:48 lr: 0.000064 loss: 1.9902 (1.9848) grad: 0.2601 (0.2575) time: 0.4322 data: 0.0052 max mem: 22448 +train: [15] [180/400] eta: 0:01:38 lr: 0.000063 loss: 2.0216 (1.9932) grad: 0.2626 (0.2588) time: 0.4290 data: 0.0049 max mem: 22448 +train: [15] [200/400] eta: 0:01:29 lr: 0.000062 loss: 2.0407 (1.9943) grad: 0.2596 (0.2583) time: 0.4480 data: 0.0049 max mem: 22448 +train: [15] [220/400] eta: 0:01:20 lr: 0.000061 loss: 2.0181 (1.9952) grad: 0.2526 (0.2585) time: 0.4405 data: 0.0047 max mem: 22448 +train: [15] [240/400] eta: 0:01:11 lr: 0.000059 loss: 2.0079 (1.9952) grad: 0.2596 (0.2590) time: 0.4370 data: 0.0049 max mem: 22448 +train: [15] [260/400] eta: 0:01:02 lr: 0.000058 loss: 2.0297 (2.0003) grad: 0.2599 (0.2590) time: 0.4319 data: 0.0048 max mem: 22448 +train: [15] [280/400] eta: 0:00:53 lr: 0.000057 loss: 2.0297 (1.9980) grad: 0.2498 (0.2584) time: 0.4500 data: 0.0050 max mem: 22448 +train: [15] [300/400] eta: 0:00:44 lr: 0.000056 loss: 1.9884 (1.9985) grad: 0.2495 (0.2584) time: 0.4318 data: 0.0048 max mem: 22448 +train: [15] [320/400] eta: 0:00:35 lr: 0.000054 loss: 2.0104 (1.9990) grad: 0.2592 (0.2586) time: 0.4343 data: 0.0046 max mem: 22448 +train: [15] [340/400] eta: 0:00:26 lr: 0.000053 loss: 2.0149 (1.9996) grad: 0.2581 (0.2586) time: 0.4340 data: 0.0049 max mem: 22448 +train: [15] [360/400] eta: 0:00:17 lr: 0.000052 loss: 1.9924 (2.0011) grad: 0.2583 (0.2588) time: 0.4380 data: 0.0046 max mem: 22448 +train: [15] [380/400] eta: 0:00:08 lr: 0.000051 loss: 1.9715 (2.0001) grad: 0.2549 (0.2586) time: 0.4280 data: 0.0047 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 1.9929 (2.0006) grad: 0.2499 (0.2584) time: 0.4326 data: 0.0049 max mem: 22448 +train: [15] Total time: 0:02:57 (0.4430 s / it) +train: [15] Summary: lr: 0.000050 loss: 1.9929 (2.0006) grad: 0.2499 (0.2584) +eval (validation): [15] [ 0/85] eta: 0:05:01 time: 3.5513 data: 3.2416 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:32 time: 0.3520 data: 0.0043 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:18 time: 0.3317 data: 0.0029 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:09 time: 0.3259 data: 0.0041 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3196 data: 0.0037 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3182 data: 0.0036 max mem: 22448 +eval (validation): [15] Total time: 0:00:31 (0.3732 s / it) +cv: [15] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.405 acc: 0.280 f1: 0.222 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:23:42 lr: nan time: 3.5566 data: 3.1854 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:40 lr: 0.000048 loss: 1.9277 (1.9506) grad: 0.2376 (0.2359) time: 0.4319 data: 0.0033 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:03 lr: 0.000047 loss: 1.9308 (1.9468) grad: 0.2357 (0.2361) time: 0.4351 data: 0.0046 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:43 lr: 0.000046 loss: 1.9411 (1.9406) grad: 0.2407 (0.2398) time: 0.4262 data: 0.0050 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:31 lr: 0.000045 loss: 1.9606 (1.9529) grad: 0.2479 (0.2425) time: 0.4421 data: 0.0051 max mem: 22448 +train: [16] [100/400] eta: 0:02:19 lr: 0.000044 loss: 1.9542 (1.9486) grad: 0.2506 (0.2431) time: 0.4301 data: 0.0047 max mem: 22448 +train: [16] [120/400] eta: 0:02:08 lr: 0.000043 loss: 1.9374 (1.9510) grad: 0.2475 (0.2441) time: 0.4284 data: 0.0048 max mem: 22448 +train: [16] [140/400] eta: 0:01:58 lr: 0.000042 loss: 1.9374 (1.9500) grad: 0.2459 (0.2444) time: 0.4295 data: 0.0050 max mem: 22448 +train: [16] [160/400] eta: 0:01:48 lr: 0.000041 loss: 1.9773 (1.9552) grad: 0.2476 (0.2458) time: 0.4247 data: 0.0046 max mem: 22448 +train: [16] [180/400] eta: 0:01:38 lr: 0.000040 loss: 1.9890 (1.9554) grad: 0.2505 (0.2461) time: 0.4277 data: 0.0051 max mem: 22448 +train: [16] [200/400] eta: 0:01:29 lr: 0.000039 loss: 1.9339 (1.9519) grad: 0.2457 (0.2457) time: 0.4510 data: 0.0049 max mem: 22448 +train: [16] [220/400] eta: 0:01:20 lr: 0.000038 loss: 1.9264 (1.9519) grad: 0.2385 (0.2454) time: 0.4423 data: 0.0053 max mem: 22448 +train: [16] [240/400] eta: 0:01:11 lr: 0.000036 loss: 1.9537 (1.9512) grad: 0.2460 (0.2461) time: 0.4385 data: 0.0043 max mem: 22448 +train: [16] [260/400] eta: 0:01:02 lr: 0.000035 loss: 1.9590 (1.9554) grad: 0.2580 (0.2471) time: 0.4319 data: 0.0049 max mem: 22448 +train: [16] [280/400] eta: 0:00:53 lr: 0.000034 loss: 1.9590 (1.9554) grad: 0.2580 (0.2476) time: 0.4508 data: 0.0050 max mem: 22448 +train: [16] [300/400] eta: 0:00:44 lr: 0.000033 loss: 1.9555 (1.9562) grad: 0.2501 (0.2480) time: 0.4327 data: 0.0049 max mem: 22448 +train: [16] [320/400] eta: 0:00:35 lr: 0.000032 loss: 2.0071 (1.9611) grad: 0.2508 (0.2485) time: 0.4343 data: 0.0049 max mem: 22448 +train: [16] [340/400] eta: 0:00:26 lr: 0.000031 loss: 1.9761 (1.9610) grad: 0.2508 (0.2487) time: 0.4278 data: 0.0049 max mem: 22448 +train: [16] [360/400] eta: 0:00:17 lr: 0.000031 loss: 1.9676 (1.9621) grad: 0.2516 (0.2491) time: 0.4273 data: 0.0048 max mem: 22448 +train: [16] [380/400] eta: 0:00:08 lr: 0.000030 loss: 1.9679 (1.9621) grad: 0.2566 (0.2498) time: 0.4298 data: 0.0047 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 1.9656 (1.9616) grad: 0.2612 (0.2503) time: 0.4274 data: 0.0047 max mem: 22448 +train: [16] Total time: 0:02:56 (0.4419 s / it) +train: [16] Summary: lr: 0.000029 loss: 1.9656 (1.9616) grad: 0.2612 (0.2503) +eval (validation): [16] [ 0/85] eta: 0:04:39 time: 3.2824 data: 2.9825 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:32 time: 0.3563 data: 0.0047 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:18 time: 0.3398 data: 0.0042 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:09 time: 0.3367 data: 0.0043 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3203 data: 0.0038 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3194 data: 0.0038 max mem: 22448 +eval (validation): [16] Total time: 0:00:31 (0.3757 s / it) +cv: [16] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.407 acc: 0.279 f1: 0.221 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:45 lr: nan time: 3.4130 data: 3.0300 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:38 lr: 0.000028 loss: 1.8836 (1.9050) grad: 0.2295 (0.2389) time: 0.4338 data: 0.0044 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:01 lr: 0.000027 loss: 1.8994 (1.9200) grad: 0.2421 (0.2419) time: 0.4278 data: 0.0044 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:42 lr: 0.000026 loss: 1.9188 (1.9314) grad: 0.2389 (0.2397) time: 0.4249 data: 0.0049 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:30 lr: 0.000025 loss: 1.9127 (1.9215) grad: 0.2371 (0.2397) time: 0.4504 data: 0.0049 max mem: 22448 +train: [17] [100/400] eta: 0:02:18 lr: 0.000024 loss: 1.9127 (1.9295) grad: 0.2376 (0.2392) time: 0.4262 data: 0.0048 max mem: 22448 +train: [17] [120/400] eta: 0:02:08 lr: 0.000023 loss: 1.9338 (1.9309) grad: 0.2417 (0.2398) time: 0.4359 data: 0.0048 max mem: 22448 +train: [17] [140/400] eta: 0:01:57 lr: 0.000023 loss: 1.9223 (1.9302) grad: 0.2417 (0.2400) time: 0.4298 data: 0.0047 max mem: 22448 +train: [17] [160/400] eta: 0:01:48 lr: 0.000022 loss: 1.9261 (1.9280) grad: 0.2416 (0.2407) time: 0.4288 data: 0.0050 max mem: 22448 +train: [17] [180/400] eta: 0:01:38 lr: 0.000021 loss: 1.9111 (1.9259) grad: 0.2397 (0.2404) time: 0.4299 data: 0.0047 max mem: 22448 +train: [17] [200/400] eta: 0:01:29 lr: 0.000020 loss: 1.9120 (1.9263) grad: 0.2385 (0.2404) time: 0.4440 data: 0.0050 max mem: 22448 +train: [17] [220/400] eta: 0:01:20 lr: 0.000019 loss: 1.9384 (1.9283) grad: 0.2461 (0.2413) time: 0.4506 data: 0.0050 max mem: 22448 +train: [17] [240/400] eta: 0:01:11 lr: 0.000019 loss: 1.9384 (1.9277) grad: 0.2413 (0.2420) time: 0.4390 data: 0.0050 max mem: 22448 +train: [17] [260/400] eta: 0:01:02 lr: 0.000018 loss: 1.9142 (1.9272) grad: 0.2412 (0.2424) time: 0.4237 data: 0.0048 max mem: 22448 +train: [17] [280/400] eta: 0:00:53 lr: 0.000017 loss: 1.9359 (1.9286) grad: 0.2416 (0.2422) time: 0.4416 data: 0.0048 max mem: 22448 +train: [17] [300/400] eta: 0:00:44 lr: 0.000016 loss: 1.9396 (1.9284) grad: 0.2388 (0.2419) time: 0.4422 data: 0.0050 max mem: 22448 +train: [17] [320/400] eta: 0:00:35 lr: 0.000016 loss: 1.9312 (1.9282) grad: 0.2348 (0.2414) time: 0.4319 data: 0.0050 max mem: 22448 +train: [17] [340/400] eta: 0:00:26 lr: 0.000015 loss: 1.9351 (1.9275) grad: 0.2348 (0.2415) time: 0.4333 data: 0.0049 max mem: 22448 +train: [17] [360/400] eta: 0:00:17 lr: 0.000014 loss: 1.9337 (1.9288) grad: 0.2414 (0.2417) time: 0.4345 data: 0.0050 max mem: 22448 +train: [17] [380/400] eta: 0:00:08 lr: 0.000014 loss: 1.9324 (1.9282) grad: 0.2449 (0.2423) time: 0.4407 data: 0.0051 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 1.8790 (1.9267) grad: 0.2444 (0.2420) time: 0.4308 data: 0.0047 max mem: 22448 +train: [17] Total time: 0:02:57 (0.4431 s / it) +train: [17] Summary: lr: 0.000013 loss: 1.8790 (1.9267) grad: 0.2444 (0.2420) +eval (validation): [17] [ 0/85] eta: 0:04:38 time: 3.2772 data: 2.9787 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:31 time: 0.3518 data: 0.0051 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:18 time: 0.3419 data: 0.0033 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:09 time: 0.3404 data: 0.0043 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3176 data: 0.0039 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3080 data: 0.0036 max mem: 22448 +eval (validation): [17] Total time: 0:00:31 (0.3738 s / it) +cv: [17] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.398 acc: 0.282 f1: 0.225 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:23:20 lr: nan time: 3.5020 data: 3.1487 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:49 lr: 0.000012 loss: 1.9455 (1.9412) grad: 0.2383 (0.2385) time: 0.4597 data: 0.0068 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:09 lr: 0.000012 loss: 1.9056 (1.9116) grad: 0.2382 (0.2378) time: 0.4451 data: 0.0043 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:47 lr: 0.000011 loss: 1.8539 (1.9061) grad: 0.2433 (0.2408) time: 0.4238 data: 0.0048 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:34 lr: 0.000011 loss: 1.8924 (1.9039) grad: 0.2385 (0.2388) time: 0.4518 data: 0.0050 max mem: 22448 +train: [18] [100/400] eta: 0:02:22 lr: 0.000010 loss: 1.9144 (1.9057) grad: 0.2333 (0.2381) time: 0.4412 data: 0.0049 max mem: 22448 +train: [18] [120/400] eta: 0:02:11 lr: 0.000009 loss: 1.9132 (1.9035) grad: 0.2349 (0.2376) time: 0.4357 data: 0.0049 max mem: 22448 +train: [18] [140/400] eta: 0:02:00 lr: 0.000009 loss: 1.8924 (1.9074) grad: 0.2368 (0.2379) time: 0.4341 data: 0.0047 max mem: 22448 +train: [18] [160/400] eta: 0:01:50 lr: 0.000008 loss: 1.9314 (1.9091) grad: 0.2334 (0.2377) time: 0.4371 data: 0.0049 max mem: 22448 +train: [18] [180/400] eta: 0:01:40 lr: 0.000008 loss: 1.9112 (1.9067) grad: 0.2366 (0.2382) time: 0.4487 data: 0.0050 max mem: 22448 +train: [18] [200/400] eta: 0:01:31 lr: 0.000007 loss: 1.8965 (1.9076) grad: 0.2396 (0.2382) time: 0.4409 data: 0.0052 max mem: 22448 +train: [18] [220/400] eta: 0:01:22 lr: 0.000007 loss: 1.9099 (1.9083) grad: 0.2392 (0.2380) time: 0.4640 data: 0.0051 max mem: 22448 +train: [18] [240/400] eta: 0:01:13 lr: 0.000006 loss: 1.9195 (1.9111) grad: 0.2390 (0.2379) time: 0.4593 data: 0.0050 max mem: 22448 +train: [18] [260/400] eta: 0:01:03 lr: 0.000006 loss: 1.9347 (1.9130) grad: 0.2390 (0.2379) time: 0.4425 data: 0.0049 max mem: 22448 +train: [18] [280/400] eta: 0:00:54 lr: 0.000006 loss: 1.9347 (1.9123) grad: 0.2423 (0.2379) time: 0.4522 data: 0.0050 max mem: 22448 +train: [18] [300/400] eta: 0:00:45 lr: 0.000005 loss: 1.9162 (1.9115) grad: 0.2328 (0.2377) time: 0.4472 data: 0.0050 max mem: 22448 +train: [18] [320/400] eta: 0:00:36 lr: 0.000005 loss: 1.9204 (1.9139) grad: 0.2323 (0.2376) time: 0.4389 data: 0.0049 max mem: 22448 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 1.9089 (1.9106) grad: 0.2323 (0.2373) time: 0.4527 data: 0.0047 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 1.8800 (1.9105) grad: 0.2317 (0.2371) time: 0.4501 data: 0.0048 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 1.9002 (1.9102) grad: 0.2319 (0.2370) time: 0.4395 data: 0.0045 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 1.8809 (1.9087) grad: 0.2319 (0.2372) time: 0.4315 data: 0.0045 max mem: 22448 +train: [18] Total time: 0:03:01 (0.4531 s / it) +train: [18] Summary: lr: 0.000003 loss: 1.8809 (1.9087) grad: 0.2319 (0.2372) +eval (validation): [18] [ 0/85] eta: 0:04:22 time: 3.0935 data: 2.8604 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:31 time: 0.3540 data: 0.0064 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:18 time: 0.3423 data: 0.0035 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:09 time: 0.3270 data: 0.0040 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3155 data: 0.0038 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3094 data: 0.0036 max mem: 22448 +eval (validation): [18] Total time: 0:00:31 (0.3690 s / it) +cv: [18] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.421 acc: 0.280 f1: 0.222 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:23:20 lr: nan time: 3.5012 data: 3.1103 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:47 lr: 0.000003 loss: 1.9216 (1.9151) grad: 0.2276 (0.2316) time: 0.4546 data: 0.0031 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:09 lr: 0.000003 loss: 1.8974 (1.8921) grad: 0.2283 (0.2334) time: 0.4495 data: 0.0048 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:47 lr: 0.000002 loss: 1.9032 (1.9126) grad: 0.2338 (0.2341) time: 0.4231 data: 0.0047 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:35 lr: 0.000002 loss: 1.9256 (1.9110) grad: 0.2302 (0.2327) time: 0.4675 data: 0.0051 max mem: 22448 +train: [19] [100/400] eta: 0:02:22 lr: 0.000002 loss: 1.9113 (1.9107) grad: 0.2331 (0.2340) time: 0.4308 data: 0.0051 max mem: 22448 +train: [19] [120/400] eta: 0:02:11 lr: 0.000002 loss: 1.9090 (1.9104) grad: 0.2358 (0.2345) time: 0.4327 data: 0.0050 max mem: 22448 +train: [19] [140/400] eta: 0:02:00 lr: 0.000001 loss: 1.8919 (1.9061) grad: 0.2310 (0.2337) time: 0.4305 data: 0.0051 max mem: 22448 +train: [19] [160/400] eta: 0:01:50 lr: 0.000001 loss: 1.8773 (1.9017) grad: 0.2258 (0.2331) time: 0.4565 data: 0.0049 max mem: 22448 +train: [19] [180/400] eta: 0:01:41 lr: 0.000001 loss: 1.8661 (1.8979) grad: 0.2378 (0.2341) time: 0.4374 data: 0.0051 max mem: 22448 +train: [19] [200/400] eta: 0:01:31 lr: 0.000001 loss: 1.8732 (1.8988) grad: 0.2383 (0.2340) time: 0.4362 data: 0.0050 max mem: 22448 +train: [19] [220/400] eta: 0:01:22 lr: 0.000001 loss: 1.8887 (1.8976) grad: 0.2313 (0.2335) time: 0.4425 data: 0.0050 max mem: 22448 +train: [19] [240/400] eta: 0:01:12 lr: 0.000001 loss: 1.9061 (1.8973) grad: 0.2298 (0.2338) time: 0.4375 data: 0.0048 max mem: 22448 +train: [19] [260/400] eta: 0:01:03 lr: 0.000000 loss: 1.8974 (1.8969) grad: 0.2338 (0.2339) time: 0.4389 data: 0.0050 max mem: 22448 +train: [19] [280/400] eta: 0:00:54 lr: 0.000000 loss: 1.9041 (1.8994) grad: 0.2338 (0.2339) time: 0.4290 data: 0.0048 max mem: 22448 +train: [19] [300/400] eta: 0:00:45 lr: 0.000000 loss: 1.9041 (1.8988) grad: 0.2295 (0.2337) time: 0.4508 data: 0.0049 max mem: 22448 +train: [19] [320/400] eta: 0:00:36 lr: 0.000000 loss: 1.8935 (1.9007) grad: 0.2282 (0.2339) time: 0.4366 data: 0.0048 max mem: 22448 +train: [19] [340/400] eta: 0:00:26 lr: 0.000000 loss: 1.9152 (1.9022) grad: 0.2307 (0.2337) time: 0.4389 data: 0.0048 max mem: 22448 +train: [19] [360/400] eta: 0:00:17 lr: 0.000000 loss: 1.9152 (1.9028) grad: 0.2251 (0.2333) time: 0.4471 data: 0.0051 max mem: 22448 +train: [19] [380/400] eta: 0:00:08 lr: 0.000000 loss: 1.9055 (1.9028) grad: 0.2251 (0.2331) time: 0.4424 data: 0.0049 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 1.9238 (1.9051) grad: 0.2334 (0.2333) time: 0.4415 data: 0.0051 max mem: 22448 +train: [19] Total time: 0:02:59 (0.4495 s / it) +train: [19] Summary: lr: 0.000000 loss: 1.9238 (1.9051) grad: 0.2334 (0.2333) +eval (validation): [19] [ 0/85] eta: 0:04:28 time: 3.1549 data: 2.9144 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:30 time: 0.3400 data: 0.0047 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:18 time: 0.3262 data: 0.0046 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:09 time: 0.3441 data: 0.0040 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3345 data: 0.0040 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3237 data: 0.0040 max mem: 22448 +eval (validation): [19] Total time: 0:00:31 (0.3721 s / it) +cv: [19] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.401 acc: 0.280 f1: 0.222 +saving checkpoint experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.28017718715393136, "hparam": [0.38, 1.0], "hparam_id": 18, "epoch": 19, "is_best": false, "best_score": 0.2825765965300849} +eval (train): [20] [ 0/509] eta: 0:26:09 time: 3.0844 data: 2.7911 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:14 time: 0.3924 data: 0.0038 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:24 time: 0.3452 data: 0.0040 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:02:59 time: 0.3258 data: 0.0039 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:44 time: 0.3365 data: 0.0039 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:36 time: 0.3776 data: 0.0047 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:26 time: 0.3482 data: 0.0042 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:17 time: 0.3478 data: 0.0042 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:08 time: 0.3400 data: 0.0043 max mem: 22448 +eval (train): [20] [180/509] eta: 0:01:59 time: 0.3289 data: 0.0038 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:52 time: 0.3571 data: 0.0045 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:44 time: 0.3522 data: 0.0039 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:37 time: 0.3575 data: 0.0043 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:29 time: 0.3370 data: 0.0037 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:22 time: 0.3586 data: 0.0040 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:15 time: 0.3516 data: 0.0040 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:07 time: 0.3620 data: 0.0042 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:00 time: 0.3535 data: 0.0040 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:53 time: 0.3235 data: 0.0038 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:46 time: 0.3698 data: 0.0042 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.3293 data: 0.0038 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:31 time: 0.3299 data: 0.0039 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3266 data: 0.0038 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3303 data: 0.0037 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3373 data: 0.0038 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3372 data: 0.0038 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3263 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:02:59 (0.3524 s / it) +eval (validation): [20] [ 0/85] eta: 0:03:58 time: 2.8075 data: 2.5423 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:29 time: 0.3306 data: 0.0035 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:17 time: 0.3274 data: 0.0038 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3329 data: 0.0040 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3172 data: 0.0039 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3170 data: 0.0039 max mem: 22448 +eval (validation): [20] Total time: 0:00:30 (0.3589 s / it) +eval (test): [20] [ 0/85] eta: 0:04:00 time: 2.8249 data: 2.5831 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3475 data: 0.0040 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:18 time: 0.3334 data: 0.0039 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3277 data: 0.0040 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3224 data: 0.0042 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3141 data: 0.0040 max mem: 22448 +eval (test): [20] Total time: 0:00:30 (0.3633 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:25 time: 3.2320 data: 2.9226 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:30 time: 0.3623 data: 0.0043 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:17 time: 0.3439 data: 0.0040 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3374 data: 0.0039 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3272 data: 0.0041 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3159 data: 0.0039 max mem: 22448 +eval (testid): [20] Total time: 0:00:31 (0.3785 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.2825765965300849, "hparam": [0.32, 1.0], "hparam_id": 17, "epoch": 14, "is_best": true, "best_score": 0.2825765965300849} +eval (train): [20] [ 0/509] eta: 0:24:58 time: 2.9433 data: 2.6615 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:03:57 time: 0.3625 data: 0.0170 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:14 time: 0.3418 data: 0.0038 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:02:57 time: 0.3565 data: 0.0041 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:44 time: 0.3486 data: 0.0043 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:36 time: 0.3759 data: 0.0044 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:26 time: 0.3499 data: 0.0041 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:17 time: 0.3528 data: 0.0040 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:10 time: 0.3685 data: 0.0046 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:01 time: 0.3485 data: 0.0039 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:55 time: 0.3909 data: 0.0044 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:48 time: 0.3923 data: 0.0046 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:40 time: 0.3563 data: 0.0043 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:32 time: 0.3638 data: 0.0042 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:25 time: 0.3976 data: 0.0048 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:17 time: 0.3627 data: 0.0042 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:10 time: 0.3389 data: 0.0039 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:02 time: 0.3620 data: 0.0041 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:55 time: 0.3789 data: 0.0040 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:47 time: 0.3707 data: 0.0045 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:40 time: 0.3655 data: 0.0037 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:32 time: 0.3700 data: 0.0042 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:25 time: 0.3812 data: 0.0047 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3582 data: 0.0040 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3596 data: 0.0043 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3523 data: 0.0042 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3277 data: 0.0041 max mem: 22448 +eval (train): [20] Total time: 0:03:08 (0.3700 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:30 time: 3.1806 data: 2.8892 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:32 time: 0.3633 data: 0.0034 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3367 data: 0.0037 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3423 data: 0.0041 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3390 data: 0.0039 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3387 data: 0.0038 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3819 s / it) +eval (test): [20] [ 0/85] eta: 0:04:25 time: 3.1288 data: 2.8627 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:31 time: 0.3527 data: 0.0049 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:19 time: 0.3825 data: 0.0038 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3454 data: 0.0043 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3267 data: 0.0041 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3169 data: 0.0039 max mem: 22448 +eval (test): [20] Total time: 0:00:32 (0.3843 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:24 time: 3.2287 data: 2.9313 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:33 time: 0.4069 data: 0.0045 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:18 time: 0.3504 data: 0.0038 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3240 data: 0.0031 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3468 data: 0.0042 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3347 data: 0.0040 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.3928 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:------------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 14 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | train | 2.0744 | 0.37739 | 0.0023685 | 0.31818 | 0.0024371 | +| flat_mae | patch | attn | nsd_cococlip | best | 14 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | validation | 2.3997 | 0.28258 | 0.0053957 | 0.21861 | 0.0048833 | +| flat_mae | patch | attn | nsd_cococlip | best | 14 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | test | 2.297 | 0.30705 | 0.0052509 | 0.23503 | 0.0054134 | +| flat_mae | patch | attn | nsd_cococlip | best | 14 | 9.6e-05 | 0.05 | 17 | [0.32, 1.0] | testid | 2.2555 | 0.30268 | 0.0059209 | 0.24589 | 0.0058978 | + + +done! total time: 1:22:43 diff --git a/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..199ef5c99fedc7cb2907501edbb0c9da8c7db7a1 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.122056838274002, "train/grad": 0.15862265221774577, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.17506591796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.174384765625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.173377685546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.172379150390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.171434326171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.17017822265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.168804931640625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.167340087890625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.165479736328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.16385986328125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.162320556640625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.160264892578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.158411865234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.15626708984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1544775390625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.153018798828125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.151365966796875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.149814453125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.148248291015625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.147144775390625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.145828857421875, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.144759521484375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.14371826171875, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.14265380859375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.141920166015625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.141153564453125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.14072265625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1403857421875, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.140081787109375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.139854736328125, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.139835205078125, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.13978759765625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1396240234375, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.13955078125, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.139217529296875, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.13842041015625, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.1363427734375, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.1309356689453125, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1181671142578127, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.0951477813720705, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0766571044921873, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.060124816894531, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0361005783081056, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.0154237747192383, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.9939691162109376, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.975271987915039, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9612810134887697, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.945795774459839, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.934416790008545, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02367833539377898, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023623792976140975, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023535317084752025, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023450100449845194, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02336865560617298, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02325943216215819, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023143129972741008, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023019959782250226, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02287039338145405, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022723740451037883, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022593341772444545, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022418366079218686, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0222700320975855, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022090107728727163, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02194761170540005, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02183461220469326, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021713474518619477, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021600055363960563, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021499269525520504, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021423723506741224, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021349735679104923, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02128928478807211, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02123747954610735, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021194771509617568, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.021163047668524088, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.021136969020590188, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.021123220985755323, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.021116769183427097, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.021112480112351478, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.021113203200511636, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.021113401991315187, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02110557532403618, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021081409882754087, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.021036932072602214, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.02095242641866207, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.02084239376708865, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.020715697724372147, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.020672333096154034, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.020827494296245277, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.021419597784988583, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02209762342274189, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022674670196138322, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02321818413678557, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02371596156153828, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.024560806797817348, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02570042378734797, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02657798659056425, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.027666840245947243, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.028809934239834547, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1611249446868896, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1592347621917725, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1562304496765137, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.153529644012451, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1510376930236816, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.147831439971924, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1445729732513428, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.141352415084839, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.137824296951294, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1346895694732666, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.132230281829834, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.129441976547241, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.127454996109009, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1256103515625, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1245381832122803, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1238975524902344, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1234195232391357, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1230356693267822, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.122816562652588, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.122684955596924, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.12264084815979, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.122785806655884, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1232073307037354, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1239025592803955, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.124940872192383, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1263227462768555, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.12754225730896, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.128444194793701, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1292383670806885, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.128904104232788, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1270976066589355, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1243302822113037, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1197540760040283, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.115018844604492, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1073923110961914, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.093646287918091, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0565266609191895, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9698879718780518, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8650920391082764, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7635319232940674, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7097182273864746, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.663285970687866, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6364758014678955, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.625000476837158, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.569119930267334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.5900180339813232, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.5725886821746826, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.6132218837738037, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.751366376876831, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.054078995939461055, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05444813584348468, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05352528608342562, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05260243632336656, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.05352528608342562, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.05518641565153193, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.05795496493170912, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05740125507567368, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.05832410483573274, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.058693244739756366, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06884459210040605, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.067921742340347, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06866002214839424, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07419712070874862, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07419712070874862, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0754891103728313, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07788851974898486, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07604282022886674, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07179771133259505, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.06459948320413436, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.05758582502768549, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.05463270579549649, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.05389442598744924, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.05555555555555555, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.059431524547803614, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0651531930601698, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07364341085271318, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.08010335917312661, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07788851974898486, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08619416758951642, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.09892949427833149, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.12033960871170174, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.14581026208933184, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.17183462532299743, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19010705057216684, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20302694721299372, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20450350682908822, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2351421188630491, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.23864894795127353, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.2320044296788483, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.21650055370985605, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.20044296788482835, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014324538000319081, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014423500217664642, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014077787521299893, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013799850560012783, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01436309126807613, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015790801867797435, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.016528949089379425, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.016505162163958937, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0154111805417488, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01493457537526674, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012582437102319807, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.011845866512182232, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.012218822095426865, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.011757756233854935, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.011666877132897963, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.012161862199100454, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.012638559300601994, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.012421706392266993, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.012821887107592545, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.012961895873782494, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.014772391952063653, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.014996888363463025, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.019017364184214663, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.019283777070840762, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.018529561744477698, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.014522725154088442, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.01077076583391671, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.00819566017704752, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.007771868936277778, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.008779626201203533, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.010450756813550328, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.012227014569037334, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.015635301702189162, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01805304246407494, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.01920605081018732, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.0298287108527316, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.04227301894686003, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.058941509916803046, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.07815593912803588, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.09716756653154524, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.11198239253614824, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.11899268568103699, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.12197388924531423, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1407067151020557, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.16447294759270753, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1608617687451579, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.15822200765610903, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.14332876005338227, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.12990106278625993, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 2.975271987915039, "validation/loss_best": 2.5900180339813232, "validation/acc_best": 0.23864894795127353, "validation/f1_best": 0.1608617687451579} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.95125066280365, "train/grad": 0.18326702378690243, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1522021484375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.14974609375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.146259765625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.14343017578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.14110107421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.138555908203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.136241455078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13431640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1324951171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.131153564453125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.130255126953125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.129368896484375, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.128782958984375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.128233642578125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.127838134765625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.12760986328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.127279052734375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12696533203125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.126353759765625, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.126058349609375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1254736328125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.124779052734375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.123995361328125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.1230224609375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.12192626953125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.12014404296875, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.11775634765625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1137396240234376, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.10015625, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.062332763671875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.0147689819335937, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.9682534027099607, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.907274703979492, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.851820411682129, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.78739143371582, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.727754592895508, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.6706420516967775, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.6233855438232423, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.583855514526367, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.5572248458862306, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.543669776916504, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.527925338745117, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.523390045166016, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.52333291053772, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5297527265548707, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.549598174095154, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.5793947553634644, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.6329502773284914, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.6913272166252136, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021440320159308614, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02123002362437546, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020931459232233465, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020689775641076266, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020493300138041376, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02027898729313165, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020100824502296746, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01996365835890174, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01984448901377618, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019768780125305058, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019725997699424624, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019698309609666467, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019692000122740864, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0197063002968207, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01972722011618316, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019748970768414437, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019778489819727837, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019811263643205165, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019845684212632476, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01987538516521454, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01990728630218655, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.019934024754911663, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01995430591981858, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.019961151229217648, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.019941305187530817, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01987172469496727, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.019764428990893066, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.019653350743465126, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.01963422548491508, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.020127255516126753, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.020853740298189222, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.021598692061379553, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.022651619156822563, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02354695533402264, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024768171785399317, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.026214986331760882, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02769015762023628, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.029196196193806825, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03063111745752394, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03247667101211846, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03385106268338859, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03470817905850709, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03584316335618496, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03711240065284074, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03810525658540428, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.039845747416839, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04128976909443736, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0444147027656436, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.049322797982022167, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1364591121673584, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1339423656463623, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1307215690612793, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1285147666931152, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.126974105834961, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1256296634674072, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1247189044952393, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1241707801818848, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1237905025482178, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.123565912246704, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.12338924407959, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1231417655944824, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.122875213623047, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.12239146232605, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1218435764312744, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1212735176086426, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1205084323883057, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.119654417037964, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.118844747543335, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1182773113250732, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1178557872772217, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.117521047592163, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.117314338684082, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1171162128448486, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.116713523864746, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1144206523895264, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.105687141418457, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.0804412364959717, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9784741401672363, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.811018466949463, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7290079593658447, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6896133422851562, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6349234580993652, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.587672472000122, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5526673793792725, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5339503288269043, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.483530044555664, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.447753429412842, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.442343235015869, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.460188865661621, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.4549083709716797, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.469646453857422, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.521754741668701, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.596930503845215, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.74821138381958, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.82185959815979, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.8131020069122314, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.9553749561309814, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.963127374649048, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06035437430786268, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06127722406792174, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06533776301218161, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07272056109265411, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.07456626061277224, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.07918050941306755, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.09339239571797711, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.12403100775193798, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.15873015873015872, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.17571059431524547, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.19010705057216684, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20191952750092285, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23126614987080105, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24916943521594684, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.26301218161683276, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2563676633444075, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.25692137320044295, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2561830933923957, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2456626061277224, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22535991140642303, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21613141380583242, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.20967146548541898, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.19287559985234404, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.20081210778885197, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014571912977194583, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012688838757778514, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009627047948571013, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00929294177189612, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0085500998258328, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008249413802016247, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.007803233342593071, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.007778601025969899, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.00788596723753391, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.00785005954589799, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.007755622510115277, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.00788241503208504, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.007630502838121295, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.008050397485814638, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.009246372499641534, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.00904063148611116, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.01062046314433185, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.013587727323212293, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.015316151251230701, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.016310284830746424, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.017295583499414898, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.016549691692845158, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.016469794722192278, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.016491224775927852, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.015644832683907508, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.015239481765638626, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.018264593019618493, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.030233366584209508, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.05241874980933612, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.0756681871972192, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.09207813172593395, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.10835311561363026, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1251945252610198, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.14411834814722102, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.15408148382516948, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1615676382500839, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18104301022910954, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19667565369669623, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.20348792331124213, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19390749857958353, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18951552581107423, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19063052325255522, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18025602762178786, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1550197208667263, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14014031143181663, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.14777260179098775, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.13782340658366724, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.13113945666893922, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.12913101259577187, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 2.583855514526367, "validation/loss_best": 2.442343235015869, "validation/acc_best": 0.26301218161683276, "validation/f1_best": 0.20348792331124213} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.968183333873749, "train/grad": 0.4055521146953106, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13615478515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1346142578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.13304931640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.13215087890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.131610107421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.131158447265625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.130885009765625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.130614013671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.130262451171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.129859619140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.129620361328125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1289404296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.128370361328125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.127432861328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.126546630859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.125806884765625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.124638671875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12325927734375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.12163818359375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.119970703125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.117392578125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.11322265625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.103482666015625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0748748779296875, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.017322235107422, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.9288189697265623, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.844826965332031, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.7714527893066405, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.6814421081542967, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.5964703369140625, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.540150451660156, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.498241596221924, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.4473151779174804, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.4105120658874513, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3792692279815673, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.357372970581055, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.345171947479248, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3327972793579104, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3379393672943114, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3507625579833986, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3767136335372925, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.387884397506714, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4325093746185305, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.4751405549049377, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5157031869888304, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.608741343021393, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.927914069890976, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.4523159897327425, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.438640768527985, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0201344952872023, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02001330969389528, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019894378930330275, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019839795231819154, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01981840157415718, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01981610816437751, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019827271271497013, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019844750952906907, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019868879998102783, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01988979606423527, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01990863569546491, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019933014782145618, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019954192135483027, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019980384833179415, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020000181123614312, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020016685160808265, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.020030624903738498, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020037256218492984, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02002787264995277, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.01999991949647665, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01993669195100665, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01983901042956859, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.019752070968970657, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02000922323204577, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.020958141479641198, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02242409751750529, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02372877084184438, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02503840997815132, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02680845534428954, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.028686207346618177, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.029916560789570213, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.030847858730703594, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.032120383698493245, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.033313175067305564, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03440142666921019, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0354941360745579, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0369653170928359, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.038096347423270345, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03901783426292241, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0405298354011029, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04154942164197564, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04206145924516022, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04500881662592292, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04673029465600848, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.047592215444892644, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05437834834679961, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.14351704774424434, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.12422614619135856, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.14162326907739042, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1258132457733154, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.125089645385742, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.124406576156616, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.124121904373169, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.123974084854126, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1238348484039307, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.123713731765747, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1235995292663574, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1234631538391113, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.123353958129883, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1233150959014893, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1232407093048096, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.123194694519043, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1231582164764404, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1231117248535156, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.122892141342163, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.122316598892212, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1210217475891113, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1184628009796143, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1148793697357178, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.107344388961792, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0912997722625732, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.0395193099975586, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.895099401473999, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.746272563934326, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6579320430755615, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.611668825149536, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.564284324645996, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4906439781188965, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4505136013031006, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4352681636810303, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4303438663482666, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4324207305908203, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.45125150680542, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.484464645385742, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.480189800262451, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.484549045562744, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.4644558429718018, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5216073989868164, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5075151920318604, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.603680372238159, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.5759902000427246, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6383464336395264, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6362929344177246, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6471076011657715, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.69024395942688, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.067921742340347, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07050572166851236, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07069029162052418, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06995201181247693, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0695828719084533, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07475083056478406, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.08471760797342193, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.10538944259874493, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.15005537098560354, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.18327796234772978, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.19859726836471023, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25599852344038393, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26891842008121075, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26891842008121075, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.268733850129199, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.27002583979328165, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.260797342192691, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2567368032484312, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2563676633444075, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2528608342561831, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2604282022886674, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2497231450719823, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.24437061646363972, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23477297895902546, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2427094868955334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2349575489110373, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21963824289405684, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008983528039789938, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009329067454429326, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.010058065121239995, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.010439936620793734, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010802822946620367, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01129470264953135, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01164838687419756, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011938013676358356, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012528806510002289, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012769421654308312, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013542074847550621, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01402749561444014, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01384681894606731, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.013685516419860566, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.013052870166725211, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01317329956519576, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.012807803195327552, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.014339361350656388, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.014267548196155397, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.015300223754177262, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.017339505394900777, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.024009416906657786, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.03853934493666756, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.06893306861342413, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.09387863297738418, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.11453584170660804, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.141343462744109, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.15642874353462857, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18353595604424286, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19554673646752826, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19602337889399202, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19968018298180742, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.20228867468133402, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19728461930781996, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19348104450791112, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19215832216766837, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1908488187822742, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.20495530210830148, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.20069112199257744, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.20495674461648625, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1691382334072087, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1787981425120774, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17473510110845447, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17714600721974347, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.17355404279997053, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.16746731366959458, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.4473151779174804, "validation/loss_best": 2.4324207305908203, "validation/acc_best": 0.27002583979328165, "validation/f1_best": 0.20228867468133402} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.81542072057724, "train/grad": 0.30285791285336017, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1295263671875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12905517578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1285595703125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12811279296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1277978515625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.127235107421875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.126708984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.126173095703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.125386962890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.124505615234375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.1236865234375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12244140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1212841796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.119461669921875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.117635498046875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.115819091796875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.113138427734375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.108568115234375, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0985748291015627, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.074862060546875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.008442077636719, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.910351104736328, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.7978675842285154, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.6984632873535155, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.622056121826172, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.5437474822998047, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4835615158081055, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.435039939880371, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.3851190757751466, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.334302053451538, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.2996474647521974, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.2747886753082276, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.2456412982940672, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.2280186939239504, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.222403031587601, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2150271010398863, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2200699174404144, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2330035543441773, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.264863942861557, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.317060670852661, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3550360369682313, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.3888531064987184, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4535235488414764, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5283470273017885, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.636109081506729, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.950116637945175, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01971901294775307, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019723574751988052, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019741679625585675, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019760433519259095, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01977545448113233, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019794044196605683, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01980958165600896, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01982520737685263, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01984200392384082, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01985902458894998, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019872021209448577, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01988627784419805, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019896371616050602, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019897962189279497, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01988916388247162, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019868032783269884, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.019823444052599372, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019745005727745592, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019661708902567625, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.019823623816482724, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02081681969575584, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02252697466406971, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02454580446705222, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02632655208930373, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.027694184416905045, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.029270434314385058, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.030779420007020233, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.032306656157597896, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0337510294560343, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03507370722480118, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.035992583446204665, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.036878480156883595, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03773109748028219, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03844267698936164, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.038920126426965, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04005250157788396, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0418014712817967, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.043176733516156675, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04458280669525266, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04626395156607032, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04722557151690125, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.048336959313601255, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05100252825766802, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05413909412920475, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.060479342546314, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.14864153178408743, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123887062072754, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1236588954925537, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123351812362671, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1230461597442627, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122675657272339, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122178792953491, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1216020584106445, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1209347248077393, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.119985580444336, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1189894676208496, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.117990255355835, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1165614128112793, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.115199089050293, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.113269090652466, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.111330270767212, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.109175682067871, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.105242967605591, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.0954461097717285, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0614359378814697, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.9655539989471436, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.804774284362793, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.7061455249786377, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.647580146789551, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5970330238342285, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5315330028533936, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4718992710113525, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4736411571502686, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.478992462158203, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4725539684295654, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4659159183502197, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4528822898864746, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4404449462890625, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.441077947616577, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.479907751083374, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5092720985412598, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5486881732940674, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.544151782989502, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6110761165618896, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5937325954437256, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5973868370056152, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.613507032394409, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.653200387954712, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.73750376701355, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.8391306400299072, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.9801182746887207, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.067921742340347, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07308970099667775, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07475083056478406, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07862679955703211, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07881136950904392, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07899593946105574, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.08102620893318568, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.08656330749354005, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.10022148394241417, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.13473606496862311, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.16629752676264303, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.18530823181985973, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2026578073089701, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2129937246216316, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25378368401624213, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25212255444813586, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2497231450719823, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2596899224806202, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2631967515688446, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2643041712809155, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2739018087855297, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.26208933185677374, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.26504245108896274, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2517534145441122, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24197120708748615, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23717238833517904, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23108157991878922, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23015873015873015, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2410483573274271, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22443706164636398, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.1925064599483204, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009918201112914502, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.009840210245034571, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00962289171717456, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01007799944381006, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010400828929980009, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010668742283276125, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01170635538794368, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012927247441622167, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013886692268054376, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014135859687853516, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015379205690543083, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01677597140361142, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.018896044148383553, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.022819983604597017, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.02429011388574273, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.025278396014631027, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.02775665034533681, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.03402747705099727, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.046035518999566226, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.07328015094406563, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.09501312144394547, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1102823960080439, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.12708657329836373, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.13743216494266128, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.15752550999447018, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18477254334950158, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19012156228501062, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1918439249452686, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1920252271831624, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1977010282392252, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19758278807614374, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19523246587892965, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19838791406434778, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1843830700282342, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18807702933854972, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1881584110970539, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1878513879560296, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18242291424857227, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17979297821125628, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18923781851473254, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18917882204647543, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18183783095440778, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16519552663122747, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14712917394383065, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14318984216942704, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.2456412982940672, "validation/loss_best": 2.441077947616577, "validation/acc_best": 0.2739018087855297, "validation/f1_best": 0.19838791406434778} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.7531185722351075, "train/grad": 0.28633379109203816, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.131612548828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13119873046875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.130584716796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.130001220703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12939697265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1286181640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.127705078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.126663818359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1254150390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1239599609375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.122608642578125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12043701171875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.118311767578125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.114654541015625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1098583984375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1020697021484374, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0781976318359376, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.005371398925781, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.8850611877441406, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.776480407714844, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6783551025390624, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.606424331665039, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5337539672851563, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.4639775276184084, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.397942371368408, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.3326810073852537, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2875477933883666, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2527961254119875, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.224153597354889, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.192526454925537, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.172890675067902, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.1560617017745973, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.147647695541382, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.143519625663757, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.156238878965378, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1645658731460573, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1794583976268767, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2038992929458616, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.253461879491806, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.327718756198883, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3974567091464998, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4549443411827085, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5581595981121064, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.6523467123508455, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.558577359914779, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019557765489444136, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01957015575375408, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01958452376537025, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019595407927408815, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019603793709538877, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0196134714782238, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019622896877117456, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019632351719774305, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019642212623730303, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01964948275592178, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01965298884548247, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019649620363488794, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019634810630232095, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019589436035603286, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019518941245041787, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019447336811572312, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01954596587456763, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.020526122762821616, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022673709481023253, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.024748238478787245, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.026503438744693995, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.027793097058311105, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.029161462364718317, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.030832814732566476, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.032675952035933736, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0344540030695498, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.035641948506236074, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0365146697498858, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.037096121665090326, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03786276772618294, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03873909663408995, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.039327150974422694, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.040029493663460015, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.041638405695557594, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04252253472805023, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04382203744724393, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04618397718295455, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04760166432708502, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.048618317656219004, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05089485229924321, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05300981229171157, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0551981694996357, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05974257728084922, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06485967762768269, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.1061857732385397, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123633861541748, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123460292816162, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123175859451294, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.122868776321411, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122537136077881, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.122089385986328, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1214826107025146, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120762825012207, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.119687080383301, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1183738708496094, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1169474124908447, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1145780086517334, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1119236946105957, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.106630325317383, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.0972492694854736, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.075971841812134, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.995189905166626, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.831624984741211, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.707263469696045, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.644679069519043, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5932297706604004, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5499074459075928, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.500131368637085, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4483041763305664, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4307684898376465, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4336960315704346, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4324135780334473, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4209659099578857, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.433039903640747, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4738216400146484, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4989192485809326, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5327794551849365, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.52976655960083, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5981101989746094, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.558633327484131, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.570152759552002, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.601757287979126, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6592776775360107, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.701718807220459, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7447407245635986, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.819689989089966, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8960297107696533, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8474926948547363, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.104330062866211, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06921373200442968, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.073827980804725, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.08157991878922112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.09726836471022518, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.1227390180878553, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.15596160944998155, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.18530823181985973, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.19915097822074565, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2159468438538206, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.23015873015873015, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24953857511997046, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26153562200073827, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2663344407530454, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2661498708010336, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.26411960132890366, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26559616094499816, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2643041712809155, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25212255444813586, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2500922849760059, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24160206718346253, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23901808785529716, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23421926910299004, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2349575489110373, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22425249169435216, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22111480251015134, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2129937246216316, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21077888519748986, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20468807678110004, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0072610691846685905, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.006793947939449293, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0061423464400573, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0064055461805161105, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.006269733981787371, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.006924589071951293, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.00788656591187814, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.008962693035889207, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011094675622509565, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.011840049122132532, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013647989735932314, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.015063937249001104, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01709608210865591, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.019600230273210336, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.026433576600037534, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.04044808133548061, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.05511576526806985, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.08304383196499317, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.11181345417658395, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1236651691827953, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.14332039354404202, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1592330356637834, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18027787805317874, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19304600588351728, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19788167867523865, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20191571534429287, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.203170772618912, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2084334355435248, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21622392228468654, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20461198872991004, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19889798676108028, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18789557675185817, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19139860350781834, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18204385467612702, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19646569675792877, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1897061764162362, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1935745043915539, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18591420962174285, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1794060514938427, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17639748185816825, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16499393057496867, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15396314540821965, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1694576207216315, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14511755983326832, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.397942371368408, "validation/loss_best": 2.4307684898376465, "validation/acc_best": 0.2663344407530454, "validation/f1_best": 0.19788167867523865} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.6309801363945007, "train/grad": 0.2467903597652912, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12413818359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12351806640625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1225830078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12176513671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.120848388671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.119583740234375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11823974609375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.116810302734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.114879150390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.112857666015625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11053466796875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.1068212890625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1017138671875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0871966552734373, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0463287353515627, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.959844970703125, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8215174865722656, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.698279495239258, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.6146121215820313, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.5495057678222657, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.4676973724365237, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.3937021255493165, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.326481885910034, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.264939351081848, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.2109901809692385, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.1591860461235046, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.11918253660202, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.088449115753174, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.062209630012512, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0473228788375852, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.040091350078583, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0313965678215027, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.030622845888138, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.017479050159454, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.0477748787403107, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.0774303770065305, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.0808117413520812, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.108216234445572, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.1612545692920686, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.239494768381119, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3088569527864458, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.391885939836502, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.523019179105759, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.568579763174057, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019836001913063228, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01984515999443829, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019859482063911856, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019872415671125056, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01988256086129695, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019896273161284627, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019910001130774617, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019920773766934873, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019929627180099486, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019932732032611967, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01992695066612214, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019901464162394404, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019854490836150943, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019818529789336025, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020236462536267937, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021562852477654815, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023900004317983985, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02605293309316039, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.027300249645486475, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.028471316378563642, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03053197305649519, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.032580496734008195, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03431388004682958, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0358760928735137, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03686685597524047, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.037925940090790394, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.038983418699353935, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03961249409243464, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.039859421383589506, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04084158996120095, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.042096725292503835, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04301697419956327, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.044121668562293055, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0455481113307178, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04642682984471321, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04840999944135547, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05049487398937345, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.051123189385980365, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052420092821121214, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.053506382368505, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05454408647492528, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0572133270278573, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0631944097019732, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06440559633076191, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.12174916267395, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.121229648590088, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.120335340499878, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1194891929626465, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.118650436401367, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.117516279220581, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.116225004196167, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1147866249084473, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1128487586975098, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1105711460113525, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.108058452606201, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1029226779937744, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.094322443008423, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.06057071685791, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.954972505569458, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.811702013015747, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.694207191467285, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.6246140003204346, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.560431957244873, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.495985984802246, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4454739093780518, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4246764183044434, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.400702714920044, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.3766860961914062, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.3583195209503174, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.3681702613830566, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.390359401702881, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4398303031921387, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.485414743423462, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.516270875930786, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5507683753967285, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5759565830230713, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5869836807250977, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.615631341934204, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6707823276519775, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.7227392196655273, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7901053428649902, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.81933331489563, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.794278860092163, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8291962146759033, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8468058109283447, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9821617603302, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.84627628326416, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0160861015319824, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.067921742340347, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0710594315245478, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07216685123661867, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07770394979697305, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1020671834625323, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.13233665559246954, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.16002214839424142, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.18844592100406055, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20191952750092285, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2216685123661868, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24141749723145073, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2596899224806202, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27113325950535255, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2751937984496124, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2822074566260613, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.28183831672203763, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28183831672203763, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.28017718715393136, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2722406792174234, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.26596530084902176, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2589516426725729, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2541528239202658, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2543373938722776, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2456626061277224, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2456626061277224, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21262458471760798, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21483942414174972, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20782576596530086, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1847545219638243, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2069029162052418, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.1906607604282023, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0066667341808458085, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.006936930272409286, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.006842143969526149, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.006561877768043689, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.007408362881907778, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008266149779532632, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.00967051993124613, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009797277628426485, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010637807529628751, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012410478869476252, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012958732425751051, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.014177927551501256, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01782060718124629, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.040340732452305204, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.05808779040757724, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.08166635125311537, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1064120159366528, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1190915644059193, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.14076710116177527, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.16450374129611292, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.18328369799933894, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1970727970817797, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20242229581226287, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20978359653916812, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21377621022724846, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21384185674973624, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22034251398390517, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21146565656733884, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20630495547667146, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2083721368658826, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.201530408848022, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19800877417720542, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19687692189580705, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18858784990780164, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17893438130230008, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.173836290408275, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17969383464452107, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17301053902994126, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17417655115730488, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17426728651911647, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16110730742409746, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1523791922233194, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17030364894789218, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.148356664288723, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 2.264939351081848, "validation/loss_best": 2.3766860961914062, "validation/acc_best": 0.2822074566260613, "validation/f1_best": 0.20978359653916812} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.5612549602985384, "train/grad": 0.2540136694908142, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1271630859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.126343994140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12503662109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12391357421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1227197265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.121019287109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.119163818359375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11714111328125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.114285888671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1109765625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.107017822265625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0978814697265626, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.078162841796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9895065307617186, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.8419003295898437, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.728489303588867, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.642560501098633, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.567326011657715, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.482406349182129, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.4024446296691893, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.3234853744506836, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2608432579040527, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1978133964538573, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.1382064390182496, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0889779233932497, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.0399824500083925, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.000910279750824, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9706012868881226, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.950216670036316, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.9306588995456695, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9229852789640427, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.923353134393692, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9270485049486161, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9174413001537323, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9480047988891602, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9819782280921936, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.007335741519928, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.042571981549263, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0983225905895235, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2134520137310028, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.2841819685697557, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.340841921567917, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4386015099287035, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.548846343755722, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019801025013439357, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01980519702658057, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019809959204867483, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019813792929053307, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019816041216254233, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01981793249025941, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019816256798803805, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019808981926180424, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01979125015437603, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019758894443511962, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019713604436255992, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01962885754648596, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.019651742493733762, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020830305651761593, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023265310563147068, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02530575208365917, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02680108606815338, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.028100595418363808, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.030024167438969015, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03231941525824368, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.034042493458837274, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.035123518304899334, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.036199796441942454, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.037185866348445416, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03801443224772811, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.039226517751812934, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04051031913608313, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04133845172822476, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0419524235650897, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04225687257945537, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04321472616866231, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04450447168201208, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04529744585976005, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04715341987088323, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04717390257865191, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.048733214065432545, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05168068261817098, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05199340920895338, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.053574174866080286, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05609734101220965, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05644668091088533, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05805357661098242, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06080857278779149, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06588623432442546, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.120924711227417, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.120326280593872, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.119389295578003, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.118464708328247, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.117593765258789, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1163713932037354, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1149420738220215, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.113283395767212, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1107819080352783, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1073763370513916, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.102360725402832, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0869698524475098, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.043830394744873, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.8750979900360107, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.7203025817871094, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.641679048538208, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.580291271209717, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5153799057006836, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.455819606781006, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.424553155899048, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.408229351043701, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.399502992630005, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.3882782459259033, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.3832316398620605, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.386782646179199, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.422917127609253, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.466336965560913, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5052547454833984, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5305886268615723, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5986459255218506, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6057841777801514, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.628495931625366, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6818361282348633, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7317802906036377, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.738474130630493, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.735677480697632, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.791417360305786, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.7621078491210938, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7443087100982666, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7142696380615234, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7169179916381836, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8821539878845215, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.706727981567383, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.71692156791687, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07881136950904392, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07936507936507936, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.09117755629383537, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.11609449981543005, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.14876338132152087, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.18604651162790697, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20376522702104097, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2235142118863049, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.24455518641565152, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2633813215208564, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27648578811369506, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2748246585455888, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2757475083056478, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2622739018087855, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25489110372831303, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2517534145441122, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24750830564784054, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24769287559985234, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24732373569582872, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.24215577703949798, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21631598375784422, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23403469915097821, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21391657438169065, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23237356958287192, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22443706164636398, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.008829283829797217, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008404641417461929, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008621830409851513, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009164023323927708, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009918967476817987, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012058974251625101, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014427948499722835, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015819518561426882, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.018311456063104088, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02289968573070868, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.025364671998891756, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.03619337907841486, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.052131641931645496, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07298790084192973, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10430758013250897, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.12398545453295494, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.14851122520349644, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17250522918175584, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1921737843272556, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20549404252748246, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20959641402481263, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21277175361098402, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21319917642345954, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21795119967410517, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.22008502006389433, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21224460402442058, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21062718747730766, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20582236879296203, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20192820703513817, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19988177351401915, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19960641729951786, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.197141649731886, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19234162503462313, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18644198790321656, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18688972463794384, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19349856875422114, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18701938272761778, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19380249930182672, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1825765067762121, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1955536789226061, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18604857926977955, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16574342382373208, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1838819194843091, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.17791881488205177, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 2.1382064390182496, "validation/loss_best": 2.3832316398620605, "validation/acc_best": 0.2766703580657069, "validation/f1_best": 0.21795119967410517} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.4794348812103273, "train/grad": 0.2584123960882425, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125577392578125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1246142578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12304443359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.121373291015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11989990234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1176708984375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11513916015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.112191162109375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10793212890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.101922607421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0927386474609375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.060654296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9735311889648437, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7878689575195312, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6704184722900393, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.600506248474121, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5257294845581053, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4332020568847654, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.34130220413208, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2693486404418945, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.201350688934326, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1433206844329833, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.080791921615601, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.0214358139038087, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9706583309173584, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9196799731254577, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8781350708007813, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.8496707105636596, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8322313219308852, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.820646988749504, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8171614372730256, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8108087849617005, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8137199300527573, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8043844974040986, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.8375173097848891, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.8739412665367126, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8813243228197098, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9281950616836547, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9838156187534333, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.090374425649643, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1678912514448165, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.256649605035782, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.349724144935608, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.374842270612717, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019336714674718678, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019340709741227327, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019344864473678173, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019347108528018, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01934764370787889, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019343474567867815, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019335352149792016, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01931788495276123, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01928160848096013, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019227730347774922, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01917583728209138, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019315744326449932, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02048848879057914, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02348519370891154, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.025616946555674078, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.026725401030853392, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.028065167302265763, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.030240948433056473, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.032695424649864434, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03439240746200085, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0353688498865813, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03635188763961196, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0375573696102947, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03873227203264833, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03968367103487253, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04092264238744974, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04193433463573456, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04274751529097557, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04378421325236559, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04465170960873365, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0461919404938817, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.047018262874335054, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.047479272317141295, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04888385633006692, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04940092070028186, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.050387425012886525, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05204864881932735, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05338081082329154, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.054079102259129284, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05614596489816904, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.054414464496076106, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05716685116291046, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.061252635587006804, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05992299793288112, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1202683448791504, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1196255683898926, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.118612051010132, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1175825595855713, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1165614128112793, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1151299476623535, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.113377094268799, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1111955642700195, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.107386589050293, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.100851535797119, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0880284309387207, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.031200647354126, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8952951431274414, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.7189815044403076, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6351280212402344, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.579932928085327, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5140466690063477, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4464526176452637, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4080708026885986, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.3956117630004883, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.395721673965454, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.396368980407715, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.404816150665283, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.424899101257324, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4394609928131104, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4726381301879883, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4891695976257324, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.512301445007324, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.548722743988037, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.628427505493164, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.64754319190979, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.695478677749634, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.655904531478882, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7264182567596436, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7248849868774414, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.760896921157837, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.768490791320801, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.807415246963501, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.866133213043213, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8354263305664062, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.763645648956299, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7591028213500977, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8264379501342773, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.940315008163452, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07585825027685493, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07881136950904392, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08397932816537468, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.09413067552602436, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.11720191952750092, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.14839424141749724, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1891842008121078, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2081949058693245, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2188999630860096, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24197120708748615, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2619047619047619, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27500922849760057, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2661498708010336, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26559616094499816, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2652270210409745, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.26264304171280917, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26098191214470284, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24935400516795866, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23791066814322628, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2364341085271318, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2410483573274271, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23385012919896642, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23901808785529716, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22572905131044665, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23034330011074197, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21077888519748986, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21410114433370248, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21650055370985605, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.010157639105686256, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010627389505164488, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0120178148980461, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013383394950138924, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015110534848545944, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.016002642011722995, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.01693690776512841, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.017705076830669245, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.018586979897106257, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02104512864069123, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.02780301462764746, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04653738600375632, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06680385738579497, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10676416051236848, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.12869163069417786, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.13926567076698423, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.16643672460176454, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18811819653253306, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20513043639019024, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21192809578985364, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21275314047395258, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21446502161441292, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20911219845741444, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21053393977903193, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21639049857444057, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21356623064659086, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.22054767415336354, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21625377775071405, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20433543744878271, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19520974936602498, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19019721815032686, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19141973862082992, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.20207008064660548, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19135156849698245, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19805699578039948, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18913222412785569, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19111106193797425, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18256399638333576, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17313702573482123, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1750834827360246, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18436826566513573, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18122778371363824, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15877909311979524, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.14113767134358246, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.2693486404418945, "validation/loss_best": 2.3956117630004883, "validation/acc_best": 0.27500922849760057, "validation/f1_best": 0.21192809578985364} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.407073165178299, "train/grad": 0.2658139580488205, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121434326171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.120411376953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118682861328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.116922607421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.115181884765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.112677001953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10983642578125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.106187744140625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.099888916015625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0888836669921873, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0659442138671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.96841796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.8168264770507814, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.666793746948242, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5863111877441405, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5216770553588868, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4356484603881836, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3435037994384764, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.258325023651123, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.190516414642334, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.126199493408203, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.067899479866028, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.004017333984375, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.944034160375595, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8930215871334075, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.837962919473648, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7902741199731826, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7562779575586318, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.729095288515091, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.71377014875412, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.704902431368828, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6883142894506455, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6896056890487672, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.675408890247345, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.711243742108345, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.7467670249938965, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.752599681019783, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8025138139724732, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8662211281061172, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9554479432106018, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.0704649752378463, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.127428479194641, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.2033417296409605, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.2663291877508165, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01997048600111157, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019974297294393182, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019979190239682795, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019982409016229212, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019981984570622443, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0199785467190668, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019965522470884026, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019941094052046537, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019890799056738614, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019838750162161887, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019918518755584955, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021149814366362988, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023515071496367456, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02637538299895823, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027849640911445022, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029123060973361134, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03120214318856597, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03370430240407586, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.035746409436687825, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03690921592526138, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03750162928365171, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03833339602686465, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039575033644214275, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.040812109131366014, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04198380470275879, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04335508823394776, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04486221220344305, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04589120229706168, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04683786118403077, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04786321735009551, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04843033635988832, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04903820553794503, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.048989439494907853, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.050359178092330696, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05051710242405534, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05132517898455262, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05261840272694826, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05360629547387361, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.054632261674851176, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05475353103131056, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05612077398225665, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05555107833817601, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05667400300502777, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05757879471406341, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1194076538085938, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.118568181991577, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.117201089859009, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1157801151275635, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1143555641174316, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1122794151306152, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1096692085266113, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1060636043548584, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0989205837249756, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0836429595947266, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.0465798377990723, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.90421462059021, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.754124879837036, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.639529228210449, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5771095752716064, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5218021869659424, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4636549949645996, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.432297945022583, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4245920181274414, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4306466579437256, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.437666177749634, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4404683113098145, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.444270610809326, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4719109535217285, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4969942569732666, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.545489549636841, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.567312479019165, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5960896015167236, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6572859287261963, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.721435785293579, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.763115406036377, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8179304599761963, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8397774696350098, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.977034568786621, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.91097354888916, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.936633825302124, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.957054853439331, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1284217834472656, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.06215500831604, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1692423820495605, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9848711490631104, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9042019844055176, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.806368112564087, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.91046404838562, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07456626061277224, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07733480989294943, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08176448874123293, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08803986710963455, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.09929863418235511, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.11443337024732374, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.14396456256921372, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.17201919527500922, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19952011812476927, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.21834625322997417, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24049464747139165, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2672572905131045, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2713178294573643, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26651901070505724, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26578073089701, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26504245108896274, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26005906238464377, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24473975636766335, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24473975636766335, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24344776670358065, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23606496862310816, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23717238833517904, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23126614987080105, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2074566260612772, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21834625322997417, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2294204503506829, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20431893687707642, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009576014367938155, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011212736164514313, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012599849409900506, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014590492961561612, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015248906080242375, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.017418913642446323, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.019772262507726037, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02323927798213432, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.029711297399365566, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03928437114450393, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.04957520795859773, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06855817621981171, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09519910972722678, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1240905297991508, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1444891749394834, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16785216536946787, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18970466392698027, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20483826799716343, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20989223786219383, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21281016369217717, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21243224331998137, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2130101171755124, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21243835710630513, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2116818347738393, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21120191389349996, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19788945170306257, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20093315368891243, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2017338568520217, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19208063470456235, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19159635092858318, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18332047641798774, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18271605934567203, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18278828427306468, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1654468550499129, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16982226004458, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17695585243819043, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16876017479446656, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1653585204200129, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1681639572807537, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1548313698672346, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17299788360980894, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1848693882015137, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17265199293215353, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16051349126344494, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.190516414642334, "validation/loss_best": 2.4306466579437256, "validation/acc_best": 0.2713178294573643, "validation/f1_best": 0.21281016369217717} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.352082996368408, "train/grad": 0.2697948880493641, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11902099609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.117744140625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1154345703125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11345458984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11130859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10827880859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10450927734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.099420166015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.089285888671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0669158935546874, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0112103271484374, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.8495822143554688, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7168218994140627, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.611199951171875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.534219169616699, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.457119140625, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3653984832763673, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2774699211120604, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1958316802978515, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.128256235122681, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0641271352767943, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.00443487405777, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9378313946723937, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8754677844047547, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8186181485652924, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7560934805870057, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.703268928527832, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6656218338012696, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.6353419202566146, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.612629623413086, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.586381846666336, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.5725378054380417, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5812651485204696, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.563329774737358, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6042615574598313, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6491228729486465, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6620959544181824, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7223402506113052, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.788629970550537, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.9172742760181427, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.0187122690677644, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.0721331000328065, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.1607780873775484, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.2279143583774568, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019875642098486423, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01987761465832591, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01987866995856166, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019877239274792374, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019871293609030546, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0198589081177488, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01983611663803458, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019798500128090382, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019742479971610008, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019785502371378244, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020381923126988114, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02286472959443927, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025263424087315797, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027319712843745948, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.028737304070964455, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03041114435531199, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03273329264484346, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03480851089581847, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0363784749340266, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03740498698316515, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03799309197813273, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038768578860908745, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04001883438788354, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041318592131137845, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04248495621606708, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043668614085763696, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04486267969012261, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04569764755666256, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04697707407176495, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04796997992321849, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04874714775010944, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.049867399651557204, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.050354721155017615, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05147879289463162, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.052081133630126716, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05258923714980483, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.053638475798070434, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05488537715747952, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.055600413624197244, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05697700144723058, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05663410542532801, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.056067866627126935, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05706890048459172, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05729273840785026, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1186363697052, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1177072525024414, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1161537170410156, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.114596366882324, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.112989902496338, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1105523109436035, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1072261333465576, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.102097511291504, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0899367332458496, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0577306747436523, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9757208824157715, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.797236919403076, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6797659397125244, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.594183921813965, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.530985116958618, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4701428413391113, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.420081853866577, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.391899585723877, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3808960914611816, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.384753704071045, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.3953611850738525, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4026105403900146, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4169881343841553, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4427785873413086, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4859819412231445, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5428247451782227, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.580374240875244, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6276793479919434, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6910061836242676, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7492291927337646, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7698328495025635, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.829463243484497, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8445188999176025, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.9558680057525635, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.9168951511383057, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.936190605163574, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.959507703781128, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9935154914855957, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9609220027923584, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.961477041244507, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.91556978225708, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.860621452331543, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.874739408493042, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.767172336578369, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07013658176448874, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07585825027685493, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07954964931709117, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08342561830933924, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09062384643779993, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.11074197120708748, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.13418235511258766, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.16795865633074936, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19306016980435586, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21686969361387967, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23181985972683647, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25489110372831303, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2768549280177187, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2812846068660022, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27888519748984864, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2781469176818014, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27113325950535255, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26356589147286824, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2622739018087855, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2571059431524548, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24806201550387597, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24492432631967515, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23385012919896642, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23034330011074197, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2279438907345884, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23403469915097821, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2131782945736434, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22572905131044665, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22425249169435216, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2201919527500923, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22296050203026946, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21668512366186785, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21059431524547803, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011196252593132498, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013733816031136754, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014301919706629572, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.016397547688820676, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018585491368947316, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.020757598872671774, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02356792203310869, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.026846561923721465, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03338627346612111, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.04924781494202755, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.06178854841892561, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.08673562948366358, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11029764784213243, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13468013900911027, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15113131999171503, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17510088818268502, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19276177706542116, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2059845067374774, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21492379723942498, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21502327562945264, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2165547778329322, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2141837070910657, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2113433689523944, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2141885219933727, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21484673071262175, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21018548960204153, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21142120581990362, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2093547019613483, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20097859297894327, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19655053664043443, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20273081254595238, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19320585262441314, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19286237853793897, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19361952883449782, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19115024361453914, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.194152781062113, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17908378797109958, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1787422854767562, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18111694818216786, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1749026341308765, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18054742132116922, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1890043208550031, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16908521044976052, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18242144952332137, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.1958316802978515, "validation/loss_best": 2.3808960914611816, "validation/acc_best": 0.2812846068660022, "validation/f1_best": 0.21492379723942498} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.2777545511722566, "train/grad": 0.2657055077701807, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118487548828125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.117030029296875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.114566650390625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.112210693359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.109725341796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.106220703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1013916015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.094473876953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0782269287109374, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0343438720703126, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.9338592529296874, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.758305969238281, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.652977066040039, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5589500045776368, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4735813903808594, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3887902069091798, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3014630699157714, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.217510042190552, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1375890350341797, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0699314975738528, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0037207889556883, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9382360076904297, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8660048413276673, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7983267784118653, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7369424557685853, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6682457399368287, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.611058582663536, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5686861753463746, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.52510169506073, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4886271542310714, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4616641342639922, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4428285139799117, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4504618805646896, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.4283540737628937, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.450894061923027, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4839211040735245, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5256519210338593, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5815721124410629, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6568749499320985, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7383103042840957, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.86001788854599, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9337296748161317, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9936357003450393, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.0580987429618833, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019558691917918623, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019559369729831813, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019558895179070532, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01955576554406434, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019548036637715996, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01953259404283017, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019503572713583706, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019460213910788296, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01943615165539086, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019765329700894653, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02111015347763896, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023869176069274543, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02596561045385897, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.027718005310744045, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02949551054276526, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.031598562272265554, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03381327112205326, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03560703889466822, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03691510778851807, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03781446692533791, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038420297130942346, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03922019863501191, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04037870168685913, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.041454216465353966, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.042428468000143764, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04385435800999403, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04505811627954245, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04603040110319853, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.046762618105858564, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04763540010899305, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.048930044155567884, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0496358329243958, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04967883037403226, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05103847881779075, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05087054345756769, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05156952816992998, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05402431366965175, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05415272673591971, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05524893058463931, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.054360288009047505, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05356205934658646, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05328091314062476, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05212895084172487, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.051844894886016846, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.118309259414673, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1173369884490967, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1157498359680176, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1140544414520264, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1122865676879883, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1094326972961426, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1052236557006836, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.097933769226074, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.07772159576416, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0172152519226074, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.896574020385742, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7296957969665527, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6414849758148193, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.565007448196411, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4962069988250732, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4474971294403076, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.415830612182617, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3990137577056885, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3940696716308594, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4048566818237305, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4225387573242188, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4351840019226074, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4565107822418213, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4880294799804688, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.527186632156372, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5921599864959717, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6353185176849365, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6940696239471436, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7835209369659424, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.908536911010742, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9555418491363525, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.962604522705078, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0126004219055176, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1554417610168457, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1367945671081543, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.1946332454681396, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.149195909500122, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0450544357299805, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1141319274902344, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0944204330444336, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.084503412246704, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9407906532287598, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.914130210876465, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9140567779541016, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.067921742340347, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07788851974898486, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08416389811738649, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0976375046142488, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.11830933923957179, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.14857881136950904, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.18567737172388335, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.20671834625322996, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.22572905131044665, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25249169435215946, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.262827611664821, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2722406792174234, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27445551864156514, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27759320782576596, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27722406792174237, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26965669988925806, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26411960132890366, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26098191214470284, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23606496862310816, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23477297895902546, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20967146548541898, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2131782945736434, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2074566260612772, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21502399409376152, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22093023255813954, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22185308231819859, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2233296419342931, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21963824289405684, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011670421014402374, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01309671473041301, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013741221377542, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014633587525960737, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.016971957532830522, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01989379103207783, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02215689562030171, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02730612082452191, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.03671804261979273, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.050746613196300354, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07097037742988943, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.10592388199512692, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.12715311291302878, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.14803129365849257, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1745153475955944, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18941583701738837, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20256630526613595, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20948009073916574, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21675399075363133, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22062238953384364, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21614483058542575, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21292194052221194, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21461473139368734, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21655068988457474, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21476155264848984, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20404855921733342, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2009008340690082, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2014614027157431, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19133541976272336, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1851411102756874, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18265543717837895, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18226854752741106, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17735015713710203, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17166260168906655, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17574491915507948, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17106620994763075, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17189950999723194, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18738113790883215, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18016135936560576, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17811382855221358, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16746574575698317, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1785372780758918, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18038950077313406, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1771400683759239, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.1375890350341797, "validation/loss_best": 2.3940696716308594, "validation/acc_best": 0.27759320782576596, "validation/f1_best": 0.21675399075363133} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.2300714325904845, "train/grad": 0.274036079198122, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1221826171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12067138671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.118094482421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.115423583984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1128125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1088134765625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1030810546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0938348388671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0686041259765626, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.994801025390625, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.8673260498046873, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.710823974609375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.62436897277832, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5303924560546873, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.434533576965332, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.349935646057129, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.264546718597412, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.181357717514038, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0987119007110597, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.028736629486084, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9612900972366334, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8963557386398315, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8227027153968811, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.753354128599167, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6868403601646422, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.60844267308712, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5450809806585313, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4971428549289703, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4540932863950728, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4204472136497497, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3849283641576766, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.361695985198021, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3601509484648704, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.323549361526966, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.3541777476668357, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3847922560572625, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3975477850437164, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4706014108657837, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.5360224518179892, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.6338519793748856, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.7594218909740449, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8481493294239044, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9210092896223068, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.9834264099597931, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019679505783133208, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019678087932989002, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019676406644284727, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019670157665386795, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019663041937164963, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019644394507631658, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019613749566487967, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01957362118177116, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019640441285446286, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02046560146380216, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02233851016499102, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025082707311958073, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026841748412698506, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02863700090907514, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03081053517758846, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033009831523522735, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03505140593275428, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036647337665781377, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037845716113224626, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03876593748107553, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.039434427414089444, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04037469048053026, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04166042534634471, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04288562582805753, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04403474135324359, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04549381390213966, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04701218426227569, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04812279844656587, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0491565447114408, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.050214729886502024, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.051478646863251924, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05244993200525641, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05285041246563196, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.053359556309878826, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05310387864708901, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05393435176461935, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05471513289958239, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05517857464030385, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05573066920042038, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.055403218995779754, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05427324837073684, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05361204449087381, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05245706412941217, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.05221582356840372, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1176774501800537, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1165356636047363, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.114642381668091, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1126511096954346, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1105148792266846, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1069910526275635, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1015422344207764, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0912933349609375, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.059596538543701, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9661002159118652, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.831796646118164, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.686931610107422, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6164019107818604, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.544487476348877, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4784176349639893, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4406747817993164, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.417344808578491, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4046154022216797, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.3992958068847656, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4070167541503906, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4208567142486572, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.434563636779785, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4526658058166504, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4852259159088135, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5241286754608154, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.583181619644165, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.624741792678833, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6737029552459717, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7417314052581787, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8488688468933105, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.88187837600708, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.9420273303985596, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0199220180511475, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.140150547027588, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.139450788497925, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2162251472473145, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.106574773788452, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1129531860351562, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.044426679611206, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9579668045043945, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8589303493499756, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.888432264328003, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8383371829986572, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.767197370529175, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06866002214839424, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07788851974898486, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08102620893318568, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08877814691768181, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.10778885197489849, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.13252122554448137, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.16334440753045404, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19232188999630861, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21114802510151348, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23255813953488372, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2563676633444075, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.26947212993724623, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27611664820967147, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2796234772978959, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28183831672203763, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27722406792174237, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2713178294573643, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2646733111849391, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24234034699150978, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24381690660760427, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23754152823920266, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2277593207825766, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2220376522702104, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22905131044665927, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22905131044665927, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23385012919896642, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22517534145441123, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22757475083056478, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.24049464747139165, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011876255833303602, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013474767119356837, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.014843429347239317, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.016494542872154863, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018632198407195162, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.021248454972537165, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.024162468740866434, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03200314936477429, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0464485284248681, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05875697673962014, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.08409000518450943, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11209623655313898, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.12922108050819084, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1515409115751023, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17564653989908596, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19241524602937274, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2041867369361888, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21441534095361872, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22140841154405747, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22193028172838533, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2211404904622695, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2213016056255637, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21906633953348287, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21922646501962337, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21824497202825613, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2061460110097005, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21267891791589574, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20721598188334187, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20040076705856677, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1981246595403829, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.2015882577981104, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19966815594810042, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.193973746801444, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18310559358471248, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18726769594386838, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18271829762512662, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1928539251569422, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1822550071949869, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1883673989808119, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1937860672686751, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19336812964457692, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18902427284895554, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19003156529969187, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.19800273320258002, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0987119007110597, "validation/loss_best": 2.3992958068847656, "validation/acc_best": 0.28183831672203763, "validation/f1_best": 0.22140841154405747} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.1628879249095916, "train/grad": 0.2692843392491341, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.114256591796875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11236572265625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10947021484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.106395263671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1032177734375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.098284912109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0909423828125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.078033447265625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0393292236328127, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.932168273925781, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.796455535888672, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.659815673828125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5821059417724608, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.484577751159668, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.38486759185791, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.304799747467041, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.221996955871582, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1395288467407227, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0572487258911134, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9886299562454224, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9191628885269165, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.85072589635849, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.772818853855133, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7012216395139694, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6329080176353454, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.546819497346878, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4738850998878479, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4166018712520598, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3599611431360246, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3043373501300812, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2627393770217896, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2290123176574708, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2176729011535645, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.171210902929306, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.2084367260336877, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.232552630007267, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2434474900364876, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.304715622663498, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.3759759283065796, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.4863716769218445, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.6173989194631577, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7087616443634033, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.803920609354973, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.8769860911369323, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019654417778365314, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019655048358254136, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01965319499373436, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01964634927455336, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01963622583076358, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019615911110304297, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019579962617717682, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019550466421060265, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019764468288049102, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0210920866439119, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02317077592946589, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025833274368196726, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027412785505875944, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029444822957739234, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031940013663843275, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03416944235563278, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03605040694586933, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03744007128290832, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03847038520500064, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039298856314271685, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04003099206835031, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.040969960428774356, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04226374370977282, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043376378770917656, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0444222255051136, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04556175485253334, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04654270263388753, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047487810533493754, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048270749393850565, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.048945321664214135, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04985736628994346, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05017626291140914, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05063896855339408, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05112130729481578, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.051637695915997026, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.051881809327751396, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.052448213081806895, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05291724367067218, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05378993140533567, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.053504563830792905, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.052372829169034955, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05155456427484751, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05082154734060168, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04994144335389137, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1175150871276855, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1163737773895264, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1144044399261475, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1123108863830566, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.109971284866333, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1059727668762207, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0993247032165527, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.085796594619751, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0405306816101074, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.920825481414795, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.787165641784668, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.661400079727173, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.598862409591675, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.526139974594116, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.468196392059326, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4418978691101074, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.429755449295044, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4247021675109863, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.428492546081543, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4447004795074463, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.468536853790283, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.494943141937256, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5243117809295654, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.569240093231201, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.606137752532959, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.653996706008911, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.704186201095581, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7747743129730225, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8692920207977295, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0013277530670166, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0452029705047607, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0706284046173096, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.098846912384033, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2570438385009766, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.267076253890991, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3094322681427, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.224785327911377, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1667206287384033, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.133788585662842, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.1422245502471924, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.986600399017334, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9240620136260986, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.010164260864258, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9936673641204834, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06921373200442968, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0784422296050203, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.09043927648578812, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.11018826135105204, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14119601328903655, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1699889258028793, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19767441860465115, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21280915466961978, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2353266888150609, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.25692137320044295, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.260797342192691, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2661498708010336, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2731635289774825, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27500922849760057, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26153562200073827, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24344776670358065, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22443706164636398, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23126614987080105, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21520856404577335, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2172388335179033, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23735695828719083, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2364341085271318, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22093023255813954, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21963824289405684, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011089344613117192, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.011989977805850373, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013544778397834563, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01445412184701083, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015550281087036483, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.018389505497497974, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.022720448889275408, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03577841339187311, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.047399019713733725, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.06477142568114339, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09017535463614172, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11494654762786179, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1295577797847143, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15414966197949384, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.17815749964977687, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18856137173074095, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19844552072425614, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20855837273903308, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21629737551908657, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21283796182761106, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21120536225688924, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2051456018445128, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20301016828731464, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20473579435127642, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20223775486647702, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19754708454289113, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19320867838606914, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1884357013295399, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.17444915012088946, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17672855628560571, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18047883201763848, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18468893167225398, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18442670573491307, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18090538677907184, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17749430920325815, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17893081154235788, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18293459849200558, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19145936825859547, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18354665991194608, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18251372607118255, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1916960163460438, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18996484900359623, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17635491888156718, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1777298271658089, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0572487258911134, "validation/loss_best": 2.428492546081543, "validation/acc_best": 0.27500922849760057, "validation/f1_best": 0.21629737551908657} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.1040184223651885, "train/grad": 0.26680650413036344, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.117955322265625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.116029052734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11292724609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1098583984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.106617431640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10119384765625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.093116455078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.077513427734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0268011474609375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.900798034667969, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7690303039550783, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6449214935302736, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5692247772216796, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.464821434020996, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3615769958496093, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.279061908721924, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1947108840942384, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.11077449798584, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0254891872406007, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.954042763710022, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.882896022796631, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8125243806838989, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.731567780971527, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6525928449630738, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.575611938238144, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.4838454085588455, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.401293540596962, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3402005243301391, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2799417918920517, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.212192987203598, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.159809746146202, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1140887746214867, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0992751127481462, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0474287170171737, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0551821795105933, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0679004898667335, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0829724952578546, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.146964196562767, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2117601916193963, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.3192854082584382, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4524311769008635, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.566198811531067, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6543030774593352, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.7507974600791931, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02001750918570906, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020015903669409453, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02001299975439906, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02000419980380684, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019989123595878483, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019960032687522472, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01991935356054455, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01990550828166306, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020277805011719465, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021927676661871374, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024016547976061703, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02653624298982322, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027993880119174717, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030128609891980886, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0326153140515089, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03467297418043017, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036342269303277136, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03755391032435, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03852998623624444, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039384834840893744, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04010330781340599, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041058233790099624, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04226952401921153, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043338757678866385, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.044270163364708424, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.045423921700567006, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04658254761248827, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04756228929385543, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048099651634693145, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04835828796029091, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049007026702165606, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.049249944910407066, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.049500164035707714, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05003831261768937, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04990636054426432, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.050081477910280225, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05117473477497697, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05179240396246314, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052871269937604666, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.052139759194105864, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.051212936826050284, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.051150433607399466, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04977179327979684, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.048645241111516954, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1171436309814453, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.115873098373413, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1136770248413086, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1113245487213135, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1086390018463135, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1039676666259766, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0959436893463135, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.07902193069458, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.020237684249878, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8846983909606934, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7556254863739014, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.642925977706909, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.584434986114502, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.508197784423828, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.452425956726074, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4254977703094482, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4109115600585938, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.403257131576538, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4059948921203613, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.421278476715088, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.440966844558716, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4625284671783447, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.489553451538086, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5311455726623535, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5645134449005127, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.617604970932007, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6686370372772217, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7438905239105225, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8496482372283936, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.008605718612671, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.070378541946411, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1369588375091553, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2716758251190186, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.3563497066497803, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4287819862365723, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4395222663879395, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.362682580947876, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.375577449798584, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.2937185764312744, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2471132278442383, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0915091037750244, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.058817148208618, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.016277551651001, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.9538068771362305, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07216685123661867, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07493540051679587, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07881136950904392, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08564045773348099, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.09782207456626062, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1184939091915836, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14876338132152087, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17847914359542266, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20579549649317092, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22111480251015134, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2484311554078996, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2678110003691399, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2731635289774825, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27648578811369506, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27703949796973054, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.268733850129199, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25987449243263194, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24769287559985234, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2499077150239941, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23772609819121446, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22868217054263565, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2277593207825766, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21207087486157253, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21705426356589147, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21797711332595054, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21705426356589147, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2220376522702104, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2072720561092654, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23181985972683647, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23661867847914358, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23385012919896642, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23145071982281284, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22849760059062385, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014093080096246947, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014919274554094877, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01642803728808908, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.018130149547598975, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.018933571755682308, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0224663943946222, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.02914650711953573, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.038833649359430025, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05046177787136968, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07136787461471987, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09711833381044621, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12320271855886449, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14008489143593952, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1676190199692336, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.18650672217352474, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.19554184590657495, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20665383806215756, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2133260392695392, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2184140383841202, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2225878141548356, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21743994193657565, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21194189125636007, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21128567033426168, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20848057937372674, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21040883735402452, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2049742072903645, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21013098800900476, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20355339430154515, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19129005364431503, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1904080261614175, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18409693360036905, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18963874966105454, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17645182205075213, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18203420188099273, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17896457877770197, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1790042979468053, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18577665379732242, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18026222989666132, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1865286908484163, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19088378190900354, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1829734141937143, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18606892895336916, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1883435788010446, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18495304186198236, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.954042763710022, "validation/loss_best": 2.421278476715088, "validation/acc_best": 0.27703949796973054, "validation/f1_best": 0.2225878141548356} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.049208480119705, "train/grad": 0.2638073169440031, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.115416259765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11346923828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1101806640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.106898193359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1032421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.09734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0878106689453126, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0689385986328124, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.005325927734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.865661315917969, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.736285400390625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6175479888916016, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.541476020812988, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4339019012451173, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3337849044799803, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.253551368713379, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1715738582611084, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0886441612243654, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.003652639389038, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9311987113952638, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8565697479248047, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.780326771736145, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6935269910097122, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.6151610261201859, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5358481067419052, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.435675424337387, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.349042870402336, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.2843044978380203, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2098685094714166, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1374406427145005, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.075998975932598, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0235880407691003, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.0004382458329202, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9410985654592514, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9409871229529381, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.9430823367834091, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9499162316322327, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.983301417529583, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0368448495864868, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1455588456988335, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2999642014503479, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.4027444741129875, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.512339980006218, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.6123110675811767, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019602146525867285, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019601109572686255, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019595642127096653, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019587913551367818, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019573203679174186, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019546775505878033, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0195121518522501, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019532722351141275, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020098139294423164, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02202560530975461, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024146286183968187, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.026521254274994136, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027955115856602788, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0303032760694623, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03292126995511353, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03497227136045694, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03654807273298502, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03765753347426653, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03857837155461311, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039370132107287646, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.040084859393537044, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04100503629073501, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042088058311492205, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043153561148792506, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04409225290641189, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04524840662255883, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04623861901462078, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0470096985809505, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0476805948279798, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04831531869247556, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04851279290392995, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04879256134852767, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04846567342057824, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04897134495899081, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0489073390327394, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04955947533249855, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05016100388020277, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04986510958522558, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05095335012301803, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.051320234630256895, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.050534267965704205, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.050358899366110564, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04966108551248908, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04785301826894283, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.116488218307495, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1151316165924072, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1127889156341553, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1102585792541504, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.107388973236084, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.102278470993042, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.093263626098633, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.073378801345825, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0034067630767822, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.859889268875122, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.735952615737915, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6313576698303223, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.573176145553589, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4941911697387695, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.443018674850464, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4192676544189453, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.407452344894409, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.39967942237854, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.402583360671997, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4192450046539307, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.438046932220459, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4598333835601807, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4867846965789795, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.525438070297241, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.567884683609009, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6390371322631836, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7054128646850586, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7884321212768555, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.897712469100952, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.050884246826172, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0999395847320557, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1823976039886475, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2630345821380615, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4145138263702393, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.4790520668029785, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5325896739959717, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5806820392608643, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.5365610122680664, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4531164169311523, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.4081857204437256, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2421493530273438, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.207413673400879, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.099480390548706, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.0454955101013184, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0753045404208195, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07604282022886674, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07881136950904392, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0843484680693983, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10132890365448505, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12200073827980805, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.15282392026578073, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1834625322997416, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2087486157253599, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22462163159837578, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2511997046880768, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26356589147286824, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2825765965300849, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2798080472499077, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2742709486895533, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26688815060908083, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25987449243263194, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2552602436323367, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2532299741602067, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24178663713547435, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23920265780730898, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2320044296788483, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22148394241417496, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2216685123661868, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2115171650055371, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2115171650055371, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20376522702104097, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2072720561092654, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20671834625322996, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22277593207825766, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21779254337393872, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21650055370985605, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014187564121757036, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01466478888053352, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016260169109679878, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01827864041621133, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01866745202905413, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02149829689973386, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.027692674447145352, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04145323148161178, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05275835500340115, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07524673672389506, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10168192631595861, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12555336557323854, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14265766660441095, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.16923182541681045, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1860659242385558, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2000772893515316, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2104533812193752, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.218613949908294, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2209752416342301, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21879843312525074, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21652921986984697, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21600106384546602, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21529870884766641, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21267467212713165, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21394303030658732, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20439698574832635, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20513514196119595, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20223874226647012, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1922329537661666, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18878493554134554, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18804816892279108, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18898526436147786, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18328336276085458, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1813279552997328, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1790370816531506, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18281317605825284, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17591890693836562, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18398932141608873, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18821676207492555, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18271543718190408, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18489649916318662, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1806216283832529, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1896694314533606, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.19011652941491364, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.0886441612243654, "validation/loss_best": 2.39967942237854, "validation/acc_best": 0.2825765965300849, "validation/f1_best": 0.218613949908294} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.000593999624252, "train/grad": 0.2584022431820631, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11098876953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.108966064453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1056787109375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1023291015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.098599853515625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.092452392578125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0823809814453127, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.061473388671875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.990224609375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8460760498046875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.72228759765625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.611570129394531, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.535432929992676, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4215419769287108, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.319643898010254, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2385079574584963, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1547484588623047, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0702692890167236, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.983984613418579, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.909896001815796, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.833728187084198, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7562954175472258, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6690606784820556, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5867972373962402, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.5033664268255233, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3966484528779983, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.306504823565483, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.234083331823349, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1516259071230888, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0690948370099067, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.9941120883822441, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.9342724734544754, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9000636279582978, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.842241939008236, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8338343301415443, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8204321956634522, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8051843121647835, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8464459073543549, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8965126201510429, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.9876287245750427, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.1459874737262725, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2526608660817147, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3614998441934585, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.4745983695983886, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020010262606665493, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0200088850595057, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020000714720226825, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01998935273382813, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01996989394072443, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019935551462695003, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019893030002713204, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01992245817091316, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02059402842540294, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0225938406214118, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02469610502012074, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027026742044836283, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.028469897424802184, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030915885912254453, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.033512762943282724, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035452877515926955, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036919379513710736, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03791110293008387, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03875250091776252, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03953144256025553, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04018374120816588, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04106442166492343, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04209998900070786, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04312141405418515, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04399034708738327, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.045080789402127265, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045798293761909005, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04635870534926653, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.046643079370260236, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046860112342983484, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.046936133038252593, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.046330852936953305, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04610353345051408, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.046423708628863095, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04628188630566001, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04577810537070036, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04630682289600372, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04697806522250175, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04768971124663949, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04886308252811432, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0493768447637558, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.049801596105098725, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04907524911686778, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.047932795435190204, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1163575649261475, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.114964008331299, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1126439571380615, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1100728511810303, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.107106924057007, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1017355918884277, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.091977596282959, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0699384212493896, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9920923709869385, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.844560384750366, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.723964214324951, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.624114751815796, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.565729856491089, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.484950304031372, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4365155696868896, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.415027141571045, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.404954433441162, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.399223804473877, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.404741048812866, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.422719955444336, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.443305730819702, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4667463302612305, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.495025157928467, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5371735095977783, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5776774883270264, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6412930488586426, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7028214931488037, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7825913429260254, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.902186870574951, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0637171268463135, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.128545045852661, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2124364376068115, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2798924446105957, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.4602341651916504, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.5351266860961914, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.5722057819366455, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5916569232940674, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.623359441757202, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4995009899139404, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.477151870727539, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.295236110687256, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.1905057430267334, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.147091865539551, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.009308099746704, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07290513104466592, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07475083056478406, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07622739018087855, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08139534883720931, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08656330749354005, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10243632336655592, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12513842746400886, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.15854558877814692, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.18733850129198967, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20616463639719454, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2264673311184939, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26947212993724623, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.27150239940937615, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27353266888150607, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27722406792174237, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28017718715393136, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27556293835363604, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.260797342192691, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2484311554078996, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21705426356589147, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22129937246216316, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20967146548541898, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20450350682908822, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19896640826873385, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20284237726098192, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21557770394979697, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2216685123661868, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2087486157253599, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21077888519748986, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014490951665772842, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014797714336636542, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016825258066236443, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.018445451272839444, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.020047168025204298, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.023561648691350506, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.030116445148421423, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04348857001799414, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05507669894179432, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08176355061366335, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10739517641081499, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1259316048949926, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14568304805205143, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17378230199604816, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19395207788114407, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20139108826467692, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20723284210336224, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21436570972371305, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2220661925414991, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2188589290549864, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2166598330993236, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2114734976272512, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2115661777233991, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2090756292021815, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20576714324683643, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20641884136841548, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2094024195872044, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20394278893707937, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19425318863946894, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18769004707554907, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1894617783858048, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.191852196051437, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18876067735756588, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18454725277441653, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1805153303268395, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18268070781410295, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1812266914761962, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.188673936076834, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19493386887915928, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18938089833530677, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19691274423847682, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19392016583823968, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18358983863944178, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18481275081623175, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 1.983984613418579, "validation/loss_best": 2.404741048812866, "validation/acc_best": 0.28017718715393136, "validation/f1_best": 0.2220661925414991} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 1.9616260653734208, "train/grad": 0.25026889950037, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1184375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.116336669921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.112813720703125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.109288330078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.105328369140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.098812255859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0879730224609374, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0650714111328123, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9873590087890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8405728149414062, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7210682678222655, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6147965240478515, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.539163360595703, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4223031997680664, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3201838684082032, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.239611949920654, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1566097640991213, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.071590614318848, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9832915592193603, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.9059541416168213, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.828267011642456, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7490613639354706, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6556827074289322, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5683782452344894, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4773048293590545, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3642153167724609, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.2677296179533004, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.192307689189911, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.1093826600909233, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.0213354641199113, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.9458418470621109, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8755442580580711, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.8295210820436477, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.7575976166129113, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.7396465176343918, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.7157936552166939, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.7010863357782364, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.7392961092293262, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.7631539678573609, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.838135192990303, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9636937382817269, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.055862130522728, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1783387088775634, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.3065610682964326, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019957212642766536, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01995431697461754, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01994462587404996, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019933498236350714, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019911960097961128, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01987908319104463, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019840430300682783, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019896080326288937, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020683084446936846, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02279970976524055, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024885241193696856, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027116919662803413, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.028577360929921268, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03113016198389232, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03376221073791385, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03570202236995101, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03714602952823043, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.038110191253945234, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.038923491928726434, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039664715649560096, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.040290035735815764, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041042168904095885, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04193375004455447, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04282131711021066, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04357626985758543, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044466335214674475, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0451256968639791, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04555463356897235, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0455835030041635, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0453465505130589, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04509884171187878, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.044723906349390746, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04409697856754065, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.043468699660152196, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04350333034992218, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04305335896089673, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.043068738617002965, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04351487739011645, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04421372190117836, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04523425376042724, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0459214972704649, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.045989228952676056, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04617396838963032, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04530235152691603, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1162595748901367, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1148760318756104, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1124813556671143, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.109867811203003, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.106846570968628, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.101280927658081, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0910651683807373, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.067554473876953, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9847068786621094, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8350934982299805, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.716474771499634, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.619446277618408, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5613608360290527, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.480926275253296, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4345123767852783, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4143435955047607, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4051623344421387, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.400331735610962, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.407069444656372, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4250731468200684, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.448091745376587, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4749131202697754, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5054543018341064, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5518298149108887, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.598223924636841, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.668414354324341, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7390527725219727, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8312692642211914, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.939227342605591, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0845947265625, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1506154537200928, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2370405197143555, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3207645416259766, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.489835739135742, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.566103458404541, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.6298840045928955, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.650768518447876, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.684541940689087, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.606419324874878, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.590813159942627, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.4330053329467773, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.4276986122131348, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.341477394104004, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1705591678619385, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07235142118863049, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07567368032484312, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0799187892211148, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08637873754152824, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.1035437430786268, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1271686969361388, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.16094499815430047, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1908453303802141, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20911775562938353, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22702104097452935, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2702104097452935, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2724252491694352, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27759320782576596, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27906976744186046, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2757475083056478, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2676264304171281, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2576596530084902, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25692137320044295, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24640088593576967, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2412329272794389, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22277593207825766, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22443706164636398, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2117017349575489, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2131782945736434, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2069029162052418, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20579549649317092, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20967146548541898, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21336286452565523, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013949493197236006, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014581707291937548, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016216893673758226, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.017260073479829552, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01923473626519265, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.022079453793503775, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.028836679054651013, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04431836059844716, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.056723451998408635, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.083173286723633, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10973617545125407, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1275079323759498, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1462631896336871, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1744064314812711, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19388804470675083, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20109901479410172, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.2072394474360925, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2143456486297702, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22145808182629736, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21814145670594662, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21558721285128257, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2084300851088955, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21018437216521793, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20913876957069136, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20441050555833837, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20378730322092228, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20455392774865655, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19916638394125422, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19201912962649162, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18841496479342543, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1916738460300896, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19470528477950344, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18616492585870067, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18504778922482376, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18723836611938924, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18155252803710764, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18274151483506984, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18186835240956575, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18499428326024506, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18931014370592378, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18825516339951467, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18320136818284558, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17959570522069787, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18744347482905913, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 1.9832915592193603, "validation/loss_best": 2.407069444656372, "validation/acc_best": 0.27906976744186046, "validation/f1_best": 0.22145808182629736} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 1.9266926276683807, "train/grad": 0.24195654705166816, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11451904296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.112440185546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.10907958984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.105472412109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10155517578125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.095037841796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.083846435546875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0601953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9785333251953126, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.827486267089844, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7066108703613283, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.60030158996582, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5236330795288087, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.406855239868164, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.307234516143799, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2278084754943848, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1452909469604493, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0592749977111815, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9689539241790772, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.8914778137207031, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8125104904174805, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.7307682120800019, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.637160013318062, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5483952260017395, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.456683394908905, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3439245051145554, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.2455565512180329, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.1648973590135574, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.0768485161662102, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.9845866960287094, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.9077803057432174, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8404414963722229, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.7757859496772289, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.7046244633197785, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.6843485498428344, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.6529266855120659, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.6228225520253181, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.6446771088242531, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.6621902388334274, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.7226386265456677, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.8310466596484184, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9198702090978622, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.012662016749382, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1398128002882004, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01974501755554229, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019740236066281794, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019730226593092082, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01971706433687359, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019696393543854357, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01966236121952534, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01962744476273656, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019692428358830512, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020523889767937363, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02263294498436153, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024710921943187712, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.026921972827985884, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.028372863018885256, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030918226847425102, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03348484117537737, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0353812344186008, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03680982179008424, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03776662858203053, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.038578023239970206, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.039336392302066085, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03994716491550207, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04075536098331213, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.041692324932664636, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.042500099297612906, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04318563582375646, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04392852971330285, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04441260199993849, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04469468412920832, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.044656621403992174, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04442408422008157, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04408745631575584, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.043698753081262115, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.042331802379339936, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04128728903830051, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04061396989971399, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.039903523828834296, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03982384320348501, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04013844601809979, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04070119470357895, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0410652194917202, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.042038367558270694, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0431761665828526, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04312166495248675, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04327808756381273, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.116196870803833, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1147916316986084, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1123828887939453, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1097426414489746, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.106661319732666, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.101016044616699, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0904955863952637, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0661556720733643, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.980863094329834, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8304433822631836, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7127692699432373, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.616881847381592, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.558854103088379, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4775969982147217, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.431320905685425, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4106242656707764, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3999524116516113, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.392956256866455, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.397510051727295, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.413987636566162, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4366066455841064, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4607481956481934, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4886088371276855, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5314693450927734, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.578495502471924, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6532983779907227, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.72161865234375, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8089914321899414, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.919293165206909, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0793795585632324, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1566905975341797, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.252390146255493, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.3413968086242676, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.5192391872406006, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.6177430152893066, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.689009666442871, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.7132623195648193, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.7450954914093018, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.660801410675049, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.682846784591675, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.5181045532226562, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.493044853210449, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.381770610809326, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.2403922080993652, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07622739018087855, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08139534883720931, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08748615725359911, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.1050203026947213, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12975267626430417, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1626061277224068, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19047619047619047, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.21040974529346623, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2277593207825766, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2543373938722776, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.27113325950535255, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27870062753783686, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28183831672203763, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.28054632705795496, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25987449243263194, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24953857511997046, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21779254337393872, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22443706164636398, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21040974529346623, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20284237726098192, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2056109265411591, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21373200442967885, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2248062015503876, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21410114433370248, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21557770394979697, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21705426356589147, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014905973782218479, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014766748108896959, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016895171286164957, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.017812877969504603, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.019943877881802703, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.023405638566069798, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.030388945293727886, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04492589926957435, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.0581778217160334, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08486139161773466, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10981787157994427, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12923704496610974, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14671243777718523, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17550504963589494, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19291997114060722, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20217219292048672, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20985861476740983, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.216862522720782, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22486393090645085, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2247169990530954, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21840525066929325, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21542043110944933, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21435152593788653, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21464143898918175, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20881219357296918, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20453735761136116, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20592486312694766, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2031813012062588, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18838539365256288, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1878510522086766, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1904711359407495, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19145317206834153, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1858343552385828, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1804839871074846, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18411699072011758, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17680291674258145, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18286416280926066, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.182976552670124, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18971105457210635, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1836862930367932, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1899249588175186, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18378130488310318, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.189454031110434, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1863541904953376, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 1.9689539241790772, "validation/loss_best": 2.397510051727295, "validation/acc_best": 0.28183831672203763, "validation/f1_best": 0.22486393090645085} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 1.9086995607614516, "train/grad": 0.23719684340059757, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.116717529296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.114793701171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.111295166015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.107947998046875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1040087890625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.09751953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.086463623046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0625115966796876, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.980140380859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.831069030761719, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.711769866943359, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.603910598754883, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5248843383789064, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4053042221069334, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.304554672241211, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.224180793762207, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1400323009490965, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.053129153251648, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9634072256088257, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.88530207157135, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8056098818778992, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.722671126127243, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.629824131131172, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5388640439510346, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4465082049369813, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3357272577285766, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.2359781670570373, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.1562395560741425, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.0669897916913031, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.9637376245856285, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.8792072096467018, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.812311255633831, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.7535404932498931, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.6796437759697437, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.6455339601635933, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.6135679388046265, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.5834432318806648, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.594856178611517, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.607369570583105, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.6596341697871685, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.7560957629978656, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.8245283490419388, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.903105853497982, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.022975155711174, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019869126752018927, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019863107101991774, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019851959552615882, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019836531076580285, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019815404498949647, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019781317701563238, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019748427257873118, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019818860553205014, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02067334822844714, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022792525487020612, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024839288825169205, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02703305546194315, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.028493332983925937, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.031089169243350624, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03365179924294352, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035527575751766564, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036931714555248615, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03786642700433731, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.038658064529299735, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03936886046081781, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03997155802324414, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.040755923576653, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04164477739483118, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0424547697417438, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04305834852159023, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043785771019756795, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04422156808897853, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04444011552259326, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04420592794194818, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0437572929635644, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.043048343975096943, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04236723069101572, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.040906449425965545, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.039965790882706645, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03921252965927124, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.038303327886387704, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03810064039193094, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.037937161652371286, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0381337970495224, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03870670285075903, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03954762307927012, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04045962203294039, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04060591159388423, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04084936894476414, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1161530017852783, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1147522926330566, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1123483180999756, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.109692335128784, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1065938472747803, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.10089373588562, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.090261459350586, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0656797885894775, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.979522466659546, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.828944444656372, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.711716890335083, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6165244579315186, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.55873441696167, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4781405925750732, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4330618381500244, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.413240432739258, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4035511016845703, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.3975086212158203, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.403280258178711, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4208366870880127, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4429550170898438, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4672393798828125, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4960641860961914, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5395495891571045, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.584193468093872, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6566336154937744, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.723310947418213, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.810683250427246, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9264650344848633, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0811729431152344, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.157069683074951, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2596404552459717, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.346719741821289, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.5333502292633057, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.6176583766937256, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.7005536556243896, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.714942216873169, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.7653534412384033, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.6925251483917236, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.7051265239715576, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.551574230194092, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.5141851902008057, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.473029613494873, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.3243250846862793, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07327427094868956, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07641196013289037, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08139534883720931, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08785529715762273, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10612772240679218, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12938353636028055, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1626061277224068, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19029162052417867, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20985603543743078, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22831303063861202, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25341454411221853, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.268733850129199, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2722406792174234, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27906976744186046, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2798080472499077, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27999261720191954, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2681801402731635, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26005906238464377, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2482465854558878, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24234034699150978, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22056109265411591, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22388335179032853, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2026578073089701, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2070874861572536, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21834625322997417, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21373200442967885, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20930232558139536, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.21114802510151348, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014654953278715402, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014795125527901265, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016671568344003698, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.017794510123354354, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02001519808224355, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0235318372600864, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.0313824310945058, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04600071024117967, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.057981520420140716, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08481171691230283, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10953590536371323, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12872317356925259, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1475076871507507, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17469778431582603, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19347161536081733, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.20301952940405676, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20925740429722417, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21608991597990554, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22202063472255337, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2221993713397964, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21813794228605535, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21372195967583144, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2110467738684115, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21107745122498975, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2072172633469566, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20451459611057823, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2083242901479653, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20451938651496726, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1904282212169652, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18780764629395422, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19079544397375162, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19175869410229893, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18600514051260567, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1817853429783438, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18623887773171208, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18117280510485012, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18062161643700259, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1841093730685833, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19051171781827692, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18192201299748545, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1869305193724851, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18394091844228122, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18293970596871115, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18588625435365977, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 1.88530207157135, "validation/loss_best": 2.4208366870880127, "validation/acc_best": 0.27999261720191954, "validation/f1_best": 0.2221993713397964} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 1.9050614857673644, "train/grad": 0.23333700217306613, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.116922607421875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.114927978515625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1115771484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.108133544921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.10431884765625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.097763671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0864910888671875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.062413330078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.980194091796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8319183349609376, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7133439636230468, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.606418685913086, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5286412048339844, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.412277412414551, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.313115520477295, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2344288444519043, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1522397327423097, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0665437602996826, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9772549343109131, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.899156129360199, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.8178404998779296, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.736088193655014, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.6440983855724334, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.5532102924585343, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.4584926402568816, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.3398504215478897, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.2387461024522781, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.1568264681100846, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.0631470108032226, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.9660779491066933, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.8858080351352692, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.8115055984258652, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.7447925174236297, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.6752739202976227, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.6391994436085224, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.5940169143676758, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.5660327263176441, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.575619467496872, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.5766284947097301, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.6240599651634693, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.7168680754303932, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.7774935759603977, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8433680722117424, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.9655143481492996, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.019500988968648016, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019496481069363653, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01948574570938945, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01946884624194354, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019448782317340374, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019413711070083082, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019379921518266202, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019451092351227998, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020307226576842367, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022416419954970478, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024452565321698785, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.026612959681078793, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02806227288208902, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030634943544864655, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0331712463311851, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03501664116978645, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03641370909288526, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03736999299377203, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03820522755384445, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03897326656617224, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03961925258859992, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04042420892044902, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.041387698389589785, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04211482807993889, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.042772045005112884, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0435081716440618, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04402747306972742, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.044219471290707586, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.043921393007040024, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.043378936573863026, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.042748556584119794, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04184469765052199, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04056260133162141, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.039526714384555815, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03869315361604095, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.03762639846652746, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.036949046747758986, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03677692730911076, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.036685915980488065, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03735689599998295, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.038253670772537586, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.038957027047872544, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03895936261862516, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03925047941505909, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1161537170410156, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1147563457489014, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1123263835906982, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.109684944152832, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1065711975097656, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.100878953933716, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.090238094329834, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.065580368041992, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.979300022125244, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8286707401275635, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.711454153060913, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.616265296936035, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5583198070526123, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.4778058528900146, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4324240684509277, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.41239333152771, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4025087356567383, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.396022081375122, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.401456117630005, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4191319942474365, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.44096040725708, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.465184450149536, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4937264919281006, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.536943197250366, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5825719833374023, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.656276226043701, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7235236167907715, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8104419708251953, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.925888776779175, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.083897590637207, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.163238525390625, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.2642035484313965, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.348161220550537, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.5321204662323, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.6205673217773438, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.7062981128692627, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.7208642959594727, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.7705888748168945, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.6899404525756836, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.7042508125305176, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.549630880355835, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.510131597518921, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.4617273807525635, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.309589385986328, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07659653008490218, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08157991878922112, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08785529715762273, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10612772240679218, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12975267626430417, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.162421557770395, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1893687707641196, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2100406053894426, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.22831303063861202, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.26854928017718716, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.27260981912144705, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27999261720191954, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.28017718715393136, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2798080472499077, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25489110372831303, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2410483573274271, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2207456626061277, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21705426356589147, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22277593207825766, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21040974529346623, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21354743447766703, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20339608711701734, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.20431893687707642, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20431893687707642, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20801033591731266, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21336286452565523, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20911775562938353, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014834703458165099, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014763724577309838, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01676395444365224, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01787079067034903, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.019989233032766205, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.023612372998098544, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.031388067838911855, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.045941575375301545, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.058366259809075983, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08464356664860011, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10903389119888196, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1290119994590852, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14742372615840377, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.17486628524549028, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.19326789539977574, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2036133561187519, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.20919923678238506, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.21733947303662168, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.22203810274058847, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.22254612333242232, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21903682566250648, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21473279768082684, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21094236917001855, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2112060421283679, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2081016960462567, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2049499046306238, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20643340050362113, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20298646724434719, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19072406753989732, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1879938557521098, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19014430810367886, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19173418424618918, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18596287222516542, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18282981159169964, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.186616842228584, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17873051958229988, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18172857971160342, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18287185557024513, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1895043420150586, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18223512039341602, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18848233983275645, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18407699854916612, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18387395236512735, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.18715083000830834, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 1.9772549343109131, "validation/loss_best": 2.401456117630005, "validation/acc_best": 0.28017718715393136, "validation/f1_best": 0.22203810274058847} diff --git a/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4783c33a925871557932d3db10f33adc475457ec --- /dev/null +++ b/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..d20353c76301d6cff6040bdcd5f545716381cfb7 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,,1291.5496650148827,test,0.63,0.04868913225761988,0.6053333333333333,0.05168753037841695,0.6059631059631059,0.05180392048945063 +flat_mae,patch,logistic,ppmi_dx,1,0.3593813663804626,train,0.9359430604982206,0.010501220995948265,0.9315746753246753,0.011354530832971397,0.9271034039820167,0.012074407910602205 +flat_mae,patch,logistic,ppmi_dx,1,0.3593813663804626,test,0.6,0.04907783206295893,0.570999570999571,0.052479039875058024,0.5704584040747029,0.051730813065490794 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,train,0.8185053380782918,0.014877001059430059,0.7992632227700582,0.01724731151455374,0.7891110040676514,0.01721024528881648 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,test,0.66,0.04818339963099324,0.6353496353496353,0.051233026755402256,0.634125636672326,0.050602840192852 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,train,0.9395017793594306,0.009603274555189141,0.9351233159495871,0.010482971278848856,0.9291238492828089,0.01149996423135933 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,test,0.64,0.048093454856144406,0.6216897856242118,0.050416915741930586,0.6230899830220713,0.051157742851149254 +flat_mae,patch,logistic,ppmi_dx,4,0.046415888336127774,train,0.8185053380782918,0.016171584034684227,0.8002258280361325,0.01849874415026044,0.7908504602868764,0.018508899877656575 +flat_mae,patch,logistic,ppmi_dx,4,0.046415888336127774,test,0.73,0.04108331048004774,0.6923076923076923,0.04943483690214474,0.685483870967742,0.04632145491964051 +flat_mae,patch,logistic,ppmi_dx,5,0.046415888336127774,train,0.8220640569395018,0.01571360523869033,0.8036749807866974,0.01813982042047686,0.7937406336972811,0.018207915819935798 +flat_mae,patch,logistic,ppmi_dx,5,0.046415888336127774,test,0.66,0.04661973830900384,0.6263736263736264,0.05189636345300692,0.6239388794567062,0.04988594689318502 +flat_mae,patch,logistic,ppmi_dx,6,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,6,2.782559402207126,test,0.65,0.045114627339699925,0.6224786970121885,0.04912021166970833,0.6209677419354839,0.04850403196788844 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,train,0.8362989323843416,0.014211822525442938,0.8184983922378084,0.01657380631432105,0.8070407835581246,0.016796067657919096 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,test,0.6,0.048215765056669996,0.5659722222222222,0.05192358687579425,0.565365025466893,0.050432432785198704 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,train,0.9359430604982206,0.010473633068478191,0.9315746753246753,0.011307454273909218,0.9271034039820167,0.012037934133075941 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,test,0.59,0.045841029656847804,0.539894512400404,0.05115916822151085,0.5420203735144312,0.04791754511927046 +flat_mae,patch,logistic,ppmi_dx,9,0.046415888336127774,train,0.8096085409252669,0.01637676601362528,0.7896787506776489,0.01872996424603721,0.7801461143224149,0.018556541004961433 +flat_mae,patch,logistic,ppmi_dx,9,0.046415888336127774,test,0.73,0.03924334338457926,0.6923076923076923,0.04739981255949676,0.685483870967742,0.044295698674376854 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,train,0.8096085409252669,0.015938096583616217,0.7875746689133576,0.018807538402692572,0.7766672018839649,0.018591901069685824 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,test,0.65,0.04543082653881613,0.6178622120318812,0.050663240135537704,0.615874363327674,0.04909648871246988 +flat_mae,patch,logistic,ppmi_dx,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,11,166.81005372000556,test,0.58,0.04520840629794419,0.5320855614973261,0.05019155733687487,0.533955857385399,0.04730887175203646 +flat_mae,patch,logistic,ppmi_dx,12,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,12,1291.5496650148827,test,0.59,0.04743363785332093,0.5464100011063171,0.051160944862853254,0.5471137521222411,0.04912290961768734 +flat_mae,patch,logistic,ppmi_dx,13,0.046415888336127774,train,0.8185053380782918,0.015579342424702653,0.8016113625981145,0.017473921009196293,0.793459644615714,0.017526795301379693 +flat_mae,patch,logistic,ppmi_dx,13,0.046415888336127774,test,0.62,0.042733481018985574,0.5634191176470589,0.04948489442410754,0.566213921901528,0.044838967592050236 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,train,0.7419928825622776,0.017534143690440204,0.7032788974457007,0.021592106155896684,0.695662063797902,0.019989953422449663 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,test,0.6,0.040165552405014925,0.5143273433705683,0.050412567326175176,0.5297113752122241,0.042682538489037615 +flat_mae,patch,logistic,ppmi_dx,15,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,15,2.782559402207126,test,0.55,0.0513846046982946,0.5248653785239151,0.05198873782800649,0.5250424448217317,0.05200498316135411 +flat_mae,patch,logistic,ppmi_dx,16,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,16,166.81005372000556,test,0.62,0.04998439756564043,0.6006725514922235,0.05196760300505954,0.6018675721561969,0.05257276907658462 +flat_mae,patch,logistic,ppmi_dx,17,0.3593813663804626,train,0.9359430604982206,0.01006506359887641,0.9314420483348469,0.010947180724810235,0.9262336758724042,0.011853061546139382 +flat_mae,patch,logistic,ppmi_dx,17,0.3593813663804626,test,0.6,0.046982869218471536,0.5659722222222222,0.050500207948351444,0.565365025466893,0.04950552851672528 +flat_mae,patch,logistic,ppmi_dx,18,0.046415888336127774,train,0.8149466192170819,0.015407155101209028,0.7953272075302555,0.01784171621937615,0.7853511025476343,0.01780548900891648 +flat_mae,patch,logistic,ppmi_dx,18,0.046415888336127774,test,0.61,0.044643203290086614,0.5555555555555556,0.05112075357108382,0.5581494057724957,0.04701561288629165 +flat_mae,patch,logistic,ppmi_dx,19,0.046415888336127774,train,0.8238434163701067,0.015600067803529828,0.8085165695563441,0.01723443129257975,0.8012738171697709,0.017263498040322834 +flat_mae,patch,logistic,ppmi_dx,19,0.046415888336127774,test,0.6,0.047667494165311436,0.5755517826825127,0.04979650539455555,0.5755517826825127,0.04962291237171743 +flat_mae,patch,logistic,ppmi_dx,20,0.3593813663804626,train,0.9270462633451957,0.010663871342016844,0.9216884463099223,0.01159489505556057,0.9155293299079426,0.012254146229543513 +flat_mae,patch,logistic,ppmi_dx,20,0.3593813663804626,test,0.71,0.04023232531186831,0.6640018537828757,0.049636063138452674,0.6591680814940577,0.04515471587515827 +flat_mae,patch,logistic,ppmi_dx,21,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,21,21.54434690031882,test,0.7,0.044364201784772364,0.6703296703296704,0.049828402915884654,0.666383701188455,0.048101442402960805 +flat_mae,patch,logistic,ppmi_dx,22,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,22,1291.5496650148827,test,0.59,0.04822455391188186,0.5746446726838883,0.04905393717711153,0.5776740237691002,0.04990630718111656 +flat_mae,patch,logistic,ppmi_dx,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,23,166.81005372000556,test,0.55,0.05115317780939909,0.5366079703429101,0.05175427827388743,0.5403225806451613,0.05296975730511071 +flat_mae,patch,logistic,ppmi_dx,24,0.046415888336127774,train,0.8185053380782918,0.015100198516514791,0.7997484804024313,0.01723058177976322,0.789980732177264,0.017152694365103215 +flat_mae,patch,logistic,ppmi_dx,24,0.046415888336127774,test,0.65,0.043221527043823885,0.6072270227808326,0.05037159530314774,0.6056876061120543,0.04714665776810136 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,train,0.8113879003558719,0.01582861343357157,0.7892897566496888,0.018564656170924728,0.7781122885891671,0.018270808191633684 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,test,0.63,0.03920434159630793,0.5552350042072365,0.051413772285192484,0.5640916808149405,0.043530354416382054 +flat_mae,patch,logistic,ppmi_dx,26,0.3593813663804626,train,0.9252669039145908,0.011041527307721833,0.9203224369101146,0.011862450256279997,0.9166934275315779,0.012390635466157231 +flat_mae,patch,logistic,ppmi_dx,26,0.3593813663804626,test,0.62,0.046471772077251366,0.5876736111111112,0.05038113959407194,0.5865874363327674,0.04921353934461542 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,train,0.7170818505338078,0.016098336300876995,0.6583586626139818,0.02189271658484666,0.6562968315135945,0.018698035622136063 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,test,0.66,0.040967006236726645,0.6026180458158018,0.05204362503247763,0.6035653650254669,0.046037906457947204 +flat_mae,patch,logistic,ppmi_dx,28,0.046415888336127774,train,0.8149466192170819,0.014464452539926847,0.79630868740939,0.016601204859825275,0.7870905587668593,0.016600569455377207 +flat_mae,patch,logistic,ppmi_dx,28,0.046415888336127774,test,0.66,0.041378617666616165,0.5952380952380952,0.05250561213747245,0.5984719864176571,0.04537743839452286 +flat_mae,patch,logistic,ppmi_dx,29,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,29,2.782559402207126,test,0.65,0.049484781498961884,0.6224786970121885,0.053790495183553284,0.6209677419354839,0.05286412340023956 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,train,0.798932384341637,0.01583264822569263,0.7767920847758466,0.018290611335487966,0.7671269535431385,0.0180021282313089 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,test,0.69,0.03962937799158598,0.6343908479773559,0.05063765816336378,0.6328522920203735,0.04441017653414865 +flat_mae,patch,logistic,ppmi_dx,31,2.782559402207126,train,0.998220640569395,0.0017616880506869793,0.9981184064710746,0.0018662638465370123,0.9976851851851851,0.0022918256585325995 +flat_mae,patch,logistic,ppmi_dx,31,2.782559402207126,test,0.65,0.04854284293281554,0.6395839769333744,0.048944381707321113,0.6464346349745331,0.04962018796510489 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,train,0.8220640569395018,0.015392928158963938,0.8041429686628749,0.017651572567550088,0.7946103618068936,0.017724416480376176 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,test,0.65,0.0477029307275769,0.630450849963045,0.049559153738077455,0.6311544991511036,0.04979879842727304 +flat_mae,patch,logistic,ppmi_dx,33,0.046415888336127774,train,0.8256227758007118,0.015004748428389182,0.8080601092896175,0.01726991386163962,0.7983702633269107,0.017428336128676693 +flat_mae,patch,logistic,ppmi_dx,33,0.046415888336127774,test,0.58,0.048157923543275825,0.5495495495495495,0.05080376034675066,0.5492359932088284,0.050050657893976454 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,train,0.7170818505338078,0.016845477188128982,0.6630784370652179,0.021782752435209326,0.6597757439520445,0.019065634573130687 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,test,0.67,0.038361626659984056,0.6033177064551027,0.0508885173379912,0.6065365025466893,0.04342523222054545 +flat_mae,patch,logistic,ppmi_dx,35,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,35,21.54434690031882,test,0.59,0.04603536032225663,0.5746446726838883,0.046942749137610165,0.5776740237691002,0.04784205323700581 +flat_mae,patch,logistic,ppmi_dx,36,0.046415888336127774,train,0.8149466192170819,0.015613208915876397,0.7972582972582972,0.017511597646275533,0.7888300149860843,0.017426162995558766 +flat_mae,patch,logistic,ppmi_dx,36,0.046415888336127774,test,0.66,0.036657217570350314,0.5783730158730158,0.05038386710484335,0.5882852292020373,0.04089234891415433 +flat_mae,patch,logistic,ppmi_dx,37,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,37,166.81005372000556,test,0.61,0.04733497227209498,0.6010230179028133,0.04797490191957567,0.6090831918505942,0.04976116445929104 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,train,0.8220640569395018,0.014610383020472588,0.8036749807866974,0.016757597153439382,0.7937406336972811,0.016779037114931943 +flat_mae,patch,logistic,ppmi_dx,38,0.046415888336127774,test,0.63,0.04302313331220775,0.5906626839252129,0.04810496629066816,0.5895585738539898,0.04605147136153626 +flat_mae,patch,logistic,ppmi_dx,39,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,39,1291.5496650148827,test,0.69,0.045587625513948404,0.6828644501278772,0.045943835964175575,0.6939728353140917,0.04704610303311438 +flat_mae,patch,logistic,ppmi_dx,40,0.3593813663804626,train,0.9252669039145908,0.011563790675927669,0.9209034247034381,0.012268165578763217,0.9201723399700279,0.012582087805077266 +flat_mae,patch,logistic,ppmi_dx,40,0.3593813663804626,test,0.67,0.04582140984299806,0.648,0.04858108310544392,0.6472835314091681,0.04861258451429922 +flat_mae,patch,logistic,ppmi_dx,41,0.005994842503189409,train,0.7241992882562278,0.016196395366156423,0.6726603160287845,0.021159871551508155,0.6681652751016913,0.018613338363497555 +flat_mae,patch,logistic,ppmi_dx,41,0.005994842503189409,test,0.73,0.03847793653511062,0.6871741397288842,0.048499691733899786,0.6803904923599321,0.04436196106128882 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,train,0.9288256227758007,0.00969885328106587,0.9238244981498298,0.010502557595756539,0.9187138728323699,0.011147980204764452 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,test,0.72,0.041099372257979806,0.6783088235294117,0.05005689185976156,0.6723259762308998,0.046534250950052286 +flat_mae,patch,logistic,ppmi_dx,43,0.046415888336127774,train,0.8167259786476868,0.015680499961789603,0.7965449976275416,0.018127499076160004,0.7859264611432242,0.01796867641427232 +flat_mae,patch,logistic,ppmi_dx,43,0.046415888336127774,test,0.66,0.04209560547135532,0.6155585707824514,0.048742333528141266,0.6137521222410866,0.04543300636857395 +flat_mae,patch,logistic,ppmi_dx,44,0.046415888336127774,train,0.8113879003558719,0.015335673284542627,0.7908785823609532,0.017863359075194277,0.7807214729180048,0.017812593575261076 +flat_mae,patch,logistic,ppmi_dx,44,0.046415888336127774,test,0.61,0.04346476273948818,0.5741893219783819,0.04855948711622552,0.5734295415959253,0.04694907026607988 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,train,0.8220640569395018,0.014977299050071514,0.8036749807866974,0.017313658709864437,0.7937406336972811,0.017465708242789375 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,test,0.65,0.040700245699504076,0.6072270227808326,0.04691184598176335,0.6056876061120543,0.04409291392845483 +flat_mae,patch,logistic,ppmi_dx,46,0.046415888336127774,train,0.8096085409252669,0.01551873812977109,0.7891655184261919,0.01783579177368128,0.7792763862128024,0.01764171266379934 +flat_mae,patch,logistic,ppmi_dx,46,0.046415888336127774,test,0.63,0.043655927432595,0.5906626839252129,0.04981375355922824,0.5895585738539898,0.04719537205759887 +flat_mae,patch,logistic,ppmi_dx,47,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,47,10000.0,test,0.69,0.046727683443543395,0.6828644501278772,0.04686563209253103,0.6939728353140917,0.047532730789822945 +flat_mae,patch,logistic,ppmi_dx,48,0.046415888336127774,train,0.8220640569395018,0.015280550355233456,0.8022240990990992,0.017721556647648316,0.7911314493684436,0.01763030073552879 +flat_mae,patch,logistic,ppmi_dx,48,0.046415888336127774,test,0.72,0.04557009545743788,0.6996996996996997,0.04887773405988543,0.6977928692699491,0.04849837976242292 +flat_mae,patch,logistic,ppmi_dx,49,0.046415888336127774,train,0.8185053380782918,0.014856932665300526,0.7977589929580446,0.017497636437946384,0.786501819738814,0.017415799951317285 +flat_mae,patch,logistic,ppmi_dx,49,0.046415888336127774,test,0.67,0.04308085421622927,0.6296711929076422,0.05040461627399023,0.6269100169779287,0.04732412138248857 +flat_mae,patch,logistic,ppmi_dx,50,0.3593813663804626,train,0.9234875444839857,0.010879373160958085,0.9186535386686864,0.011652190956175689,0.916118068935988,0.012183482750072363 +flat_mae,patch,logistic,ppmi_dx,50,0.3593813663804626,test,0.64,0.04530617617941289,0.609375,0.0498209759781038,0.6078098471986417,0.04858256322470786 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,train,0.798932384341637,0.015665192218290117,0.777885035762377,0.017925900280581508,0.7688664097623634,0.017649740995348807 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,test,0.66,0.04403827426228235,0.6155585707824514,0.05125524930599525,0.6137521222410866,0.04761753609092947 +flat_mae,patch,logistic,ppmi_dx,52,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,52,2.782559402207126,test,0.61,0.046644596686004264,0.5920075321686369,0.048216157483037,0.5938030560271647,0.04881468253000248 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,train,0.8185053380782918,0.015185890286330021,0.7992632227700582,0.017517830136015187,0.7891110040676514,0.01751229978717945 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,test,0.7,0.043746542720539634,0.6657754010695187,0.05003693701384135,0.6612903225806452,0.04770424904904955 +flat_mae,patch,logistic,ppmi_dx,54,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,54,2.782559402207126,test,0.56,0.046920575444041594,0.5225694444444444,0.04856317357016476,0.5229202037351443,0.047465171129280274 +flat_mae,patch,logistic,ppmi_dx,55,2.782559402207126,train,0.9928825622775801,0.0034695063332010347,0.9924669928288989,0.0036794315828140634,0.9916104688503533,0.0041330167831264255 +flat_mae,patch,logistic,ppmi_dx,55,2.782559402207126,test,0.68,0.044791655472866815,0.6715927750410509,0.04531206554992176,0.6808149405772496,0.04641980298251452 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,train,0.7277580071174378,0.015545065281193675,0.681083306443537,0.019846018989350026,0.6754040890601585,0.017822461751631057 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,test,0.66,0.039234495026698124,0.587178241864983,0.05182144912804727,0.5933786078098472,0.04343644276146577 +flat_mae,patch,logistic,ppmi_dx,57,0.005994842503189409,train,0.7348754448398577,0.015088915513908677,0.6884225409759819,0.019628214744022703,0.6820541639905802,0.017517116730360736 +flat_mae,patch,logistic,ppmi_dx,57,0.005994842503189409,test,0.72,0.038836807283812606,0.6666666666666667,0.05089059544154402,0.6621392190152802,0.04472711001961647 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,train,0.8238434163701067,0.014604376500589142,0.8054037038980117,0.016868969092052863,0.7951857204024835,0.016961783544998648 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,test,0.67,0.043777784320360474,0.6296711929076422,0.05072816561989559,0.6269100169779287,0.04768404818812198 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,train,0.8256227758007118,0.015164704507945003,0.8080601092896175,0.017326246228854744,0.7983702633269107,0.017354418264828617 +flat_mae,patch,logistic,ppmi_dx,59,0.046415888336127774,test,0.62,0.04724997354496614,0.5824175824175825,0.05318244161319122,0.5814940577249575,0.050702562481427814 +flat_mae,patch,logistic,ppmi_dx,60,0.3593813663804626,train,0.9341637010676157,0.010052447914392146,0.9298739052282623,0.010801662589931987,0.9265280453864269,0.011451214619629281 +flat_mae,patch,logistic,ppmi_dx,60,0.3593813663804626,test,0.67,0.04609143521306317,0.6349153667441089,0.0515123052542775,0.6320033955857385,0.049595093444571886 +flat_mae,patch,logistic,ppmi_dx,61,0.3593813663804626,train,0.9306049822064056,0.009979182622565638,0.9258010460569746,0.010791546602690022,0.9210286876471847,0.01151825265635683 +flat_mae,patch,logistic,ppmi_dx,61,0.3593813663804626,test,0.6,0.046679331614752156,0.554367201426025,0.05257356415179159,0.5551782682512734,0.04970923987871224 +flat_mae,patch,logistic,ppmi_dx,62,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,62,21.54434690031882,test,0.58,0.05159883719620046,0.565936337329475,0.05248128733253946,0.5696095076400679,0.05367812349713947 +flat_mae,patch,logistic,ppmi_dx,63,0.3593813663804626,train,0.9270462633451957,0.010742948836764982,0.9219959714957937,0.011639597127112135,0.9172687861271676,0.012400594135751132 +flat_mae,patch,logistic,ppmi_dx,63,0.3593813663804626,test,0.6,0.04409809066161482,0.5404411764705883,0.05304430094541917,0.5449915110356536,0.04745782147787389 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,train,0.8220640569395018,0.014114032890839978,0.8027156437367482,0.01659657801151293,0.7920011774780561,0.016729862996406428 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,test,0.63,0.04270051990315809,0.5847828526540231,0.048450121325910235,0.5844651952461799,0.04568714819148952 +flat_mae,patch,logistic,ppmi_dx,65,2.782559402207126,train,0.998220640569395,0.0018598553891677049,0.9981184064710746,0.0019703661876776923,0.9976851851851851,0.00241953409424132 +flat_mae,patch,logistic,ppmi_dx,65,2.782559402207126,test,0.62,0.045242895574885575,0.5824175824175825,0.049494957985383004,0.5814940577249575,0.047518425208723354 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,train,0.8291814946619217,0.01621294315010639,0.8124191641749531,0.018329154683373892,0.8029998929565403,0.01835335778117852 +flat_mae,patch,logistic,ppmi_dx,66,0.046415888336127774,test,0.58,0.04650647696826755,0.525101763907734,0.05250167913557576,0.5288624787775891,0.04864322687340387 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,train,0.8291814946619217,0.015502254828350324,0.8115279815552294,0.01775636257119036,0.8012604367373153,0.017780260188593383 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,test,0.62,0.045983057749566854,0.5766488413547237,0.05254981705577718,0.5764006791171477,0.04969926797603512 +flat_mae,patch,logistic,ppmi_dx,68,0.005994842503189409,train,0.7259786476868327,0.016688175269629252,0.6815090673575129,0.021355947603271752,0.6756984585741811,0.019315115579106198 +flat_mae,patch,logistic,ppmi_dx,68,0.005994842503189409,test,0.65,0.045761538435677616,0.612789025334661,0.05145330640717713,0.6107809847198642,0.04915862136852485 +flat_mae,patch,logistic,ppmi_dx,69,0.046415888336127774,train,0.8149466192170819,0.014926386492236886,0.7932654216185626,0.01765425996798882,0.7818721901091843,0.017465044904384733 +flat_mae,patch,logistic,ppmi_dx,69,0.046415888336127774,test,0.69,0.04278683909802171,0.6615351020853806,0.04720225132987685,0.6583191850594228,0.04624610464079897 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,train,0.7384341637010676,0.016225030597422963,0.6964539632499642,0.02027501459027422,0.6892929779490473,0.018497379209959284 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,test,0.61,0.04593865474739111,0.5555555555555556,0.052940101418439994,0.5581494057724957,0.04865124311739323 +flat_mae,patch,logistic,ppmi_dx,71,0.3593813663804626,train,0.9306049822064056,0.0110932813368957,0.9259434456334069,0.011977756506958256,0.9218984157567973,0.012662969749207555 +flat_mae,patch,logistic,ppmi_dx,71,0.3593813663804626,test,0.61,0.05053747520405031,0.5793334052421529,0.053984150313088224,0.5785229202037352,0.0530098992074841 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7295373665480427,0.016510106517789596,0.6826604454879117,0.02116162956299553,0.6768491757653607,0.01895136604716053 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.64,0.04187800854864042,0.5863970588235294,0.04939544755702973,0.5874363327674024,0.045046822353207124 +flat_mae,patch,logistic,ppmi_dx,73,0.046415888336127774,train,0.8131672597864769,0.014674052612113296,0.7920740795551844,0.017353281973099145,0.7812968315135945,0.017254141340188635 +flat_mae,patch,logistic,ppmi_dx,73,0.046415888336127774,test,0.67,0.04232262279207184,0.6440513428972063,0.045822744047799,0.6421901528013583,0.045267743446532054 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7313167259786477,0.017032058212465086,0.6900799433202225,0.021116743017004294,0.683512631128238,0.019375610598759245 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.59,0.04425788065418406,0.5464100011063171,0.049747196521184936,0.5471137521222411,0.04713279136247667 +flat_mae,patch,logistic,ppmi_dx,75,0.046415888336127774,train,0.8309608540925267,0.015385629900216989,0.813266180508193,0.01762734439562886,0.8027055234425177,0.017663883281701483 +flat_mae,patch,logistic,ppmi_dx,75,0.046415888336127774,test,0.61,0.04395341169920715,0.5555555555555556,0.050446312921517085,0.5581494057724957,0.04666570711008983 +flat_mae,patch,logistic,ppmi_dx,76,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,76,2.782559402207126,test,0.65,0.04511749549786646,0.6266666666666667,0.04880025549168066,0.6260611205432938,0.04885893979398249 +flat_mae,patch,logistic,ppmi_dx,77,0.046415888336127774,train,0.8185053380782918,0.015703175523061767,0.8006953619358876,0.01800271688970363,0.791720188396489,0.018134635973536675 +flat_mae,patch,logistic,ppmi_dx,77,0.046415888336127774,test,0.61,0.043739497025000185,0.568536342515765,0.04875264341337762,0.5683361629881154,0.04664190737508558 +flat_mae,patch,logistic,ppmi_dx,78,0.046415888336127774,train,0.8185053380782918,0.014777180593690813,0.7997484804024313,0.01687279119552596,0.789980732177264,0.016861839933142553 +flat_mae,patch,logistic,ppmi_dx,78,0.046415888336127774,test,0.64,0.041834966236391304,0.5792426367461431,0.05053075971780784,0.5823429541595926,0.045330236689791054 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,train,0.7295373665480427,0.015835866682093205,0.6773628233019096,0.02111065710591862,0.6725005352172982,0.018459942339593145 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,test,0.65,0.04163045039391238,0.5872154735228211,0.051051886965747945,0.5904074702886248,0.04511388859973934 +flat_mae,patch,logistic,ppmi_dx,80,0.3593813663804626,train,0.9288256227758007,0.010797141295442361,0.9241166065810615,0.01158555361734222,0.920453329051595,0.012070544176258276 +flat_mae,patch,logistic,ppmi_dx,80,0.3593813663804626,test,0.64,0.04507726699790039,0.6138996138996139,0.048344300318632405,0.6129032258064516,0.04771668141100414 +flat_mae,patch,logistic,ppmi_dx,81,0.046415888336127774,train,0.8096085409252669,0.0154231705902627,0.7896787506776489,0.017758563892337067,0.7801461143224149,0.017687111689943143 +flat_mae,patch,logistic,ppmi_dx,81,0.046415888336127774,test,0.67,0.04399838178842489,0.6396986570586308,0.04796796451107778,0.6370967741935484,0.0465092677959389 +flat_mae,patch,logistic,ppmi_dx,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,82,2.782559402207126,test,0.62,0.04621861097004106,0.5924495924495925,0.04892060285993674,0.5916808149405772,0.04829643918405128 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,train,0.7491103202846975,0.01603937094136736,0.7114643071713365,0.019757827062831843,0.7031818668379362,0.01832518090477298 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,test,0.55,0.04925338160979406,0.5146154675870995,0.051511924925864934,0.514855687606112,0.050722303814253716 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7419928825622776,0.01608898765034403,0.7032788974457007,0.01995373306096694,0.695662063797902,0.018389655251755718 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.57,0.04609442048664891,0.5174503422735944,0.05095496224729247,0.5207979626485568,0.047937494155020265 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,train,0.8149466192170819,0.01562758378986435,0.7967874278561992,0.017730978661672326,0.7879602868764719,0.017671041032199245 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,test,0.63,0.04497787900735205,0.5906626839252129,0.04979117816185736,0.5895585738539898,0.047665924857085124 +flat_mae,patch,logistic,ppmi_dx,86,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,86,2.782559402207126,test,0.59,0.050106989532399566,0.5626666666666666,0.05274214749752581,0.5623938879456706,0.05220622539830924 +flat_mae,patch,logistic,ppmi_dx,87,0.046415888336127774,train,0.806049822064057,0.016343152805814404,0.7872653713253808,0.018566718309040203,0.7789953971312353,0.018487646186735183 +flat_mae,patch,logistic,ppmi_dx,87,0.046415888336127774,test,0.61,0.04366508903002489,0.5481404240528328,0.05270045988019982,0.5530560271646858,0.047252924227368716 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.8202846975088968,0.01611700358273084,0.8009880127200502,0.018603003193063686,0.7905560907728538,0.018523646336592113 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.68,0.03860389099559783,0.6190476190476191,0.05016060545003259,0.6196943972835314,0.04346620716398032 +flat_mae,patch,logistic,ppmi_dx,89,2.782559402207126,train,0.99644128113879,0.0024664027065264475,0.9962400984799828,0.002606159000993625,0.9962400984799828,0.002681719251107251 +flat_mae,patch,logistic,ppmi_dx,89,2.782559402207126,test,0.65,0.04609927982083885,0.6338529134846741,0.04808261565102165,0.6362478777589134,0.04880933440775694 +flat_mae,patch,logistic,ppmi_dx,90,0.046415888336127774,train,0.8220640569395018,0.015050886528507407,0.803199238009861,0.01733632244254473,0.7928709055876686,0.017280772817717623 +flat_mae,patch,logistic,ppmi_dx,90,0.046415888336127774,test,0.65,0.041200116504689646,0.6011396011396011,0.04774112044208454,0.6005942275042444,0.04410207012251114 +flat_mae,patch,logistic,ppmi_dx,91,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,91,1291.5496650148827,test,0.56,0.04656615079647446,0.5331069609507639,0.04799949187828508,0.5331069609507639,0.04793351730373854 +flat_mae,patch,logistic,ppmi_dx,92,2.782559402207126,train,0.99644128113879,0.0024293202734344305,0.9962334964144495,0.0025779327254221336,0.9953703703703703,0.0031603657260883098 +flat_mae,patch,logistic,ppmi_dx,92,2.782559402207126,test,0.64,0.048678582559478864,0.625,0.05016687360047998,0.6281833616298811,0.05108720540747637 +flat_mae,patch,logistic,ppmi_dx,93,0.046415888336127774,train,0.8202846975088968,0.01518477607006673,0.8037784115461066,0.01693135020418671,0.7957744594305287,0.016967774211262235 +flat_mae,patch,logistic,ppmi_dx,93,0.046415888336127774,test,0.65,0.044151557163932514,0.6178622120318812,0.0497554078417493,0.615874363327674,0.04816805184206712 +flat_mae,patch,logistic,ppmi_dx,94,0.3593813663804626,train,0.9217081850533808,0.011439996496455222,0.9168381828699033,0.012211501845657977,0.9146729822307857,0.01268421498602773 +flat_mae,patch,logistic,ppmi_dx,94,0.3593813663804626,test,0.7,0.04465037065915579,0.6703296703296704,0.05024648119358054,0.666383701188455,0.04867628913046136 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,train,0.7313167259786477,0.01596270832729,0.6881942071479223,0.01996111406087623,0.6817731749090131,0.018234955853946043 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,test,0.62,0.03710768653527191,0.5386109762020399,0.04820870196712447,0.5509337860780985,0.04028696826987648 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,train,0.7366548042704626,0.016573092050038107,0.6939178049929345,0.021041707110431105,0.6869781631342324,0.01911963667404783 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,test,0.53,0.0431906425050612,0.4456893501592169,0.04670576727222434,0.4630730050933786,0.042743788002132035 +flat_mae,patch,logistic,ppmi_dx,97,0.3593813663804626,train,0.9234875444839857,0.010646794736830873,0.9186535386686864,0.011400981025352426,0.916118068935988,0.011988399373682946 +flat_mae,patch,logistic,ppmi_dx,97,0.3593813663804626,test,0.64,0.04810249058001051,0.625,0.04893672683419549,0.6281833616298811,0.0494646763487748 +flat_mae,patch,logistic,ppmi_dx,98,2.782559402207126,train,0.998220640569395,0.0017715272465727786,0.9981184064710746,0.0018766254054163986,0.9976851851851851,0.002304625723550701 +flat_mae,patch,logistic,ppmi_dx,98,2.782559402207126,test,0.6,0.04707413727302924,0.5796553173602353,0.04990494557680594,0.5806451612903225,0.05062024216955051 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,train,0.8256227758007118,0.015168632027180913,0.8080601092896175,0.01745162846404786,0.7983702633269107,0.017661602741312846 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,test,0.64,0.04490200440960292,0.6043956043956044,0.04969012942546657,0.6027164685908319,0.0478549688907871 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,train,0.8113879003558719,0.015291320925187812,0.7898279730740463,0.017823869204430038,0.7789820166987798,0.017574274227156465 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,test,0.61,0.04566136222234286,0.5793334052421529,0.050261294271312824,0.5785229202037352,0.049038212205914596 diff --git a/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ede08e9f54e09d86c1f3d644935c4706c1b3d655 --- /dev/null +++ b/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:20:08 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n800_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n800_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n800_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:15:15 time: 3.9442 data: 3.0719 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:18 time: 0.1909 data: 0.0673 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:52 time: 0.1734 data: 0.0575 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:41 time: 0.1711 data: 0.0531 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:34 time: 0.1774 data: 0.0578 max mem: 2851 +extract (train) [100/232] eta: 0:00:28 time: 0.1713 data: 0.0506 max mem: 2851 +extract (train) [120/232] eta: 0:00:23 time: 0.1730 data: 0.0525 max mem: 2851 +extract (train) [140/232] eta: 0:00:18 time: 0.1599 data: 0.0454 max mem: 2851 +extract (train) [160/232] eta: 0:00:14 time: 0.1691 data: 0.0500 max mem: 2851 +extract (train) [180/232] eta: 0:00:10 time: 0.1661 data: 0.0474 max mem: 2851 +extract (train) [200/232] eta: 0:00:06 time: 0.1701 data: 0.0503 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1570 data: 0.0435 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1517 data: 0.0427 max mem: 2851 +extract (train) Total time: 0:00:43 (0.1875 s / it) +extract (validation) [ 0/50] eta: 0:03:29 time: 4.1833 data: 4.0121 max mem: 2851 +extract (validation) [20/50] eta: 0:00:12 time: 0.2317 data: 0.0782 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1436 data: 0.0391 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1389 data: 0.0385 max mem: 2851 +extract (validation) Total time: 0:00:13 (0.2631 s / it) +extract (test) [ 0/50] eta: 0:03:06 time: 3.7316 data: 3.5716 max mem: 2851 +extract (test) [20/50] eta: 0:00:11 time: 0.2269 data: 0.0755 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1467 data: 0.0410 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1462 data: 0.0419 max mem: 2851 +extract (test) Total time: 0:00:12 (0.2559 s / it) +feature extraction time: 0:01:09 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-------:|:--------|------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 1291.5 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | ppmi_dx | | 1291.5 | test | 0.63 | 0.048689 | 0.60533 | 0.051688 | 0.60596 | 0.051804 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.04907783206295893, "f1": 0.570999570999571, "f1_std": 0.052479039875058024, "bacc": 0.5704584040747029, "bacc_std": 0.051730813065490794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04818339963099324, "f1": 0.6353496353496353, "f1_std": 0.051233026755402256, "bacc": 0.634125636672326, "bacc_std": 0.050602840192852} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.048093454856144406, "f1": 0.6216897856242118, "f1_std": 0.050416915741930586, "bacc": 0.6230899830220713, "bacc_std": 0.051157742851149254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.73, "acc_std": 0.04108331048004774, "f1": 0.6923076923076923, "f1_std": 0.04943483690214474, "bacc": 0.685483870967742, "bacc_std": 0.04632145491964051} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04661973830900384, "f1": 0.6263736263736264, "f1_std": 0.05189636345300692, "bacc": 0.6239388794567062, "bacc_std": 0.04988594689318502} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.045114627339699925, "f1": 0.6224786970121885, "f1_std": 0.04912021166970833, "bacc": 0.6209677419354839, "bacc_std": 0.04850403196788844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.048215765056669996, "f1": 0.5659722222222222, "f1_std": 0.05192358687579425, "bacc": 0.565365025466893, "bacc_std": 0.050432432785198704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.045841029656847804, "f1": 0.539894512400404, "f1_std": 0.05115916822151085, "bacc": 0.5420203735144312, "bacc_std": 0.04791754511927046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.73, "acc_std": 0.03924334338457926, "f1": 0.6923076923076923, "f1_std": 0.04739981255949676, "bacc": 0.685483870967742, "bacc_std": 0.044295698674376854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04543082653881613, "f1": 0.6178622120318812, "f1_std": 0.050663240135537704, "bacc": 0.615874363327674, "bacc_std": 0.04909648871246988} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.58, "acc_std": 0.04520840629794419, "f1": 0.5320855614973261, "f1_std": 0.05019155733687487, "bacc": 0.533955857385399, "bacc_std": 0.04730887175203646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 1291.5496650148827, "split": "test", "acc": 0.59, "acc_std": 0.04743363785332093, "f1": 0.5464100011063171, "f1_std": 0.051160944862853254, "bacc": 0.5471137521222411, "bacc_std": 0.04912290961768734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.042733481018985574, "f1": 0.5634191176470589, "f1_std": 0.04948489442410754, "bacc": 0.566213921901528, "bacc_std": 0.044838967592050236} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.040165552405014925, "f1": 0.5143273433705683, "f1_std": 0.050412567326175176, "bacc": 0.5297113752122241, "bacc_std": 0.042682538489037615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 2.782559402207126, "split": "test", "acc": 0.55, "acc_std": 0.0513846046982946, "f1": 0.5248653785239151, "f1_std": 0.05198873782800649, "bacc": 0.5250424448217317, "bacc_std": 0.05200498316135411} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 166.81005372000556, "split": "test", "acc": 0.62, "acc_std": 0.04998439756564043, "f1": 0.6006725514922235, "f1_std": 0.05196760300505954, "bacc": 0.6018675721561969, "bacc_std": 0.05257276907658462} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.046982869218471536, "f1": 0.5659722222222222, "f1_std": 0.050500207948351444, "bacc": 0.565365025466893, "bacc_std": 0.04950552851672528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.044643203290086614, "f1": 0.5555555555555556, "f1_std": 0.05112075357108382, "bacc": 0.5581494057724957, "bacc_std": 0.04701561288629165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.047667494165311436, "f1": 0.5755517826825127, "f1_std": 0.04979650539455555, "bacc": 0.5755517826825127, "bacc_std": 0.04962291237171743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.71, "acc_std": 0.04023232531186831, "f1": 0.6640018537828757, "f1_std": 0.049636063138452674, "bacc": 0.6591680814940577, "bacc_std": 0.04515471587515827} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 21.54434690031882, "split": "test", "acc": 0.7, "acc_std": 0.044364201784772364, "f1": 0.6703296703296704, "f1_std": 0.049828402915884654, "bacc": 0.666383701188455, "bacc_std": 0.048101442402960805} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 1291.5496650148827, "split": "test", "acc": 0.59, "acc_std": 0.04822455391188186, "f1": 0.5746446726838883, "f1_std": 0.04905393717711153, "bacc": 0.5776740237691002, "bacc_std": 0.04990630718111656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.55, "acc_std": 0.05115317780939909, "f1": 0.5366079703429101, "f1_std": 0.05175427827388743, "bacc": 0.5403225806451613, "bacc_std": 0.05296975730511071} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.043221527043823885, "f1": 0.6072270227808326, "f1_std": 0.05037159530314774, "bacc": 0.6056876061120543, "bacc_std": 0.04714665776810136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.03920434159630793, "f1": 0.5552350042072365, "f1_std": 0.051413772285192484, "bacc": 0.5640916808149405, "bacc_std": 0.043530354416382054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.046471772077251366, "f1": 0.5876736111111112, "f1_std": 0.05038113959407194, "bacc": 0.5865874363327674, "bacc_std": 0.04921353934461542} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.040967006236726645, "f1": 0.6026180458158018, "f1_std": 0.05204362503247763, "bacc": 0.6035653650254669, "bacc_std": 0.046037906457947204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.041378617666616165, "f1": 0.5952380952380952, "f1_std": 0.05250561213747245, "bacc": 0.5984719864176571, "bacc_std": 0.04537743839452286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.049484781498961884, "f1": 0.6224786970121885, "f1_std": 0.053790495183553284, "bacc": 0.6209677419354839, "bacc_std": 0.05286412340023956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.03962937799158598, "f1": 0.6343908479773559, "f1_std": 0.05063765816336378, "bacc": 0.6328522920203735, "bacc_std": 0.04441017653414865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.04854284293281554, "f1": 0.6395839769333744, "f1_std": 0.048944381707321113, "bacc": 0.6464346349745331, "bacc_std": 0.04962018796510489} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.0477029307275769, "f1": 0.630450849963045, "f1_std": 0.049559153738077455, "bacc": 0.6311544991511036, "bacc_std": 0.04979879842727304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.048157923543275825, "f1": 0.5495495495495495, "f1_std": 0.05080376034675066, "bacc": 0.5492359932088284, "bacc_std": 0.050050657893976454} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.038361626659984056, "f1": 0.6033177064551027, "f1_std": 0.0508885173379912, "bacc": 0.6065365025466893, "bacc_std": 0.04342523222054545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 21.54434690031882, "split": "test", "acc": 0.59, "acc_std": 0.04603536032225663, "f1": 0.5746446726838883, "f1_std": 0.046942749137610165, "bacc": 0.5776740237691002, "bacc_std": 0.04784205323700581} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.036657217570350314, "f1": 0.5783730158730158, "f1_std": 0.05038386710484335, "bacc": 0.5882852292020373, "bacc_std": 0.04089234891415433} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 166.81005372000556, "split": "test", "acc": 0.61, "acc_std": 0.04733497227209498, "f1": 0.6010230179028133, "f1_std": 0.04797490191957567, "bacc": 0.6090831918505942, "bacc_std": 0.04976116445929104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04302313331220775, "f1": 0.5906626839252129, "f1_std": 0.04810496629066816, "bacc": 0.5895585738539898, "bacc_std": 0.04605147136153626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 1291.5496650148827, "split": "test", "acc": 0.69, "acc_std": 0.045587625513948404, "f1": 0.6828644501278772, "f1_std": 0.045943835964175575, "bacc": 0.6939728353140917, "bacc_std": 0.04704610303311438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.04582140984299806, "f1": 0.648, "f1_std": 0.04858108310544392, "bacc": 0.6472835314091681, "bacc_std": 0.04861258451429922} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.73, "acc_std": 0.03847793653511062, "f1": 0.6871741397288842, "f1_std": 0.048499691733899786, "bacc": 0.6803904923599321, "bacc_std": 0.04436196106128882} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.72, "acc_std": 0.041099372257979806, "f1": 0.6783088235294117, "f1_std": 0.05005689185976156, "bacc": 0.6723259762308998, "bacc_std": 0.046534250950052286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04209560547135532, "f1": 0.6155585707824514, "f1_std": 0.048742333528141266, "bacc": 0.6137521222410866, "bacc_std": 0.04543300636857395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04346476273948818, "f1": 0.5741893219783819, "f1_std": 0.04855948711622552, "bacc": 0.5734295415959253, "bacc_std": 0.04694907026607988} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.040700245699504076, "f1": 0.6072270227808326, "f1_std": 0.04691184598176335, "bacc": 0.6056876061120543, "bacc_std": 0.04409291392845483} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.043655927432595, "f1": 0.5906626839252129, "f1_std": 0.04981375355922824, "bacc": 0.5895585738539898, "bacc_std": 0.04719537205759887} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 10000.0, "split": "test", "acc": 0.69, "acc_std": 0.046727683443543395, "f1": 0.6828644501278772, "f1_std": 0.04686563209253103, "bacc": 0.6939728353140917, "bacc_std": 0.047532730789822945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.72, "acc_std": 0.04557009545743788, "f1": 0.6996996996996997, "f1_std": 0.04887773405988543, "bacc": 0.6977928692699491, "bacc_std": 0.04849837976242292} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04308085421622927, "f1": 0.6296711929076422, "f1_std": 0.05040461627399023, "bacc": 0.6269100169779287, "bacc_std": 0.04732412138248857} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04530617617941289, "f1": 0.609375, "f1_std": 0.0498209759781038, "bacc": 0.6078098471986417, "bacc_std": 0.04858256322470786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04403827426228235, "f1": 0.6155585707824514, "f1_std": 0.05125524930599525, "bacc": 0.6137521222410866, "bacc_std": 0.04761753609092947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.61, "acc_std": 0.046644596686004264, "f1": 0.5920075321686369, "f1_std": 0.048216157483037, "bacc": 0.5938030560271647, "bacc_std": 0.04881468253000248} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.043746542720539634, "f1": 0.6657754010695187, "f1_std": 0.05003693701384135, "bacc": 0.6612903225806452, "bacc_std": 0.04770424904904955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 2.782559402207126, "split": "test", "acc": 0.56, "acc_std": 0.046920575444041594, "f1": 0.5225694444444444, "f1_std": 0.04856317357016476, "bacc": 0.5229202037351443, "bacc_std": 0.047465171129280274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 2.782559402207126, "split": "test", "acc": 0.68, "acc_std": 0.044791655472866815, "f1": 0.6715927750410509, "f1_std": 0.04531206554992176, "bacc": 0.6808149405772496, "bacc_std": 0.04641980298251452} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.039234495026698124, "f1": 0.587178241864983, "f1_std": 0.05182144912804727, "bacc": 0.5933786078098472, "bacc_std": 0.04343644276146577} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.72, "acc_std": 0.038836807283812606, "f1": 0.6666666666666667, "f1_std": 0.05089059544154402, "bacc": 0.6621392190152802, "bacc_std": 0.04472711001961647} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.043777784320360474, "f1": 0.6296711929076422, "f1_std": 0.05072816561989559, "bacc": 0.6269100169779287, "bacc_std": 0.04768404818812198} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04724997354496614, "f1": 0.5824175824175825, "f1_std": 0.05318244161319122, "bacc": 0.5814940577249575, "bacc_std": 0.050702562481427814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.04609143521306317, "f1": 0.6349153667441089, "f1_std": 0.0515123052542775, "bacc": 0.6320033955857385, "bacc_std": 0.049595093444571886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.046679331614752156, "f1": 0.554367201426025, "f1_std": 0.05257356415179159, "bacc": 0.5551782682512734, "bacc_std": 0.04970923987871224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 21.54434690031882, "split": "test", "acc": 0.58, "acc_std": 0.05159883719620046, "f1": 0.565936337329475, "f1_std": 0.05248128733253946, "bacc": 0.5696095076400679, "bacc_std": 0.05367812349713947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.04409809066161482, "f1": 0.5404411764705883, "f1_std": 0.05304430094541917, "bacc": 0.5449915110356536, "bacc_std": 0.04745782147787389} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04270051990315809, "f1": 0.5847828526540231, "f1_std": 0.048450121325910235, "bacc": 0.5844651952461799, "bacc_std": 0.04568714819148952} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.62, "acc_std": 0.045242895574885575, "f1": 0.5824175824175825, "f1_std": 0.049494957985383004, "bacc": 0.5814940577249575, "bacc_std": 0.047518425208723354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.04650647696826755, "f1": 0.525101763907734, "f1_std": 0.05250167913557576, "bacc": 0.5288624787775891, "bacc_std": 0.04864322687340387} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.045983057749566854, "f1": 0.5766488413547237, "f1_std": 0.05254981705577718, "bacc": 0.5764006791171477, "bacc_std": 0.04969926797603512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.045761538435677616, "f1": 0.612789025334661, "f1_std": 0.05145330640717713, "bacc": 0.6107809847198642, "bacc_std": 0.04915862136852485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.04278683909802171, "f1": 0.6615351020853806, "f1_std": 0.04720225132987685, "bacc": 0.6583191850594228, "bacc_std": 0.04624610464079897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04593865474739111, "f1": 0.5555555555555556, "f1_std": 0.052940101418439994, "bacc": 0.5581494057724957, "bacc_std": 0.04865124311739323} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.05053747520405031, "f1": 0.5793334052421529, "f1_std": 0.053984150313088224, "bacc": 0.5785229202037352, "bacc_std": 0.0530098992074841} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04187800854864042, "f1": 0.5863970588235294, "f1_std": 0.04939544755702973, "bacc": 0.5874363327674024, "bacc_std": 0.045046822353207124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04232262279207184, "f1": 0.6440513428972063, "f1_std": 0.045822744047799, "bacc": 0.6421901528013583, "bacc_std": 0.045267743446532054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04425788065418406, "f1": 0.5464100011063171, "f1_std": 0.049747196521184936, "bacc": 0.5471137521222411, "bacc_std": 0.04713279136247667} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04395341169920715, "f1": 0.5555555555555556, "f1_std": 0.050446312921517085, "bacc": 0.5581494057724957, "bacc_std": 0.04666570711008983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.04511749549786646, "f1": 0.6266666666666667, "f1_std": 0.04880025549168066, "bacc": 0.6260611205432938, "bacc_std": 0.04885893979398249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.043739497025000185, "f1": 0.568536342515765, "f1_std": 0.04875264341337762, "bacc": 0.5683361629881154, "bacc_std": 0.04664190737508558} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.041834966236391304, "f1": 0.5792426367461431, "f1_std": 0.05053075971780784, "bacc": 0.5823429541595926, "bacc_std": 0.045330236689791054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04163045039391238, "f1": 0.5872154735228211, "f1_std": 0.051051886965747945, "bacc": 0.5904074702886248, "bacc_std": 0.04511388859973934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04507726699790039, "f1": 0.6138996138996139, "f1_std": 0.048344300318632405, "bacc": 0.6129032258064516, "bacc_std": 0.04771668141100414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04399838178842489, "f1": 0.6396986570586308, "f1_std": 0.04796796451107778, "bacc": 0.6370967741935484, "bacc_std": 0.0465092677959389} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.62, "acc_std": 0.04621861097004106, "f1": 0.5924495924495925, "f1_std": 0.04892060285993674, "bacc": 0.5916808149405772, "bacc_std": 0.04829643918405128} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.55, "acc_std": 0.04925338160979406, "f1": 0.5146154675870995, "f1_std": 0.051511924925864934, "bacc": 0.514855687606112, "bacc_std": 0.050722303814253716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.04609442048664891, "f1": 0.5174503422735944, "f1_std": 0.05095496224729247, "bacc": 0.5207979626485568, "bacc_std": 0.047937494155020265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04497787900735205, "f1": 0.5906626839252129, "f1_std": 0.04979117816185736, "bacc": 0.5895585738539898, "bacc_std": 0.047665924857085124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.59, "acc_std": 0.050106989532399566, "f1": 0.5626666666666666, "f1_std": 0.05274214749752581, "bacc": 0.5623938879456706, "bacc_std": 0.05220622539830924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04366508903002489, "f1": 0.5481404240528328, "f1_std": 0.05270045988019982, "bacc": 0.5530560271646858, "bacc_std": 0.047252924227368716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.03860389099559783, "f1": 0.6190476190476191, "f1_std": 0.05016060545003259, "bacc": 0.6196943972835314, "bacc_std": 0.04346620716398032} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 2.782559402207126, "split": "test", "acc": 0.65, "acc_std": 0.04609927982083885, "f1": 0.6338529134846741, "f1_std": 0.04808261565102165, "bacc": 0.6362478777589134, "bacc_std": 0.04880933440775694} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.041200116504689646, "f1": 0.6011396011396011, "f1_std": 0.04774112044208454, "bacc": 0.6005942275042444, "bacc_std": 0.04410207012251114} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 1291.5496650148827, "split": "test", "acc": 0.56, "acc_std": 0.04656615079647446, "f1": 0.5331069609507639, "f1_std": 0.04799949187828508, "bacc": 0.5331069609507639, "bacc_std": 0.04793351730373854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.64, "acc_std": 0.048678582559478864, "f1": 0.625, "f1_std": 0.05016687360047998, "bacc": 0.6281833616298811, "bacc_std": 0.05108720540747637} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.044151557163932514, "f1": 0.6178622120318812, "f1_std": 0.0497554078417493, "bacc": 0.615874363327674, "bacc_std": 0.04816805184206712} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.7, "acc_std": 0.04465037065915579, "f1": 0.6703296703296704, "f1_std": 0.05024648119358054, "bacc": 0.666383701188455, "bacc_std": 0.04867628913046136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.03710768653527191, "f1": 0.5386109762020399, "f1_std": 0.04820870196712447, "bacc": 0.5509337860780985, "bacc_std": 0.04028696826987648} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.53, "acc_std": 0.0431906425050612, "f1": 0.4456893501592169, "f1_std": 0.04670576727222434, "bacc": 0.4630730050933786, "bacc_std": 0.042743788002132035} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04810249058001051, "f1": 0.625, "f1_std": 0.04893672683419549, "bacc": 0.6281833616298811, "bacc_std": 0.0494646763487748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.04707413727302924, "f1": 0.5796553173602353, "f1_std": 0.04990494557680594, "bacc": 0.5806451612903225, "bacc_std": 0.05062024216955051} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04490200440960292, "f1": 0.6043956043956044, "f1_std": 0.04969012942546657, "bacc": 0.6027164685908319, "bacc_std": 0.0478549688907871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04566136222234286, "f1": 0.5793334052421529, "f1_std": 0.050261294271312824, "bacc": 0.5785229202037352, "bacc_std": 0.049038212205914596} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 159.45 | 1026.1 | 0.87005 | 0.095242 | 0.85425 | 0.10957 | 0.84846 | 0.11265 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 159.45 | 1026.1 | 0.6356 | 0.043839 | 0.59742 | 0.048125 | 0.5983 | 0.046263 | + + +done! total time: 0:05:16 diff --git a/data_scaling/n800_1/pretrain/config.yaml b/data_scaling/n800_1/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4be9d8bd5f9323cf81ae357894e3a9b41a35090a --- /dev/null +++ b/data_scaling/n800_1/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n800_1/pretrain +notes: data scaling experiment n800_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n800_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00799}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560